From 529a11018661830c71dcc15ecc767a9815bbb5d6 Mon Sep 17 00:00:00 2001 From: jsmariegaard Date: Wed, 13 Nov 2024 08:31:52 +0000 Subject: [PATCH] deploy: 91d2f5235f9fdf1ef6a30d1a199854edcb0fedb1 --- api/comparer/index.html | 3606 +++++++++++++++++++------------------- api/matching/index.html | 364 ++-- search/search_index.json | 2 +- sitemap.xml | 66 +- sitemap.xml.gz | Bin 433 -> 433 bytes 5 files changed, 2021 insertions(+), 2017 deletions(-) diff --git a/api/comparer/index.html b/api/comparer/index.html index 12d55aaa..a7b0a66e 100644 --- a/api/comparer/index.html +++ b/api/comparer/index.html @@ -1976,28 +1976,7 @@

Source code in modelskill/comparison/_comparison.py -
 433
- 434
- 435
- 436
- 437
- 438
- 439
- 440
- 441
- 442
- 443
- 444
- 445
- 446
- 447
- 448
- 449
- 450
- 451
- 452
- 453
- 454
+                
 454
  455
  456
  457
@@ -3030,514 +3009,514 @@ 

1484 1485 1486 -1487

class Comparer(Scoreable):
-    """
-    Comparer class for comparing model and observation data.
-
-    Typically, the Comparer is part of a ComparerCollection,
-    created with the `match` function.
-
-    Parameters
-    ----------
-    matched_data : xr.Dataset
-        Matched data
-    raw_mod_data : dict of modelskill.TimeSeries, optional
-        Raw model data. If None, observation and modeldata must be provided.
-
-    Examples
-    --------
-    >>> import modelskill as ms
-    >>> cmp1 = ms.match(observation, modeldata)
-    >>> cmp2 = ms.from_matched(matched_data)
-
-    See Also
-    --------
-    modelskill.match, modelskill.from_matched
-    """
+1487
+1488
+1489
+1490
+1491
+1492
+1493
+1494
+1495
+1496
+1497
+1498
+1499
+1500
+1501
+1502
+1503
+1504
+1505
+1506
+1507
+1508
class Comparer(Scoreable):
+    """
+    Comparer class for comparing model and observation data.
 
-    data: xr.Dataset
-    raw_mod_data: Dict[str, TimeSeries]
-    _obs_str = "Observation"
-    plotter = ComparerPlotter
-
-    def __init__(
-        self,
-        matched_data: xr.Dataset,
-        raw_mod_data: Optional[Dict[str, TimeSeries]] = None,
-    ) -> None:
-        self.data = _parse_dataset(matched_data)
-        self.raw_mod_data = (
-            raw_mod_data
-            if raw_mod_data is not None
-            else {
-                # key: ModelResult(value, gtype=self.data.gtype, name=key, x=self.x, y=self.y)
-                key: TimeSeries(self.data[[key]])
-                for key, value in matched_data.data_vars.items()
-                if value.attrs["kind"] == "model"
-            }
-        )
-        # TODO: validate that the names in raw_mod_data are the same as in matched_data
-        assert isinstance(self.raw_mod_data, dict)
-        for k in self.raw_mod_data.keys():
-            v = self.raw_mod_data[k]
-            if not isinstance(v, TimeSeries):
-                try:
-                    self.raw_mod_data[k] = TimeSeries(v)
-                except Exception:
-                    raise ValueError(
-                        f"raw_mod_data[{k}] could not be converted to a TimeSeries object"
-                    )
-            else:
-                assert isinstance(
-                    v, TimeSeries
-                ), f"raw_mod_data[{k}] must be a TimeSeries object"
-
-        self.plot = Comparer.plotter(self)
-        """Plot using the ComparerPlotter
-
-        Examples
-        --------
-        >>> cmp.plot.timeseries()
-        >>> cmp.plot.scatter()
-        >>> cmp.plot.qq()
-        >>> cmp.plot.hist()
-        >>> cmp.plot.kde()
-        >>> cmp.plot.box()
-        >>> cmp.plot.residual_hist()
-        >>> cmp.plot.taylor()        
-        """
-
-    @staticmethod
-    def from_matched_data(
-        data: xr.Dataset | pd.DataFrame,
-        raw_mod_data: Optional[Dict[str, TimeSeries]] = None,
-        obs_item: str | int | None = None,
-        mod_items: Optional[Iterable[str | int]] = None,
-        aux_items: Optional[Iterable[str | int]] = None,
-        name: Optional[str] = None,
-        weight: float = 1.0,
-        x: Optional[float] = None,
-        y: Optional[float] = None,
-        z: Optional[float] = None,
-        x_item: str | int | None = None,
-        y_item: str | int | None = None,
-        quantity: Optional[Quantity] = None,
-    ) -> "Comparer":
-        """Initialize from compared data"""
-        if not isinstance(data, xr.Dataset):
-            # TODO: handle raw_mod_data by accessing data.attrs["kind"] and only remove nan after
-            data = _matched_data_to_xarray(
-                data,
-                obs_item=obs_item,
-                mod_items=mod_items,
-                aux_items=aux_items,
-                name=name,
-                x=x,
-                y=y,
-                z=z,
-                x_item=x_item,
-                y_item=y_item,
-                quantity=quantity,
-            )
-            data.attrs["weight"] = weight
-        return Comparer(matched_data=data, raw_mod_data=raw_mod_data)
-
-    def __repr__(self):
-        out = [
-            "<Comparer>",
-            f"Quantity: {self.quantity}",
-            f"Observation: {self.name}, n_points={self.n_points}",
-            "Model(s):",
-        ]
-        for index, model in enumerate(self.mod_names):
-            out.append(f"{index}: {model}")
-
-        for var in self.aux_names:
-            out.append(f" Auxiliary: {var}")
-        return str.join("\n", out)
-
-    @property
-    def name(self) -> str:
-        """Name of comparer (=name of observation)"""
-        return str(self.data.attrs["name"])
-
-    @name.setter
-    def name(self, name: str) -> None:
-        if name in _RESERVED_NAMES:
-            raise ValueError(
-                f"Cannot rename to any of {_RESERVED_NAMES}, these are reserved names!"
-            )
-        self.data.attrs["name"] = name
-
-    @property
-    def gtype(self) -> str:
-        """Geometry type"""
-        return str(self.data.attrs["gtype"])
-
-    @property
-    def quantity(self) -> Quantity:
-        """Quantity object"""
-        return Quantity(
-            name=self.data[self._obs_str].attrs["long_name"],
-            unit=self.data[self._obs_str].attrs["units"],
-            is_directional=bool(
-                self.data[self._obs_str].attrs.get("is_directional", False)
-            ),
-        )
-
-    @quantity.setter
-    def quantity(self, quantity: Quantity) -> None:
-        assert isinstance(quantity, Quantity), "value must be a Quantity object"
-        self.data[self._obs_str].attrs["long_name"] = quantity.name
-        self.data[self._obs_str].attrs["units"] = quantity.unit
-        self.data[self._obs_str].attrs["is_directional"] = int(quantity.is_directional)
-
-    @property
-    def n_points(self) -> int:
-        """number of compared points"""
-        return len(self.data[self._obs_str]) if self.data else 0
-
-    @property
-    def time(self) -> pd.DatetimeIndex:
-        """time of compared data as pandas DatetimeIndex"""
-        return self.data.time.to_index()
-
-    # TODO: Should we keep these? (renamed to start_time and end_time)
-    # @property
-    # def start(self) -> pd.Timestamp:
-    #     """start pd.Timestamp of compared data"""
-    #     return self.time[0]
-
-    # @property
-    # def end(self) -> pd.Timestamp:
-    #     """end pd.Timestamp of compared data"""
-    #     return self.time[-1]
+    Typically, the Comparer is part of a ComparerCollection,
+    created with the `match` function.
+
+    Parameters
+    ----------
+    matched_data : xr.Dataset
+        Matched data
+    raw_mod_data : dict of modelskill.TimeSeries, optional
+        Raw model data. If None, observation and modeldata must be provided.
+
+    Examples
+    --------
+    >>> import modelskill as ms
+    >>> cmp1 = ms.match(observation, modeldata)
+    >>> cmp2 = ms.from_matched(matched_data)
+
+    See Also
+    --------
+    modelskill.match, modelskill.from_matched
+    """
+
+    data: xr.Dataset
+    raw_mod_data: Dict[str, TimeSeries]
+    _obs_str = "Observation"
+    plotter = ComparerPlotter
+
+    def __init__(
+        self,
+        matched_data: xr.Dataset,
+        raw_mod_data: Optional[Dict[str, TimeSeries]] = None,
+    ) -> None:
+        self.data = _parse_dataset(matched_data)
+        self.raw_mod_data = (
+            raw_mod_data
+            if raw_mod_data is not None
+            else {
+                # key: ModelResult(value, gtype=self.data.gtype, name=key, x=self.x, y=self.y)
+                key: TimeSeries(self.data[[key]])
+                for key, value in matched_data.data_vars.items()
+                if value.attrs["kind"] == "model"
+            }
+        )
+        # TODO: validate that the names in raw_mod_data are the same as in matched_data
+        assert isinstance(self.raw_mod_data, dict)
+        for k in self.raw_mod_data.keys():
+            v = self.raw_mod_data[k]
+            if not isinstance(v, TimeSeries):
+                try:
+                    self.raw_mod_data[k] = TimeSeries(v)
+                except Exception:
+                    raise ValueError(
+                        f"raw_mod_data[{k}] could not be converted to a TimeSeries object"
+                    )
+            else:
+                assert isinstance(
+                    v, TimeSeries
+                ), f"raw_mod_data[{k}] must be a TimeSeries object"
+
+        self.plot = Comparer.plotter(self)
+        """Plot using the ComparerPlotter
+
+        Examples
+        --------
+        >>> cmp.plot.timeseries()
+        >>> cmp.plot.scatter()
+        >>> cmp.plot.qq()
+        >>> cmp.plot.hist()
+        >>> cmp.plot.kde()
+        >>> cmp.plot.box()
+        >>> cmp.plot.residual_hist()
+        >>> cmp.plot.taylor()        
+        """
+
+    @staticmethod
+    def from_matched_data(
+        data: xr.Dataset | pd.DataFrame,
+        raw_mod_data: Optional[Dict[str, TimeSeries]] = None,
+        obs_item: str | int | None = None,
+        mod_items: Optional[Iterable[str | int]] = None,
+        aux_items: Optional[Iterable[str | int]] = None,
+        name: Optional[str] = None,
+        weight: float = 1.0,
+        x: Optional[float] = None,
+        y: Optional[float] = None,
+        z: Optional[float] = None,
+        x_item: str | int | None = None,
+        y_item: str | int | None = None,
+        quantity: Optional[Quantity] = None,
+    ) -> "Comparer":
+        """Initialize from compared data"""
+        if not isinstance(data, xr.Dataset):
+            # TODO: handle raw_mod_data by accessing data.attrs["kind"] and only remove nan after
+            data = _matched_data_to_xarray(
+                data,
+                obs_item=obs_item,
+                mod_items=mod_items,
+                aux_items=aux_items,
+                name=name,
+                x=x,
+                y=y,
+                z=z,
+                x_item=x_item,
+                y_item=y_item,
+                quantity=quantity,
+            )
+            data.attrs["weight"] = weight
+        return Comparer(matched_data=data, raw_mod_data=raw_mod_data)
+
+    def __repr__(self):
+        out = [
+            "<Comparer>",
+            f"Quantity: {self.quantity}",
+            f"Observation: {self.name}, n_points={self.n_points}",
+            "Model(s):",
+        ]
+        for index, model in enumerate(self.mod_names):
+            out.append(f"{index}: {model}")
+
+        for var in self.aux_names:
+            out.append(f" Auxiliary: {var}")
+        return str.join("\n", out)
+
+    @property
+    def name(self) -> str:
+        """Name of comparer (=name of observation)"""
+        return str(self.data.attrs["name"])
+
+    @name.setter
+    def name(self, name: str) -> None:
+        if name in _RESERVED_NAMES:
+            raise ValueError(
+                f"Cannot rename to any of {_RESERVED_NAMES}, these are reserved names!"
+            )
+        self.data.attrs["name"] = name
+
+    @property
+    def gtype(self) -> str:
+        """Geometry type"""
+        return str(self.data.attrs["gtype"])
+
+    @property
+    def quantity(self) -> Quantity:
+        """Quantity object"""
+        return Quantity(
+            name=self.data[self._obs_str].attrs["long_name"],
+            unit=self.data[self._obs_str].attrs["units"],
+            is_directional=bool(
+                self.data[self._obs_str].attrs.get("is_directional", False)
+            ),
+        )
+
+    @quantity.setter
+    def quantity(self, quantity: Quantity) -> None:
+        assert isinstance(quantity, Quantity), "value must be a Quantity object"
+        self.data[self._obs_str].attrs["long_name"] = quantity.name
+        self.data[self._obs_str].attrs["units"] = quantity.unit
+        self.data[self._obs_str].attrs["is_directional"] = int(quantity.is_directional)
 
     @property
-    def x(self) -> Any:
-        """x-coordinate"""
-        return self._coordinate_values("x")
+    def n_points(self) -> int:
+        """number of compared points"""
+        return len(self.data[self._obs_str]) if self.data else 0
 
     @property
-    def y(self) -> Any:
-        """y-coordinate"""
-        return self._coordinate_values("y")
+    def time(self) -> pd.DatetimeIndex:
+        """time of compared data as pandas DatetimeIndex"""
+        return self.data.time.to_index()
 
-    @property
-    def z(self) -> Any:
-        """z-coordinate"""
-        return self._coordinate_values("z")
-
-    def _coordinate_values(self, coord: str) -> Any:
-        vals = self.data[coord].values
-        return np.atleast_1d(vals)[0] if vals.ndim == 0 else vals
-
-    @property
-    def n_models(self) -> int:
-        """Number of model results"""
-        return len(self.mod_names)
-
-    @property
-    def mod_names(self) -> List[str]:
-        """List of model result names"""
-        return list(self.raw_mod_data.keys())
-
-    def __contains__(self, key: str) -> bool:
-        return key in self.data.data_vars
-
-    @property
-    def aux_names(self) -> List[str]:
-        """List of auxiliary data names"""
-        # we don't require the kind attribute to be "auxiliary"
-        return list(
-            [
-                k
-                for k, v in self.data.data_vars.items()
-                if v.attrs["kind"] not in ["observation", "model"]
-            ]
-        )
-
-    # TODO: always "Observation", necessary to have this property?
+    # TODO: Should we keep these? (renamed to start_time and end_time)
+    # @property
+    # def start(self) -> pd.Timestamp:
+    #     """start pd.Timestamp of compared data"""
+    #     return self.time[0]
+
+    # @property
+    # def end(self) -> pd.Timestamp:
+    #     """end pd.Timestamp of compared data"""
+    #     return self.time[-1]
+
+    @property
+    def x(self) -> Any:
+        """x-coordinate"""
+        return self._coordinate_values("x")
+
+    @property
+    def y(self) -> Any:
+        """y-coordinate"""
+        return self._coordinate_values("y")
+
+    @property
+    def z(self) -> Any:
+        """z-coordinate"""
+        return self._coordinate_values("z")
+
+    def _coordinate_values(self, coord: str) -> Any:
+        vals = self.data[coord].values
+        return np.atleast_1d(vals)[0] if vals.ndim == 0 else vals
+
+    @property
+    def n_models(self) -> int:
+        """Number of model results"""
+        return len(self.mod_names)
+
     @property
-    def _obs_name(self) -> str:
-        return self._obs_str
-
-    @property
-    def weight(self) -> float:
-        """Weight of observation (used in ComparerCollection score() and mean_skill())"""
-        return float(self.data.attrs["weight"])
-
-    @weight.setter
-    def weight(self, value: float) -> None:
-        self.data.attrs["weight"] = float(value)
-
-    @property
-    def _unit_text(self) -> str:
-        # Quantity name and unit as text suitable for plot labels
-        return f"{self.quantity.name} [{self.quantity.unit}]"
-
-    @property
-    def attrs(self) -> dict[str, Any]:
-        """Attributes of the observation"""
-        return self.data.attrs
-
-    @attrs.setter
-    def attrs(self, value: dict[str, Serializable]) -> None:
-        self.data.attrs = value
-
-    # TODO: is this the best way to copy (self.data.copy.. )
-    def __copy__(self) -> "Comparer":
-        return deepcopy(self)
-
-    def copy(self) -> "Comparer":
-        return self.__copy__()
+    def mod_names(self) -> List[str]:
+        """List of model result names"""
+        return list(self.raw_mod_data.keys())
+
+    def __contains__(self, key: str) -> bool:
+        return key in self.data.data_vars
+
+    @property
+    def aux_names(self) -> List[str]:
+        """List of auxiliary data names"""
+        # we don't require the kind attribute to be "auxiliary"
+        return list(
+            [
+                k
+                for k, v in self.data.data_vars.items()
+                if v.attrs["kind"] not in ["observation", "model"]
+            ]
+        )
+
+    # TODO: always "Observation", necessary to have this property?
+    @property
+    def _obs_name(self) -> str:
+        return self._obs_str
+
+    @property
+    def weight(self) -> float:
+        """Weight of observation (used in ComparerCollection score() and mean_skill())"""
+        return float(self.data.attrs["weight"])
+
+    @weight.setter
+    def weight(self, value: float) -> None:
+        self.data.attrs["weight"] = float(value)
 
-    def rename(
-        self, mapping: Mapping[str, str], errors: Literal["raise", "ignore"] = "raise"
-    ) -> "Comparer":
-        """Rename observation, model or auxiliary data variables
+    @property
+    def _unit_text(self) -> str:
+        # Quantity name and unit as text suitable for plot labels
+        return f"{self.quantity.name} [{self.quantity.unit}]"
 
-        Parameters
-        ----------
-        mapping : dict
-            mapping of old names to new names
-        errors : {'raise', 'ignore'}, optional
-            If 'raise', raise a KeyError if any of the old names
-            do not exist in the data. By default 'raise'.
-
-        Returns
-        -------
-        Comparer
-
-        Examples
-        --------
-        >>> cmp = ms.match(observation, modeldata)
-        >>> cmp.mod_names
-        ['model1']
-        >>> cmp2 = cmp.rename({'model1': 'model2'})
-        >>> cmp2.mod_names
-        ['model2']
-        """
-        if errors not in ["raise", "ignore"]:
-            raise ValueError("errors must be 'raise' or 'ignore'")
-
-        allowed_keys = [self.name] + self.mod_names + self.aux_names
-        if errors == "raise":
-            for k in mapping.keys():
-                if k not in allowed_keys:
-                    raise KeyError(f"Unknown key: {k}; must be one of {allowed_keys}")
-        else:
-            # "ignore": silently remove keys that are not in allowed_keys
-            mapping = {k: v for k, v in mapping.items() if k in allowed_keys}
+    @property
+    def attrs(self) -> dict[str, Any]:
+        """Attributes of the observation"""
+        return self.data.attrs
+
+    @attrs.setter
+    def attrs(self, value: dict[str, Serializable]) -> None:
+        self.data.attrs = value
+
+    # TODO: is this the best way to copy (self.data.copy.. )
+    def __copy__(self) -> "Comparer":
+        return deepcopy(self)
+
+    def copy(self) -> "Comparer":
+        return self.__copy__()
+
+    def rename(
+        self, mapping: Mapping[str, str], errors: Literal["raise", "ignore"] = "raise"
+    ) -> "Comparer":
+        """Rename observation, model or auxiliary data variables
+
+        Parameters
+        ----------
+        mapping : dict
+            mapping of old names to new names
+        errors : {'raise', 'ignore'}, optional
+            If 'raise', raise a KeyError if any of the old names
+            do not exist in the data. By default 'raise'.
+
+        Returns
+        -------
+        Comparer
 
-        if any([k in _RESERVED_NAMES for k in mapping.values()]):
-            # TODO: also check for duplicates
-            raise ValueError(
-                f"Cannot rename to any of {_RESERVED_NAMES}, these are reserved names!"
-            )
-
-        # rename observation
-        obs_name = mapping.get(self.name, self.name)
-        ma_mapping = {k: v for k, v in mapping.items() if k != self.name}
-
-        data = self.data.rename(ma_mapping)
-        data.attrs["name"] = obs_name
-        raw_mod_data = dict()
-        for k, v in self.raw_mod_data.items():
-            if k in ma_mapping:
-                # copy is needed here as the same raw data could be
-                # used for multiple Comparers!
-                v2 = v.copy()
-                v2.data = v2.data.rename({k: ma_mapping[k]})
-                raw_mod_data[ma_mapping[k]] = v2
-            else:
-                raw_mod_data[k] = v
-
-        return Comparer(matched_data=data, raw_mod_data=raw_mod_data)
-
-    def _to_observation(self) -> PointObservation | TrackObservation:
-        """Convert to Observation"""
-        if self.gtype == "point":
-            df = self.data.drop_vars(["x", "y", "z"])[self._obs_str].to_dataframe()
-            return PointObservation(
-                data=df,
-                name=self.name,
-                x=self.x,
-                y=self.y,
-                z=self.z,
-                quantity=self.quantity,
-                # TODO: add attrs
-            )
-        elif self.gtype == "track":
-            df = self.data.drop_vars(["z"])[[self._obs_str]].to_dataframe()
-            return TrackObservation(
-                data=df,
-                item=0,
-                x_item=1,
-                y_item=2,
-                name=self.name,
-                quantity=self.quantity,
-                # TODO: add attrs
-            )
-        else:
-            raise NotImplementedError(f"Unknown gtype: {self.gtype}")
-
-    def __iadd__(self, other: Comparer):  # type: ignore
-        from ..matching import match_space_time
-
-        missing_models = set(self.mod_names) - set(other.mod_names)
-        if len(missing_models) == 0:
-            # same obs name and same model names
-            self.data = xr.concat([self.data, other.data], dim="time").drop_duplicates(
-                "time"
-            )
-        else:
-            self.raw_mod_data.update(other.raw_mod_data)
-            matched = match_space_time(
-                observation=self._to_observation(),
-                raw_mod_data=self.raw_mod_data,  # type: ignore
-            )
-            self.data = matched
-
-        return self
-
-    def __add__(
-        self, other: Union["Comparer", "ComparerCollection"]
-    ) -> "ComparerCollection" | "Comparer":
-        from ._collection import ComparerCollection
-        from ..matching import match_space_time
-
-        if not isinstance(other, (Comparer, ComparerCollection)):
-            raise TypeError(f"Cannot add {type(other)} to {type(self)}")
-
-        if isinstance(other, Comparer) and (self.name == other.name):
-            missing_models = set(self.mod_names) - set(other.mod_names)
-            if len(missing_models) == 0:
-                # same obs name and same model names
-                cmp = self.copy()
-                cmp.data = xr.concat(
-                    [cmp.data, other.data], dim="time"
-                ).drop_duplicates("time")
-
-            else:
-                raw_mod_data = self.raw_mod_data.copy()
-                raw_mod_data.update(other.raw_mod_data)  # TODO!
-                matched = match_space_time(
-                    observation=self._to_observation(),
-                    raw_mod_data=raw_mod_data,  # type: ignore
-                )
-                cmp = Comparer(matched_data=matched, raw_mod_data=raw_mod_data)
+        Examples
+        --------
+        >>> cmp = ms.match(observation, modeldata)
+        >>> cmp.mod_names
+        ['model1']
+        >>> cmp2 = cmp.rename({'model1': 'model2'})
+        >>> cmp2.mod_names
+        ['model2']
+        """
+        if errors not in ["raise", "ignore"]:
+            raise ValueError("errors must be 'raise' or 'ignore'")
+
+        allowed_keys = [self.name] + self.mod_names + self.aux_names
+        if errors == "raise":
+            for k in mapping.keys():
+                if k not in allowed_keys:
+                    raise KeyError(f"Unknown key: {k}; must be one of {allowed_keys}")
+        else:
+            # "ignore": silently remove keys that are not in allowed_keys
+            mapping = {k: v for k, v in mapping.items() if k in allowed_keys}
+
+        if any([k in _RESERVED_NAMES for k in mapping.values()]):
+            # TODO: also check for duplicates
+            raise ValueError(
+                f"Cannot rename to any of {_RESERVED_NAMES}, these are reserved names!"
+            )
+
+        # rename observation
+        obs_name = mapping.get(self.name, self.name)
+        ma_mapping = {k: v for k, v in mapping.items() if k != self.name}
+
+        data = self.data.rename(ma_mapping)
+        data.attrs["name"] = obs_name
+        raw_mod_data = dict()
+        for k, v in self.raw_mod_data.items():
+            if k in ma_mapping:
+                # copy is needed here as the same raw data could be
+                # used for multiple Comparers!
+                v2 = v.copy()
+                v2.data = v2.data.rename({k: ma_mapping[k]})
+                raw_mod_data[ma_mapping[k]] = v2
+            else:
+                raw_mod_data[k] = v
+
+        return Comparer(matched_data=data, raw_mod_data=raw_mod_data)
+
+    def _to_observation(self) -> PointObservation | TrackObservation:
+        """Convert to Observation"""
+        if self.gtype == "point":
+            df = self.data.drop_vars(["x", "y", "z"])[self._obs_str].to_dataframe()
+            return PointObservation(
+                data=df,
+                name=self.name,
+                x=self.x,
+                y=self.y,
+                z=self.z,
+                quantity=self.quantity,
+                # TODO: add attrs
+            )
+        elif self.gtype == "track":
+            df = self.data.drop_vars(["z"])[[self._obs_str]].to_dataframe()
+            return TrackObservation(
+                data=df,
+                item=0,
+                x_item=1,
+                y_item=2,
+                name=self.name,
+                quantity=self.quantity,
+                # TODO: add attrs
+            )
+        else:
+            raise NotImplementedError(f"Unknown gtype: {self.gtype}")
+
+    def __iadd__(self, other: Comparer):  # type: ignore
+        from ..matching import match_space_time
+
+        missing_models = set(self.mod_names) - set(other.mod_names)
+        if len(missing_models) == 0:
+            # same obs name and same model names
+            self.data = xr.concat([self.data, other.data], dim="time").drop_duplicates(
+                "time"
+            )
+        else:
+            self.raw_mod_data.update(other.raw_mod_data)
+            matched = match_space_time(
+                observation=self._to_observation(),
+                raw_mod_data=self.raw_mod_data,  # type: ignore
+            )
+            self.data = matched
+
+        return self
+
+    def __add__(
+        self, other: Union["Comparer", "ComparerCollection"]
+    ) -> "ComparerCollection" | "Comparer":
+        from ._collection import ComparerCollection
+        from ..matching import match_space_time
 
-            return cmp
-        else:
-            if isinstance(other, Comparer):
-                return ComparerCollection([self, other])
-            elif isinstance(other, ComparerCollection):
-                return ComparerCollection([self, *other])
-
-    def sel(
-        self,
-        model: Optional[IdxOrNameTypes] = None,
-        start: Optional[TimeTypes] = None,
-        end: Optional[TimeTypes] = None,
-        time: Optional[TimeTypes] = None,
-        area: Optional[List[float]] = None,
-    ) -> "Comparer":
-        """Select data based on model, time and/or area.
-
-        Parameters
-        ----------
-        model : str or int or list of str or list of int, optional
-            Model name or index. If None, all models are selected.
-        start : str or datetime, optional
-            Start time. If None, all times are selected.
-        end : str or datetime, optional
-            End time. If None, all times are selected.
-        time : str or datetime, optional
-            Time. If None, all times are selected.
-        area : list of float, optional
-            bbox: [x0, y0, x1, y1] or Polygon. If None, all areas are selected.
-
-        Returns
-        -------
-        Comparer
-            New Comparer with selected data.
-        """
-        if (time is not None) and ((start is not None) or (end is not None)):
-            raise ValueError("Cannot use both time and start/end")
+        if not isinstance(other, (Comparer, ComparerCollection)):
+            raise TypeError(f"Cannot add {type(other)} to {type(self)}")
+
+        if isinstance(other, Comparer) and (self.name == other.name):
+            missing_models = set(self.mod_names) - set(other.mod_names)
+            if len(missing_models) == 0:
+                # same obs name and same model names
+                cmp = self.copy()
+                cmp.data = xr.concat(
+                    [cmp.data, other.data], dim="time"
+                ).drop_duplicates("time")
+
+            else:
+                raw_mod_data = self.raw_mod_data.copy()
+                raw_mod_data.update(other.raw_mod_data)  # TODO!
+                matched = match_space_time(
+                    observation=self._to_observation(),
+                    raw_mod_data=raw_mod_data,  # type: ignore
+                )
+                cmp = Comparer(matched_data=matched, raw_mod_data=raw_mod_data)
+
+            return cmp
+        else:
+            if isinstance(other, Comparer):
+                return ComparerCollection([self, other])
+            elif isinstance(other, ComparerCollection):
+                return ComparerCollection([self, *other])
+
+    def sel(
+        self,
+        model: Optional[IdxOrNameTypes] = None,
+        start: Optional[TimeTypes] = None,
+        end: Optional[TimeTypes] = None,
+        time: Optional[TimeTypes] = None,
+        area: Optional[List[float]] = None,
+    ) -> "Comparer":
+        """Select data based on model, time and/or area.
 
-        d = self.data
-        raw_mod_data = self.raw_mod_data
-        if model is not None:
-            if isinstance(model, (str, int)):
-                models = [model]
-            else:
-                models = list(model)
-            mod_names: List[str] = [_get_name(m, self.mod_names) for m in models]
-            dropped_models = [m for m in self.mod_names if m not in mod_names]
-            d = d.drop_vars(dropped_models)
-            raw_mod_data = {m: raw_mod_data[m] for m in mod_names}
-        if (start is not None) or (end is not None):
-            # TODO: can this be done without to_index? (simplify)
-            d = d.sel(time=d.time.to_index().to_frame().loc[start:end].index)  # type: ignore
-
-            # Note: if user asks for a specific time, we also filter raw
-            raw_mod_data = {
-                k: v.sel(time=slice(start, end)) for k, v in raw_mod_data.items()
-            }  # type: ignore
-        if time is not None:
-            d = d.sel(time=time)
-
-            # Note: if user asks for a specific time, we also filter raw
-            raw_mod_data = {k: v.sel(time=time) for k, v in raw_mod_data.items()}
-        if area is not None:
-            if _area_is_bbox(area):
-                x0, y0, x1, y1 = area
-                mask = (d.x > x0) & (d.x < x1) & (d.y > y0) & (d.y < y1)
-            elif _area_is_polygon(area):
-                polygon = np.array(area)
-                xy = np.column_stack((d.x, d.y))
-                mask = _inside_polygon(polygon, xy)
-            else:
-                raise ValueError("area supports bbox [x0,y0,x1,y1] and closed polygon")
-            if self.gtype == "point":
-                # if False, return empty data
-                d = d if mask else d.isel(time=slice(None, 0))
-            else:
-                d = d.isel(time=mask)
-        return Comparer.from_matched_data(data=d, raw_mod_data=raw_mod_data)
-
-    def where(
-        self,
-        cond: Union[bool, np.ndarray, xr.DataArray],
-    ) -> "Comparer":
-        """Return a new Comparer with values where cond is True
-
-        Parameters
-        ----------
-        cond : bool, np.ndarray, xr.DataArray
-            This selects the values to return.
-
-        Returns
-        -------
-        Comparer
-            New Comparer with values where cond is True and other otherwise.
-
-        Examples
-        --------
-        >>> c2 = c.where(c.data.Observation > 0)
-        """
-        d = self.data.where(cond, other=np.nan)
-        d = d.dropna(dim="time", how="all")
-        return Comparer.from_matched_data(d, self.raw_mod_data)
-
-    def query(self, query: str) -> "Comparer":
-        """Return a new Comparer with values where query cond is True
+        Parameters
+        ----------
+        model : str or int or list of str or list of int, optional
+            Model name or index. If None, all models are selected.
+        start : str or datetime, optional
+            Start time. If None, all times are selected.
+        end : str or datetime, optional
+            End time. If None, all times are selected.
+        time : str or datetime, optional
+            Time. If None, all times are selected.
+        area : list of float, optional
+            bbox: [x0, y0, x1, y1] or Polygon. If None, all areas are selected.
+
+        Returns
+        -------
+        Comparer
+            New Comparer with selected data.
+        """
+        if (time is not None) and ((start is not None) or (end is not None)):
+            raise ValueError("Cannot use both time and start/end")
+
+        d = self.data
+        raw_mod_data = self.raw_mod_data
+        if model is not None:
+            if isinstance(model, (str, int)):
+                models = [model]
+            else:
+                models = list(model)
+            mod_names: List[str] = [_get_name(m, self.mod_names) for m in models]
+            dropped_models = [m for m in self.mod_names if m not in mod_names]
+            d = d.drop_vars(dropped_models)
+            raw_mod_data = {m: raw_mod_data[m] for m in mod_names}
+        if (start is not None) or (end is not None):
+            # TODO: can this be done without to_index? (simplify)
+            d = d.sel(time=d.time.to_index().to_frame().loc[start:end].index)  # type: ignore
+
+            # Note: if user asks for a specific time, we also filter raw
+            raw_mod_data = {
+                k: v.sel(time=slice(start, end)) for k, v in raw_mod_data.items()
+            }  # type: ignore
+        if time is not None:
+            d = d.sel(time=time)
+
+            # Note: if user asks for a specific time, we also filter raw
+            raw_mod_data = {k: v.sel(time=time) for k, v in raw_mod_data.items()}
+        if area is not None:
+            if _area_is_bbox(area):
+                x0, y0, x1, y1 = area
+                mask = (d.x > x0) & (d.x < x1) & (d.y > y0) & (d.y < y1)
+            elif _area_is_polygon(area):
+                polygon = np.array(area)
+                xy = np.column_stack((d.x, d.y))
+                mask = _inside_polygon(polygon, xy)
+            else:
+                raise ValueError("area supports bbox [x0,y0,x1,y1] and closed polygon")
+            if self.gtype == "point":
+                # if False, return empty data
+                d = d if mask else d.isel(time=slice(None, 0))
+            else:
+                d = d.isel(time=mask)
+        return Comparer.from_matched_data(data=d, raw_mod_data=raw_mod_data)
+
+    def where(
+        self,
+        cond: Union[bool, np.ndarray, xr.DataArray],
+    ) -> "Comparer":
+        """Return a new Comparer with values where cond is True
 
         Parameters
         ----------
-        query : str
-            Query string, see pandas.DataFrame.query
+        cond : bool, np.ndarray, xr.DataArray
+            This selects the values to return.
 
         Returns
         -------
@@ -3546,545 +3525,566 @@ 

Examples -------- - >>> c2 = c.query("Observation > 0") + >>> c2 = c.where(c.data.Observation > 0) """ - d = self.data.query({"time": query}) + d = self.data.where(cond, other=np.nan) d = d.dropna(dim="time", how="all") return Comparer.from_matched_data(d, self.raw_mod_data) - def _to_long_dataframe( - self, attrs_keys: Iterable[str] | None = None - ) -> pd.DataFrame: - """Return a copy of the data as a long-format pandas DataFrame (for groupby operations)""" - - data = self.data.drop_vars("z", errors="ignore") - - # this step is necessary since we keep arbitrary derived data in the dataset, but not z - # i.e. using a hardcoded whitelist of variables to keep is less flexible - id_vars = [v for v in data.variables if v not in self.mod_names] - - attrs = ( - {key: data.attrs.get(key, False) for key in attrs_keys} - if attrs_keys - else {} - ) - - df = ( - data.to_dataframe() - .reset_index() - .melt( - value_vars=self.mod_names, - var_name="model", - value_name="mod_val", - id_vars=id_vars, - ) - .rename(columns={self._obs_str: "obs_val"}) - .assign(observation=self.name) - .assign(**attrs) - .astype({"model": "category", "observation": "category"}) - ) + def query(self, query: str) -> "Comparer": + """Return a new Comparer with values where query cond is True + + Parameters + ---------- + query : str + Query string, see pandas.DataFrame.query + + Returns + ------- + Comparer + New Comparer with values where cond is True and other otherwise. 
+ + Examples + -------- + >>> c2 = c.query("Observation > 0") + """ + d = self.data.query({"time": query}) + d = d.dropna(dim="time", how="all") + return Comparer.from_matched_data(d, self.raw_mod_data) + + def _to_long_dataframe( + self, attrs_keys: Iterable[str] | None = None + ) -> pd.DataFrame: + """Return a copy of the data as a long-format pandas DataFrame (for groupby operations)""" + + data = self.data.drop_vars("z", errors="ignore") + + # this step is necessary since we keep arbitrary derived data in the dataset, but not z + # i.e. using a hardcoded whitelist of variables to keep is less flexible + id_vars = [v for v in data.variables if v not in self.mod_names] - return df - - def skill( - self, - by: str | Iterable[str] | None = None, - metrics: Iterable[str] | Iterable[Callable] | str | Callable | None = None, - **kwargs: Any, - ) -> SkillTable: - """Skill assessment of model(s) - - Parameters - ---------- - by : str or List[str], optional - group by, by default ["model"] - - - by column name - - by temporal bin of the DateTimeIndex via the freq-argument - (using pandas pd.Grouper(freq)), e.g.: 'freq:M' = monthly; 'freq:D' daily - - by the dt accessor of the DateTimeIndex (e.g. 'dt.month') using the - syntax 'dt:month'. The dt-argument is different from the freq-argument - in that it gives month-of-year rather than month-of-data. 
- metrics : list, optional - list of modelskill.metrics, by default modelskill.options.metrics.list - - Returns - ------- - SkillTable - skill assessment object - - See also - -------- - sel - a method for filtering/selecting data - - Examples - -------- - >>> import modelskill as ms - >>> cc = ms.match(c2, mod) - >>> cc['c2'].skill().round(2) - n bias rmse urmse mae cc si r2 - observation - c2 113 -0.00 0.35 0.35 0.29 0.97 0.12 0.99 - - >>> cc['c2'].skill(by='freq:D').round(2) - n bias rmse urmse mae cc si r2 - 2017-10-27 72 -0.19 0.31 0.25 0.26 0.48 0.12 0.98 - 2017-10-28 0 NaN NaN NaN NaN NaN NaN NaN - 2017-10-29 41 0.33 0.41 0.25 0.36 0.96 0.06 0.99 - """ - metrics = _parse_metric(metrics, directional=self.quantity.is_directional) - - # TODO remove in v1.1 - model, start, end, area = _get_deprecated_args(kwargs) # type: ignore - if kwargs != {}: - raise AttributeError(f"Unknown keyword arguments: {kwargs}") - - cmp = self.sel( - model=model, - start=start, - end=end, - area=area, - ) - if cmp.n_points == 0: - raise ValueError("No data selected for skill assessment") - - by = _parse_groupby(by, n_mod=cmp.n_models, n_qnt=1) - - df = cmp._to_long_dataframe() - res = _groupby_df(df, by=by, metrics=metrics) - res["x"] = np.nan if self.gtype == "track" else cmp.x - res["y"] = np.nan if self.gtype == "track" else cmp.y - res = self._add_as_col_if_not_in_index(df, skilldf=res) - return SkillTable(res) - - def _add_as_col_if_not_in_index( - self, df: pd.DataFrame, skilldf: pd.DataFrame - ) -> pd.DataFrame: - """Add a field to skilldf if unique in df""" - FIELDS = ("observation", "model") - - for field in FIELDS: - if (field == "model") and (self.n_models <= 1): - continue - if field not in skilldf.index.names: - unames = df[field].unique() - if len(unames) == 1: - skilldf.insert(loc=0, column=field, value=unames[0]) - return skilldf - - def score( - self, - metric: str | Callable = mtr.rmse, - **kwargs: Any, - ) -> Dict[str, float]: - """Model skill score - - Parameters 
- ---------- - metric : list, optional - a single metric from modelskill.metrics, by default rmse + attrs = ( + {key: data.attrs.get(key, False) for key in attrs_keys} + if attrs_keys + else {} + ) + + df = ( + data.to_dataframe() + .reset_index() + .melt( + value_vars=self.mod_names, + var_name="model", + value_name="mod_val", + id_vars=id_vars, + ) + .rename(columns={self._obs_str: "obs_val"}) + .assign(observation=self.name) + .assign(**attrs) + .astype({"model": "category", "observation": "category"}) + ) + + return df + + def skill( + self, + by: str | Iterable[str] | None = None, + metrics: Iterable[str] | Iterable[Callable] | str | Callable | None = None, + **kwargs: Any, + ) -> SkillTable: + """Skill assessment of model(s) + + Parameters + ---------- + by : str or List[str], optional + group by, by default ["model"] + + - by column name + - by temporal bin of the DateTimeIndex via the freq-argument + (using pandas pd.Grouper(freq)), e.g.: 'freq:M' = monthly; 'freq:D' daily + - by the dt accessor of the DateTimeIndex (e.g. 'dt.month') using the + syntax 'dt:month'. The dt-argument is different from the freq-argument + in that it gives month-of-year rather than month-of-data. 
+ metrics : list, optional + list of modelskill.metrics, by default modelskill.options.metrics.list + + Returns + ------- + SkillTable + skill assessment object + + See also + -------- + sel + a method for filtering/selecting data + + Examples + -------- + >>> import modelskill as ms + >>> cc = ms.match(c2, mod) + >>> cc['c2'].skill().round(2) + n bias rmse urmse mae cc si r2 + observation + c2 113 -0.00 0.35 0.35 0.29 0.97 0.12 0.99 + + >>> cc['c2'].skill(by='freq:D').round(2) + n bias rmse urmse mae cc si r2 + 2017-10-27 72 -0.19 0.31 0.25 0.26 0.48 0.12 0.98 + 2017-10-28 0 NaN NaN NaN NaN NaN NaN NaN + 2017-10-29 41 0.33 0.41 0.25 0.36 0.96 0.06 0.99 + """ + metrics = _parse_metric(metrics, directional=self.quantity.is_directional) + + # TODO remove in v1.1 + model, start, end, area = _get_deprecated_args(kwargs) # type: ignore + if kwargs != {}: + raise AttributeError(f"Unknown keyword arguments: {kwargs}") + + cmp = self.sel( + model=model, + start=start, + end=end, + area=area, + ) + if cmp.n_points == 0: + raise ValueError("No data selected for skill assessment") + + by = _parse_groupby(by, n_mod=cmp.n_models, n_qnt=1) + + df = cmp._to_long_dataframe() + res = _groupby_df(df, by=by, metrics=metrics) + res["x"] = np.nan if self.gtype == "track" else cmp.x + res["y"] = np.nan if self.gtype == "track" else cmp.y + res = self._add_as_col_if_not_in_index(df, skilldf=res) + return SkillTable(res) + + def _add_as_col_if_not_in_index( + self, df: pd.DataFrame, skilldf: pd.DataFrame + ) -> pd.DataFrame: + """Add a field to skilldf if unique in df""" + FIELDS = ("observation", "model") - Returns - ------- - dict[str, float] - skill score as a single number (for each model) - - See also - -------- - skill - a method for skill assessment returning a pd.DataFrame - - Examples - -------- - >>> import modelskill as ms - >>> cmp = ms.match(c2, mod) - >>> cmp.score() - {'mod': 0.3517964910888918} - - >>> cmp.score(metric="mape") - {'mod': 11.567399646108198} - """ - metric = 
_parse_metric(metric)[0] - if not (callable(metric) or isinstance(metric, str)): - raise ValueError("metric must be a string or a function") - - # TODO remove in v1.1 - model, start, end, area = _get_deprecated_args(kwargs) # type: ignore - assert kwargs == {}, f"Unknown keyword arguments: {kwargs}" - - sk = self.skill( - by=["model", "observation"], - metrics=[metric], - model=model, # deprecated - start=start, # deprecated - end=end, # deprecated - area=area, # deprecated - ) - df = sk.to_dataframe() + for field in FIELDS: + if (field == "model") and (self.n_models <= 1): + continue + if field not in skilldf.index.names: + unames = df[field].unique() + if len(unames) == 1: + skilldf.insert(loc=0, column=field, value=unames[0]) + return skilldf + + def score( + self, + metric: str | Callable = mtr.rmse, + **kwargs: Any, + ) -> Dict[str, float]: + """Model skill score + + Parameters + ---------- + metric : list, optional + a single metric from modelskill.metrics, by default rmse + + Returns + ------- + dict[str, float] + skill score as a single number (for each model) + + See also + -------- + skill + a method for skill assessment returning a pd.DataFrame + + Examples + -------- + >>> import modelskill as ms + >>> cmp = ms.match(c2, mod) + >>> cmp.score() + {'mod': 0.3517964910888918} - metric_name = metric if isinstance(metric, str) else metric.__name__ - ser = df.reset_index().groupby("model", observed=True)[metric_name].mean() - score = {str(k): float(v) for k, v in ser.items()} - return score - - def gridded_skill( - self, - bins: int = 5, - binsize: float | None = None, - by: str | Iterable[str] | None = None, - metrics: Iterable[str] | Iterable[Callable] | str | Callable | None = None, - n_min: int | None = None, - **kwargs: Any, - ): - """Aggregated spatial skill assessment of model(s) on a regular spatial grid. 
- - Parameters - ---------- - bins: int, list of scalars, or IntervalIndex, or tuple of, optional - criteria to bin x and y by, argument bins to pd.cut(), default 5 - define different bins for x and y a tuple - e.g.: bins = 5, bins = (5,[2,3,5]) - binsize : float, optional - bin size for x and y dimension, overwrites bins - creates bins with reference to round(mean(x)), round(mean(y)) - by : (str, List[str]), optional - group by column name or by temporal bin via the freq-argument - (using pandas pd.Grouper(freq)), - e.g.: 'freq:M' = monthly; 'freq:D' daily - by default ["model","observation"] - metrics : list, optional - list of modelskill.metrics, by default modelskill.options.metrics.list - n_min : int, optional - minimum number of observations in a grid cell; - cells with fewer observations get a score of `np.nan` - - Returns - ------- - SkillGrid - skill assessment as a SkillGrid object - - See also - -------- - skill - a method for aggregated skill assessment - - Examples - -------- - >>> import modelskill as ms - >>> cmp = ms.match(c2, mod) # satellite altimeter vs. model - >>> cmp.gridded_skill(metrics='bias') - <xarray.Dataset> - Dimensions: (x: 5, y: 5) - Coordinates: - observation 'alti' - * x (x) float64 -0.436 1.543 3.517 5.492 7.466 - * y (y) float64 50.6 51.66 52.7 53.75 54.8 - Data variables: - n (x, y) int32 3 0 0 14 37 17 50 36 72 ... 0 0 15 20 0 0 0 28 76 - bias (x, y) float64 -0.02626 nan nan ... 
nan 0.06785 -0.1143 - - >>> gs = cc.gridded_skill(binsize=0.5) - >>> gs.data.coords - Coordinates: - observation 'alti' - * x (x) float64 -1.5 -0.5 0.5 1.5 2.5 3.5 4.5 5.5 6.5 7.5 - * y (y) float64 51.5 52.5 53.5 54.5 55.5 56.5 - """ - - # TODO remove in v1.1 - model, start, end, area = _get_deprecated_args(kwargs) - assert kwargs == {}, f"Unknown keyword arguments: {kwargs}" - - cmp = self.sel( - model=model, - start=start, - end=end, - area=area, - ) - - metrics = _parse_metric(metrics) - if cmp.n_points == 0: - raise ValueError("No data to compare") - - df = cmp._to_long_dataframe() - df = _add_spatial_grid_to_df(df=df, bins=bins, binsize=binsize) - - agg_cols = _parse_groupby(by=by, n_mod=cmp.n_models, n_qnt=1) - if "x" not in agg_cols: - agg_cols.insert(0, "x") - if "y" not in agg_cols: - agg_cols.insert(0, "y") - - df = df.drop(columns=["x", "y"]).rename(columns=dict(xBin="x", yBin="y")) - res = _groupby_df(df, by=agg_cols, metrics=metrics, n_min=n_min) - ds = res.to_xarray().squeeze() - - # change categorial index to coordinates - for dim in ("x", "y"): - ds[dim] = ds[dim].astype(float) + >>> cmp.score(metric="mape") + {'mod': 11.567399646108198} + """ + metric = _parse_metric(metric)[0] + if not (callable(metric) or isinstance(metric, str)): + raise ValueError("metric must be a string or a function") + + # TODO remove in v1.1 + model, start, end, area = _get_deprecated_args(kwargs) # type: ignore + assert kwargs == {}, f"Unknown keyword arguments: {kwargs}" + + sk = self.skill( + by=["model", "observation"], + metrics=[metric], + model=model, # deprecated + start=start, # deprecated + end=end, # deprecated + area=area, # deprecated + ) + df = sk.to_dataframe() + + metric_name = metric if isinstance(metric, str) else metric.__name__ + ser = df.reset_index().groupby("model", observed=True)[metric_name].mean() + score = {str(k): float(v) for k, v in ser.items()} + return score + + def gridded_skill( + self, + bins: int = 5, + binsize: float | None = None, + 
by: str | Iterable[str] | None = None, + metrics: Iterable[str] | Iterable[Callable] | str | Callable | None = None, + n_min: int | None = None, + **kwargs: Any, + ): + """Aggregated spatial skill assessment of model(s) on a regular spatial grid. + + Parameters + ---------- + bins: int, list of scalars, or IntervalIndex, or tuple of, optional + criteria to bin x and y by, argument bins to pd.cut(), default 5 + define different bins for x and y a tuple + e.g.: bins = 5, bins = (5,[2,3,5]) + binsize : float, optional + bin size for x and y dimension, overwrites bins + creates bins with reference to round(mean(x)), round(mean(y)) + by : (str, List[str]), optional + group by column name or by temporal bin via the freq-argument + (using pandas pd.Grouper(freq)), + e.g.: 'freq:M' = monthly; 'freq:D' daily + by default ["model","observation"] + metrics : list, optional + list of modelskill.metrics, by default modelskill.options.metrics.list + n_min : int, optional + minimum number of observations in a grid cell; + cells with fewer observations get a score of `np.nan` + + Returns + ------- + SkillGrid + skill assessment as a SkillGrid object + + See also + -------- + skill + a method for aggregated skill assessment + + Examples + -------- + >>> import modelskill as ms + >>> cmp = ms.match(c2, mod) # satellite altimeter vs. model + >>> cmp.gridded_skill(metrics='bias') + <xarray.Dataset> + Dimensions: (x: 5, y: 5) + Coordinates: + observation 'alti' + * x (x) float64 -0.436 1.543 3.517 5.492 7.466 + * y (y) float64 50.6 51.66 52.7 53.75 54.8 + Data variables: + n (x, y) int32 3 0 0 14 37 17 50 36 72 ... 0 0 15 20 0 0 0 28 76 + bias (x, y) float64 -0.02626 nan nan ... 
nan 0.06785 -0.1143 + + >>> gs = cc.gridded_skill(binsize=0.5) + >>> gs.data.coords + Coordinates: + observation 'alti' + * x (x) float64 -1.5 -0.5 0.5 1.5 2.5 3.5 4.5 5.5 6.5 7.5 + * y (y) float64 51.5 52.5 53.5 54.5 55.5 56.5 + """ + + # TODO remove in v1.1 + model, start, end, area = _get_deprecated_args(kwargs) + assert kwargs == {}, f"Unknown keyword arguments: {kwargs}" + + cmp = self.sel( + model=model, + start=start, + end=end, + area=area, + ) - return SkillGrid(ds) - - @property - def _residual(self) -> np.ndarray: - df = self.data.drop_vars(["x", "y", "z"]).to_dataframe() - obs = df[self._obs_str].values - mod = df[self.mod_names].values - return mod - np.vstack(obs) - - def remove_bias( - self, correct: Literal["Model", "Observation"] = "Model" - ) -> Comparer: - cmp = self.copy() - - bias = cmp._residual.mean(axis=0) - if correct == "Model": - for j in range(cmp.n_models): - mod_name = cmp.mod_names[j] - mod_ts = cmp.raw_mod_data[mod_name] - with xr.set_options(keep_attrs=True): # type: ignore - mod_ts.data[mod_name].values = mod_ts.values - bias[j] - cmp.data[mod_name].values = cmp.data[mod_name].values - bias[j] - elif correct == "Observation": - # what if multiple models? - with xr.set_options(keep_attrs=True): # type: ignore - cmp.data[cmp._obs_str].values = cmp.data[cmp._obs_str].values + bias - else: - raise ValueError( - f"Unknown correct={correct}. 
Only know 'Model' and 'Observation'" - ) - return cmp - - def to_dataframe(self) -> pd.DataFrame: - """Convert matched data to pandas DataFrame + metrics = _parse_metric(metrics) + if cmp.n_points == 0: + raise ValueError("No data to compare") + + df = cmp._to_long_dataframe() + df = _add_spatial_grid_to_df(df=df, bins=bins, binsize=binsize) + + agg_cols = _parse_groupby(by=by, n_mod=cmp.n_models, n_qnt=1) + if "x" not in agg_cols: + agg_cols.insert(0, "x") + if "y" not in agg_cols: + agg_cols.insert(0, "y") + + df = df.drop(columns=["x", "y"]).rename(columns=dict(xBin="x", yBin="y")) + res = _groupby_df(df, by=agg_cols, metrics=metrics, n_min=n_min) + ds = res.to_xarray().squeeze() + + # change categorial index to coordinates + for dim in ("x", "y"): + ds[dim] = ds[dim].astype(float) + + return SkillGrid(ds) + + @property + def _residual(self) -> np.ndarray: + df = self.data.drop_vars(["x", "y", "z"]).to_dataframe() + obs = df[self._obs_str].values + mod = df[self.mod_names].values + return mod - np.vstack(obs) + + def remove_bias( + self, correct: Literal["Model", "Observation"] = "Model" + ) -> Comparer: + cmp = self.copy() - Include x, y coordinates only if gtype=track - - Returns - ------- - pd.DataFrame - data as a pandas DataFrame - """ - if self.gtype == str(GeometryType.POINT): - # we remove the scalar coordinate variables as they - # will otherwise be columns in the dataframe - return self.data.drop_vars(["x", "y", "z"]).to_dataframe() - elif self.gtype == str(GeometryType.TRACK): - df = self.data.drop_vars(["z"]).to_dataframe() - # make sure that x, y cols are first - cols = ["x", "y"] + [c for c in df.columns if c not in ["x", "y"]] - return df[cols] - else: - raise NotImplementedError(f"Unknown gtype: {self.gtype}") - - def save(self, filename: Union[str, Path]) -> None: - """Save to netcdf file - - Parameters - ---------- - filename : str or Path - filename - """ - ds = self.data - - # add self.raw_mod_data to ds with prefix 'raw_' to avoid name 
conflicts - # an alternative strategy would be to use NetCDF groups - # https://docs.xarray.dev/en/stable/user-guide/io.html#groups - - # There is no need to save raw data for track data, since it is identical to the matched data - if self.gtype == "point": - ds = self.data.copy() # copy needed to avoid modifying self.data - - for key, ts_mod in self.raw_mod_data.items(): - ts_mod = ts_mod.copy() - # rename time to unique name - ts_mod.data = ts_mod.data.rename({"time": "_time_raw_" + key}) - # da = ds_mod.to_xarray()[key] - ds["_raw_" + key] = ts_mod.data[key] - - ds.to_netcdf(filename) - - @staticmethod - def load(filename: Union[str, Path]) -> "Comparer": - """Load from netcdf file + bias = cmp._residual.mean(axis=0) + if correct == "Model": + for j in range(cmp.n_models): + mod_name = cmp.mod_names[j] + mod_ts = cmp.raw_mod_data[mod_name] + with xr.set_options(keep_attrs=True): # type: ignore + mod_ts.data[mod_name].values = mod_ts.values - bias[j] + cmp.data[mod_name].values = cmp.data[mod_name].values - bias[j] + elif correct == "Observation": + # what if multiple models? + with xr.set_options(keep_attrs=True): # type: ignore + cmp.data[cmp._obs_str].values = cmp.data[cmp._obs_str].values + bias + else: + raise ValueError( + f"Unknown correct={correct}. 
Only know 'Model' and 'Observation'" + ) + return cmp + + def to_dataframe(self) -> pd.DataFrame: + """Convert matched data to pandas DataFrame + + Include x, y coordinates only if gtype=track + + Returns + ------- + pd.DataFrame + data as a pandas DataFrame + """ + if self.gtype == str(GeometryType.POINT): + # we remove the scalar coordinate variables as they + # will otherwise be columns in the dataframe + return self.data.drop_vars(["x", "y", "z"]).to_dataframe() + elif self.gtype == str(GeometryType.TRACK): + df = self.data.drop_vars(["z"]).to_dataframe() + # make sure that x, y cols are first + cols = ["x", "y"] + [c for c in df.columns if c not in ["x", "y"]] + return df[cols] + else: + raise NotImplementedError(f"Unknown gtype: {self.gtype}") + + def save(self, filename: Union[str, Path]) -> None: + """Save to netcdf file + + Parameters + ---------- + filename : str or Path + filename + """ + ds = self.data - Parameters - ---------- - filename : str or Path - filename - - Returns - ------- - Comparer - """ - with xr.open_dataset(filename) as ds: - data = ds.load() - - if data.gtype == "track": - return Comparer(matched_data=data) + # add self.raw_mod_data to ds with prefix 'raw_' to avoid name conflicts + # an alternative strategy would be to use NetCDF groups + # https://docs.xarray.dev/en/stable/user-guide/io.html#groups + + # There is no need to save raw data for track data, since it is identical to the matched data + if self.gtype == "point": + ds = self.data.copy() # copy needed to avoid modifying self.data + + for key, ts_mod in self.raw_mod_data.items(): + ts_mod = ts_mod.copy() + # rename time to unique name + ts_mod.data = ts_mod.data.rename({"time": "_time_raw_" + key}) + # da = ds_mod.to_xarray()[key] + ds["_raw_" + key] = ts_mod.data[key] - if data.gtype == "point": - raw_mod_data: Dict[str, TimeSeries] = {} - - for var in data.data_vars: - var_name = str(var) - if var_name[:5] == "_raw_": - new_key = var_name[5:] # remove prefix '_raw_' - ds = 
data[[var_name]].rename( - {"_time_raw_" + new_key: "time", var_name: new_key} - ) - ts = PointObservation(data=ds, name=new_key) - # TODO: name of time? - # ts.name = new_key - # df = ( - # data[var_name] - # .to_dataframe() - # .rename( - # columns={"_time_raw_" + new_key: "time", var_name: new_key} - # ) - # ) - raw_mod_data[new_key] = ts - - # data = data.drop(var_name).drop("_time_raw_" + new_key) + ds.to_netcdf(filename) + + @staticmethod + def load(filename: Union[str, Path]) -> "Comparer": + """Load from netcdf file + + Parameters + ---------- + filename : str or Path + filename + + Returns + ------- + Comparer + """ + with xr.open_dataset(filename) as ds: + data = ds.load() + + if data.gtype == "track": + return Comparer(matched_data=data) + + if data.gtype == "point": + raw_mod_data: Dict[str, TimeSeries] = {} - # filter variables, only keep the ones with a 'time' dimension - data = data[[v for v in data.data_vars if "time" in data[v].dims]] - - return Comparer(matched_data=data, raw_mod_data=raw_mod_data) - - else: - raise NotImplementedError(f"Unknown gtype: {data.gtype}") - - # =============== Deprecated methods =============== - - def spatial_skill( - self, - bins=5, - binsize=None, - by=None, - metrics=None, - n_min=None, - **kwargs, - ): - # deprecated - warnings.warn( - "spatial_skill is deprecated, use gridded_skill instead", FutureWarning - ) - return self.gridded_skill( - bins=bins, - binsize=binsize, - by=by, - metrics=metrics, - n_min=n_min, - **kwargs, - ) - - # TODO remove plotting methods in v1.1 - def scatter( - self, - *, - bins=120, - quantiles=None, - fit_to_quantiles=False, - show_points=None, - show_hist=None, - show_density=None, - norm=None, - backend="matplotlib", - figsize=(8, 8), - xlim=None, - ylim=None, - reg_method="ols", - title=None, - xlabel=None, - ylabel=None, - skill_table=None, - **kwargs, - ): - warnings.warn( - "This method is deprecated, use plot.scatter instead", FutureWarning - ) - - # TODO remove in v1.1 - model, 
start, end, area = _get_deprecated_args(kwargs) - - # self.plot.scatter( - self.sel( - model=model, - start=start, - end=end, - area=area, - ).plot.scatter( - bins=bins, - quantiles=quantiles, - fit_to_quantiles=fit_to_quantiles, - show_points=show_points, - show_hist=show_hist, - show_density=show_density, - norm=norm, - backend=backend, - figsize=figsize, - xlim=xlim, - ylim=ylim, - reg_method=reg_method, - title=title, - xlabel=xlabel, - ylabel=ylabel, - **kwargs, - ) - - def taylor( - self, - normalize_std=False, - figsize=(7, 7), - marker="o", - marker_size=6.0, - title="Taylor diagram", - **kwargs, - ): - warnings.warn("taylor is deprecated, use plot.taylor instead", FutureWarning) - - self.plot.taylor( - normalize_std=normalize_std, - figsize=figsize, - marker=marker, - marker_size=marker_size, - title=title, - **kwargs, - ) - - def hist( - self, *, model=None, bins=100, title=None, density=True, alpha=0.5, **kwargs - ): - warnings.warn("hist is deprecated. Use plot.hist instead.", FutureWarning) - return self.plot.hist( - model=model, bins=bins, title=title, density=density, alpha=alpha, **kwargs - ) - - def kde(self, ax=None, **kwargs) -> Axes: - warnings.warn("kde is deprecated. Use plot.kde instead.", FutureWarning) - - return self.plot.kde(ax=ax, **kwargs) - - def plot_timeseries( - self, title=None, *, ylim=None, figsize=None, backend="matplotlib", **kwargs - ): - warnings.warn( - "plot_timeseries is deprecated. Use plot.timeseries instead.", FutureWarning - ) - - return self.plot.timeseries( - title=title, ylim=ylim, figsize=figsize, backend=backend, **kwargs - ) - - def residual_hist(self, bins=100, title=None, color=None, **kwargs): - warnings.warn( - "residual_hist is deprecated. 
Use plot.residual_hist instead.", - FutureWarning, - ) - - return self.plot.residual_hist(bins=bins, title=title, color=color, **kwargs) + for var in data.data_vars: + var_name = str(var) + if var_name[:5] == "_raw_": + new_key = var_name[5:] # remove prefix '_raw_' + ds = data[[var_name]].rename( + {"_time_raw_" + new_key: "time", var_name: new_key} + ) + ts = PointObservation(data=ds, name=new_key) + # TODO: name of time? + # ts.name = new_key + # df = ( + # data[var_name] + # .to_dataframe() + # .rename( + # columns={"_time_raw_" + new_key: "time", var_name: new_key} + # ) + # ) + raw_mod_data[new_key] = ts + + # data = data.drop(var_name).drop("_time_raw_" + new_key) + + # filter variables, only keep the ones with a 'time' dimension + data = data[[v for v in data.data_vars if "time" in data[v].dims]] + + return Comparer(matched_data=data, raw_mod_data=raw_mod_data) + + else: + raise NotImplementedError(f"Unknown gtype: {data.gtype}") + + # =============== Deprecated methods =============== + + def spatial_skill( + self, + bins=5, + binsize=None, + by=None, + metrics=None, + n_min=None, + **kwargs, + ): + # deprecated + warnings.warn( + "spatial_skill is deprecated, use gridded_skill instead", FutureWarning + ) + return self.gridded_skill( + bins=bins, + binsize=binsize, + by=by, + metrics=metrics, + n_min=n_min, + **kwargs, + ) + + # TODO remove plotting methods in v1.1 + def scatter( + self, + *, + bins=120, + quantiles=None, + fit_to_quantiles=False, + show_points=None, + show_hist=None, + show_density=None, + norm=None, + backend="matplotlib", + figsize=(8, 8), + xlim=None, + ylim=None, + reg_method="ols", + title=None, + xlabel=None, + ylabel=None, + skill_table=None, + **kwargs, + ): + warnings.warn( + "This method is deprecated, use plot.scatter instead", FutureWarning + ) + + # TODO remove in v1.1 + model, start, end, area = _get_deprecated_args(kwargs) + + # self.plot.scatter( + self.sel( + model=model, + start=start, + end=end, + area=area, + 
).plot.scatter( + bins=bins, + quantiles=quantiles, + fit_to_quantiles=fit_to_quantiles, + show_points=show_points, + show_hist=show_hist, + show_density=show_density, + norm=norm, + backend=backend, + figsize=figsize, + xlim=xlim, + ylim=ylim, + reg_method=reg_method, + title=title, + xlabel=xlabel, + ylabel=ylabel, + **kwargs, + ) + + def taylor( + self, + normalize_std=False, + figsize=(7, 7), + marker="o", + marker_size=6.0, + title="Taylor diagram", + **kwargs, + ): + warnings.warn("taylor is deprecated, use plot.taylor instead", FutureWarning) + + self.plot.taylor( + normalize_std=normalize_std, + figsize=figsize, + marker=marker, + marker_size=marker_size, + title=title, + **kwargs, + ) + + def hist( + self, *, model=None, bins=100, title=None, density=True, alpha=0.5, **kwargs + ): + warnings.warn("hist is deprecated. Use plot.hist instead.", FutureWarning) + return self.plot.hist( + model=model, bins=bins, title=title, density=density, alpha=alpha, **kwargs + ) + + def kde(self, ax=None, **kwargs) -> Axes: + warnings.warn("kde is deprecated. Use plot.kde instead.", FutureWarning) + + return self.plot.kde(ax=ax, **kwargs) + + def plot_timeseries( + self, title=None, *, ylim=None, figsize=None, backend="matplotlib", **kwargs + ): + warnings.warn( + "plot_timeseries is deprecated. Use plot.timeseries instead.", FutureWarning + ) + + return self.plot.timeseries( + title=title, ylim=ylim, figsize=figsize, backend=backend, **kwargs + ) + + def residual_hist(self, bins=100, title=None, color=None, **kwargs): + warnings.warn( + "residual_hist is deprecated. Use plot.residual_hist instead.", + FutureWarning, + ) + + return self.plot.residual_hist(bins=bins, title=title, color=color, **kwargs)

@@ -4459,28 +4459,7 @@

Source code in modelskill/comparison/_comparison.py -
510
-511
-512
-513
-514
-515
-516
-517
-518
-519
-520
-521
-522
-523
-524
-525
-526
-527
-528
-529
-530
-531
+              
531
 532
 533
 534
@@ -4492,40 +4471,61 @@ 

540 541 542 -543

@staticmethod
-def from_matched_data(
-    data: xr.Dataset | pd.DataFrame,
-    raw_mod_data: Optional[Dict[str, TimeSeries]] = None,
-    obs_item: str | int | None = None,
-    mod_items: Optional[Iterable[str | int]] = None,
-    aux_items: Optional[Iterable[str | int]] = None,
-    name: Optional[str] = None,
-    weight: float = 1.0,
-    x: Optional[float] = None,
-    y: Optional[float] = None,
-    z: Optional[float] = None,
-    x_item: str | int | None = None,
-    y_item: str | int | None = None,
-    quantity: Optional[Quantity] = None,
-) -> "Comparer":
-    """Initialize from compared data"""
-    if not isinstance(data, xr.Dataset):
-        # TODO: handle raw_mod_data by accessing data.attrs["kind"] and only remove nan after
-        data = _matched_data_to_xarray(
-            data,
-            obs_item=obs_item,
-            mod_items=mod_items,
-            aux_items=aux_items,
-            name=name,
-            x=x,
-            y=y,
-            z=z,
-            x_item=x_item,
-            y_item=y_item,
-            quantity=quantity,
-        )
-        data.attrs["weight"] = weight
-    return Comparer(matched_data=data, raw_mod_data=raw_mod_data)
+543
+544
+545
+546
+547
+548
+549
+550
+551
+552
+553
+554
+555
+556
+557
+558
+559
+560
+561
+562
+563
+564
@staticmethod
+def from_matched_data(
+    data: xr.Dataset | pd.DataFrame,
+    raw_mod_data: Optional[Dict[str, TimeSeries]] = None,
+    obs_item: str | int | None = None,
+    mod_items: Optional[Iterable[str | int]] = None,
+    aux_items: Optional[Iterable[str | int]] = None,
+    name: Optional[str] = None,
+    weight: float = 1.0,
+    x: Optional[float] = None,
+    y: Optional[float] = None,
+    z: Optional[float] = None,
+    x_item: str | int | None = None,
+    y_item: str | int | None = None,
+    quantity: Optional[Quantity] = None,
+) -> "Comparer":
+    """Initialize from compared data"""
+    if not isinstance(data, xr.Dataset):
+        # TODO: handle raw_mod_data by accessing data.attrs["kind"] and only remove nan after
+        data = _matched_data_to_xarray(
+            data,
+            obs_item=obs_item,
+            mod_items=mod_items,
+            aux_items=aux_items,
+            name=name,
+            x=x,
+            y=y,
+            z=z,
+            x_item=x_item,
+            y_item=y_item,
+            quantity=quantity,
+        )
+        data.attrs["weight"] = weight
+    return Comparer(matched_data=data, raw_mod_data=raw_mod_data)
 
@@ -4693,28 +4693,7 @@

Source code in modelskill/comparison/_comparison.py -
1131
-1132
-1133
-1134
-1135
-1136
-1137
-1138
-1139
-1140
-1141
-1142
-1143
-1144
-1145
-1146
-1147
-1148
-1149
-1150
-1151
-1152
+              
1152
 1153
 1154
 1155
@@ -4789,103 +4768,124 @@ 

1224 1225 1226 -1227

def gridded_skill(
-    self,
-    bins: int = 5,
-    binsize: float | None = None,
-    by: str | Iterable[str] | None = None,
-    metrics: Iterable[str] | Iterable[Callable] | str | Callable | None = None,
-    n_min: int | None = None,
-    **kwargs: Any,
-):
-    """Aggregated spatial skill assessment of model(s) on a regular spatial grid.
-
-    Parameters
-    ----------
-    bins: int, list of scalars, or IntervalIndex, or tuple of, optional
-        criteria to bin x and y by, argument bins to pd.cut(), default 5
-        define different bins for x and y a tuple
-        e.g.: bins = 5, bins = (5,[2,3,5])
-    binsize : float, optional
-        bin size for x and y dimension, overwrites bins
-        creates bins with reference to round(mean(x)), round(mean(y))
-    by : (str, List[str]), optional
-        group by column name or by temporal bin via the freq-argument
-        (using pandas pd.Grouper(freq)),
-        e.g.: 'freq:M' = monthly; 'freq:D' daily
-        by default ["model","observation"]
-    metrics : list, optional
-        list of modelskill.metrics, by default modelskill.options.metrics.list
-    n_min : int, optional
-        minimum number of observations in a grid cell;
-        cells with fewer observations get a score of `np.nan`
-
-    Returns
-    -------
-    SkillGrid
-        skill assessment as a SkillGrid object
-
-    See also
-    --------
-    skill
-        a method for aggregated skill assessment
-
-    Examples
-    --------
-    >>> import modelskill as ms
-    >>> cmp = ms.match(c2, mod)   # satellite altimeter vs. model
-    >>> cmp.gridded_skill(metrics='bias')
-    <xarray.Dataset>
-    Dimensions:      (x: 5, y: 5)
-    Coordinates:
-        observation   'alti'
-    * x            (x) float64 -0.436 1.543 3.517 5.492 7.466
-    * y            (y) float64 50.6 51.66 52.7 53.75 54.8
-    Data variables:
-        n            (x, y) int32 3 0 0 14 37 17 50 36 72 ... 0 0 15 20 0 0 0 28 76
-        bias         (x, y) float64 -0.02626 nan nan ... nan 0.06785 -0.1143
-
-    >>> gs = cc.gridded_skill(binsize=0.5)
-    >>> gs.data.coords
-    Coordinates:
-        observation   'alti'
-    * x            (x) float64 -1.5 -0.5 0.5 1.5 2.5 3.5 4.5 5.5 6.5 7.5
-    * y            (y) float64 51.5 52.5 53.5 54.5 55.5 56.5
-    """
-
-    # TODO remove in v1.1
-    model, start, end, area = _get_deprecated_args(kwargs)
-    assert kwargs == {}, f"Unknown keyword arguments: {kwargs}"
-
-    cmp = self.sel(
-        model=model,
-        start=start,
-        end=end,
-        area=area,
-    )
-
-    metrics = _parse_metric(metrics)
-    if cmp.n_points == 0:
-        raise ValueError("No data to compare")
-
-    df = cmp._to_long_dataframe()
-    df = _add_spatial_grid_to_df(df=df, bins=bins, binsize=binsize)
-
-    agg_cols = _parse_groupby(by=by, n_mod=cmp.n_models, n_qnt=1)
-    if "x" not in agg_cols:
-        agg_cols.insert(0, "x")
-    if "y" not in agg_cols:
-        agg_cols.insert(0, "y")
-
-    df = df.drop(columns=["x", "y"]).rename(columns=dict(xBin="x", yBin="y"))
-    res = _groupby_df(df, by=agg_cols, metrics=metrics, n_min=n_min)
-    ds = res.to_xarray().squeeze()
-
-    # change categorial index to coordinates
-    for dim in ("x", "y"):
-        ds[dim] = ds[dim].astype(float)
+1227
+1228
+1229
+1230
+1231
+1232
+1233
+1234
+1235
+1236
+1237
+1238
+1239
+1240
+1241
+1242
+1243
+1244
+1245
+1246
+1247
+1248
def gridded_skill(
+    self,
+    bins: int = 5,
+    binsize: float | None = None,
+    by: str | Iterable[str] | None = None,
+    metrics: Iterable[str] | Iterable[Callable] | str | Callable | None = None,
+    n_min: int | None = None,
+    **kwargs: Any,
+):
+    """Aggregated spatial skill assessment of model(s) on a regular spatial grid.
+
+    Parameters
+    ----------
+    bins: int, list of scalars, or IntervalIndex, or tuple of, optional
+        criteria to bin x and y by, argument bins to pd.cut(), default 5
+        define different bins for x and y a tuple
+        e.g.: bins = 5, bins = (5,[2,3,5])
+    binsize : float, optional
+        bin size for x and y dimension, overwrites bins
+        creates bins with reference to round(mean(x)), round(mean(y))
+    by : (str, List[str]), optional
+        group by column name or by temporal bin via the freq-argument
+        (using pandas pd.Grouper(freq)),
+        e.g.: 'freq:M' = monthly; 'freq:D' daily
+        by default ["model","observation"]
+    metrics : list, optional
+        list of modelskill.metrics, by default modelskill.options.metrics.list
+    n_min : int, optional
+        minimum number of observations in a grid cell;
+        cells with fewer observations get a score of `np.nan`
+
+    Returns
+    -------
+    SkillGrid
+        skill assessment as a SkillGrid object
+
+    See also
+    --------
+    skill
+        a method for aggregated skill assessment
+
+    Examples
+    --------
+    >>> import modelskill as ms
+    >>> cmp = ms.match(c2, mod)   # satellite altimeter vs. model
+    >>> cmp.gridded_skill(metrics='bias')
+    <xarray.Dataset>
+    Dimensions:      (x: 5, y: 5)
+    Coordinates:
+        observation   'alti'
+    * x            (x) float64 -0.436 1.543 3.517 5.492 7.466
+    * y            (y) float64 50.6 51.66 52.7 53.75 54.8
+    Data variables:
+        n            (x, y) int32 3 0 0 14 37 17 50 36 72 ... 0 0 15 20 0 0 0 28 76
+        bias         (x, y) float64 -0.02626 nan nan ... nan 0.06785 -0.1143
+
+    >>> gs = cc.gridded_skill(binsize=0.5)
+    >>> gs.data.coords
+    Coordinates:
+        observation   'alti'
+    * x            (x) float64 -1.5 -0.5 0.5 1.5 2.5 3.5 4.5 5.5 6.5 7.5
+    * y            (y) float64 51.5 52.5 53.5 54.5 55.5 56.5
+    """
+
+    # TODO remove in v1.1
+    model, start, end, area = _get_deprecated_args(kwargs)
+    assert kwargs == {}, f"Unknown keyword arguments: {kwargs}"
+
+    cmp = self.sel(
+        model=model,
+        start=start,
+        end=end,
+        area=area,
+    )
 
-    return SkillGrid(ds)
+    metrics = _parse_metric(metrics)
+    if cmp.n_points == 0:
+        raise ValueError("No data to compare")
+
+    df = cmp._to_long_dataframe()
+    df = _add_spatial_grid_to_df(df=df, bins=bins, binsize=binsize)
+
+    agg_cols = _parse_groupby(by=by, n_mod=cmp.n_models, n_qnt=1)
+    if "x" not in agg_cols:
+        agg_cols.insert(0, "x")
+    if "y" not in agg_cols:
+        agg_cols.insert(0, "y")
+
+    df = df.drop(columns=["x", "y"]).rename(columns=dict(xBin="x", yBin="y"))
+    res = _groupby_df(df, by=agg_cols, metrics=metrics, n_min=n_min)
+    ds = res.to_xarray().squeeze()
+
+    # change categorial index to coordinates
+    for dim in ("x", "y"):
+        ds[dim] = ds[dim].astype(float)
+
+    return SkillGrid(ds)
 
@@ -4965,28 +4965,7 @@

Source code in modelskill/comparison/_comparison.py -
1308
-1309
-1310
-1311
-1312
-1313
-1314
-1315
-1316
-1317
-1318
-1319
-1320
-1321
-1322
-1323
-1324
-1325
-1326
-1327
-1328
-1329
+              
1329
 1330
 1331
 1332
@@ -5014,56 +4993,77 @@ 

1354 1355 1356 -1357

@staticmethod
-def load(filename: Union[str, Path]) -> "Comparer":
-    """Load from netcdf file
-
-    Parameters
-    ----------
-    filename : str or Path
-        filename
-
-    Returns
-    -------
-    Comparer
-    """
-    with xr.open_dataset(filename) as ds:
-        data = ds.load()
-
-    if data.gtype == "track":
-        return Comparer(matched_data=data)
-
-    if data.gtype == "point":
-        raw_mod_data: Dict[str, TimeSeries] = {}
-
-        for var in data.data_vars:
-            var_name = str(var)
-            if var_name[:5] == "_raw_":
-                new_key = var_name[5:]  # remove prefix '_raw_'
-                ds = data[[var_name]].rename(
-                    {"_time_raw_" + new_key: "time", var_name: new_key}
-                )
-                ts = PointObservation(data=ds, name=new_key)
-                # TODO: name of time?
-                # ts.name = new_key
-                # df = (
-                #     data[var_name]
-                #     .to_dataframe()
-                #     .rename(
-                #         columns={"_time_raw_" + new_key: "time", var_name: new_key}
-                #     )
-                # )
-                raw_mod_data[new_key] = ts
-
-                # data = data.drop(var_name).drop("_time_raw_" + new_key)
+1357
+1358
+1359
+1360
+1361
+1362
+1363
+1364
+1365
+1366
+1367
+1368
+1369
+1370
+1371
+1372
+1373
+1374
+1375
+1376
+1377
+1378
@staticmethod
+def load(filename: Union[str, Path]) -> "Comparer":
+    """Load from netcdf file
+
+    Parameters
+    ----------
+    filename : str or Path
+        filename
+
+    Returns
+    -------
+    Comparer
+    """
+    with xr.open_dataset(filename) as ds:
+        data = ds.load()
+
+    if data.gtype == "track":
+        return Comparer(matched_data=data)
+
+    if data.gtype == "point":
+        raw_mod_data: Dict[str, TimeSeries] = {}
 
-        # filter variables, only keep the ones with a 'time' dimension
-        data = data[[v for v in data.data_vars if "time" in data[v].dims]]
-
-        return Comparer(matched_data=data, raw_mod_data=raw_mod_data)
-
-    else:
-        raise NotImplementedError(f"Unknown gtype: {data.gtype}")
+        for var in data.data_vars:
+            var_name = str(var)
+            if var_name[:5] == "_raw_":
+                new_key = var_name[5:]  # remove prefix '_raw_'
+                ds = data[[var_name]].rename(
+                    {"_time_raw_" + new_key: "time", var_name: new_key}
+                )
+                ts = PointObservation(data=ds, name=new_key)
+                # TODO: name of time?
+                # ts.name = new_key
+                # df = (
+                #     data[var_name]
+                #     .to_dataframe()
+                #     .rename(
+                #         columns={"_time_raw_" + new_key: "time", var_name: new_key}
+                #     )
+                # )
+                raw_mod_data[new_key] = ts
+
+                # data = data.drop(var_name).drop("_time_raw_" + new_key)
+
+        # filter variables, only keep the ones with a 'time' dimension
+        data = data[[v for v in data.data_vars if "time" in data[v].dims]]
+
+        return Comparer(matched_data=data, raw_mod_data=raw_mod_data)
+
+    else:
+        raise NotImplementedError(f"Unknown gtype: {data.gtype}")
 
@@ -5144,45 +5144,45 @@

Source code in modelskill/comparison/_comparison.py -
def query(self, query: str) -> "Comparer":
-    """Return a new Comparer with values where query cond is True
-
-    Parameters
-    ----------
-    query : str
-        Query string, see pandas.DataFrame.query
-
-    Returns
-    -------
-    Comparer
-        New Comparer with values where cond is True and other otherwise.
-
-    Examples
-    --------
-    >>> c2 = c.query("Observation > 0")
-    """
-    d = self.data.query({"time": query})
-    d = d.dropna(dim="time", how="all")
-    return Comparer.from_matched_data(d, self.raw_mod_data)
+              
def query(self, query: str) -> "Comparer":
+    """Return a new Comparer with values where query cond is True
+
+    Parameters
+    ----------
+    query : str
+        Query string, see pandas.DataFrame.query
+
+    Returns
+    -------
+    Comparer
+        New Comparer with values where cond is True and other otherwise.
+
+    Examples
+    --------
+    >>> c2 = c.query("Observation > 0")
+    """
+    d = self.data.query({"time": query})
+    d = d.dropna(dim="time", how="all")
+    return Comparer.from_matched_data(d, self.raw_mod_data)
 
@@ -5283,28 +5283,7 @@

Source code in modelskill/comparison/_comparison.py -
695
-696
-697
-698
-699
-700
-701
-702
-703
-704
-705
-706
-707
-708
-709
-710
-711
-712
-713
-714
-715
-716
+              
716
 717
 718
 719
@@ -5344,68 +5323,89 @@ 

753 754 755 -756

def rename(
-    self, mapping: Mapping[str, str], errors: Literal["raise", "ignore"] = "raise"
-) -> "Comparer":
-    """Rename observation, model or auxiliary data variables
-
-    Parameters
-    ----------
-    mapping : dict
-        mapping of old names to new names
-    errors : {'raise', 'ignore'}, optional
-        If 'raise', raise a KeyError if any of the old names
-        do not exist in the data. By default 'raise'.
-
-    Returns
-    -------
-    Comparer
-
-    Examples
-    --------
-    >>> cmp = ms.match(observation, modeldata)
-    >>> cmp.mod_names
-    ['model1']
-    >>> cmp2 = cmp.rename({'model1': 'model2'})
-    >>> cmp2.mod_names
-    ['model2']
-    """
-    if errors not in ["raise", "ignore"]:
-        raise ValueError("errors must be 'raise' or 'ignore'")
-
-    allowed_keys = [self.name] + self.mod_names + self.aux_names
-    if errors == "raise":
-        for k in mapping.keys():
-            if k not in allowed_keys:
-                raise KeyError(f"Unknown key: {k}; must be one of {allowed_keys}")
-    else:
-        # "ignore": silently remove keys that are not in allowed_keys
-        mapping = {k: v for k, v in mapping.items() if k in allowed_keys}
+756
+757
+758
+759
+760
+761
+762
+763
+764
+765
+766
+767
+768
+769
+770
+771
+772
+773
+774
+775
+776
+777
def rename(
+    self, mapping: Mapping[str, str], errors: Literal["raise", "ignore"] = "raise"
+) -> "Comparer":
+    """Rename observation, model or auxiliary data variables
+
+    Parameters
+    ----------
+    mapping : dict
+        mapping of old names to new names
+    errors : {'raise', 'ignore'}, optional
+        If 'raise', raise a KeyError if any of the old names
+        do not exist in the data. By default 'raise'.
+
+    Returns
+    -------
+    Comparer
 
-    if any([k in _RESERVED_NAMES for k in mapping.values()]):
-        # TODO: also check for duplicates
-        raise ValueError(
-            f"Cannot rename to any of {_RESERVED_NAMES}, these are reserved names!"
-        )
-
-    # rename observation
-    obs_name = mapping.get(self.name, self.name)
-    ma_mapping = {k: v for k, v in mapping.items() if k != self.name}
-
-    data = self.data.rename(ma_mapping)
-    data.attrs["name"] = obs_name
-    raw_mod_data = dict()
-    for k, v in self.raw_mod_data.items():
-        if k in ma_mapping:
-            # copy is needed here as the same raw data could be
-            # used for multiple Comparers!
-            v2 = v.copy()
-            v2.data = v2.data.rename({k: ma_mapping[k]})
-            raw_mod_data[ma_mapping[k]] = v2
-        else:
-            raw_mod_data[k] = v
-
-    return Comparer(matched_data=data, raw_mod_data=raw_mod_data)
+    Examples
+    --------
+    >>> cmp = ms.match(observation, modeldata)
+    >>> cmp.mod_names
+    ['model1']
+    >>> cmp2 = cmp.rename({'model1': 'model2'})
+    >>> cmp2.mod_names
+    ['model2']
+    """
+    if errors not in ["raise", "ignore"]:
+        raise ValueError("errors must be 'raise' or 'ignore'")
+
+    allowed_keys = [self.name] + self.mod_names + self.aux_names
+    if errors == "raise":
+        for k in mapping.keys():
+            if k not in allowed_keys:
+                raise KeyError(f"Unknown key: {k}; must be one of {allowed_keys}")
+    else:
+        # "ignore": silently remove keys that are not in allowed_keys
+        mapping = {k: v for k, v in mapping.items() if k in allowed_keys}
+
+    if any([k in _RESERVED_NAMES for k in mapping.values()]):
+        # TODO: also check for duplicates
+        raise ValueError(
+            f"Cannot rename to any of {_RESERVED_NAMES}, these are reserved names!"
+        )
+
+    # rename observation
+    obs_name = mapping.get(self.name, self.name)
+    ma_mapping = {k: v for k, v in mapping.items() if k != self.name}
+
+    data = self.data.rename(ma_mapping)
+    data.attrs["name"] = obs_name
+    raw_mod_data = dict()
+    for k, v in self.raw_mod_data.items():
+        if k in ma_mapping:
+            # copy is needed here as the same raw data could be
+            # used for multiple Comparers!
+            v2 = v.copy()
+            v2.data = v2.data.rename({k: ma_mapping[k]})
+            raw_mod_data[ma_mapping[k]] = v2
+        else:
+            raw_mod_data[k] = v
+
+    return Comparer(matched_data=data, raw_mod_data=raw_mod_data)
 
@@ -5458,57 +5458,57 @@

Source code in modelskill/comparison/_comparison.py -
1281
-1282
-1283
-1284
-1285
-1286
-1287
-1288
-1289
-1290
-1291
-1292
-1293
-1294
-1295
-1296
-1297
-1298
-1299
-1300
-1301
-1302
+              
def save(self, filename: Union[str, Path]) -> None:
-    """Save to netcdf file
-
-    Parameters
-    ----------
-    filename : str or Path
-        filename
-    """
-    ds = self.data
-
-    # add self.raw_mod_data to ds with prefix 'raw_' to avoid name conflicts
-    # an alternative strategy would be to use NetCDF groups
-    # https://docs.xarray.dev/en/stable/user-guide/io.html#groups
-
-    # There is no need to save raw data for track data, since it is identical to the matched data
-    if self.gtype == "point":
-        ds = self.data.copy()  # copy needed to avoid modifying self.data
-
-        for key, ts_mod in self.raw_mod_data.items():
-            ts_mod = ts_mod.copy()
-            #  rename time to unique name
-            ts_mod.data = ts_mod.data.rename({"time": "_time_raw_" + key})
-            # da = ds_mod.to_xarray()[key]
-            ds["_raw_" + key] = ts_mod.data[key]
-
-    ds.to_netcdf(filename)
+1306
+1307
+1308
+1309
+1310
+1311
+1312
+1313
+1314
+1315
+1316
+1317
+1318
+1319
+1320
+1321
+1322
+1323
+1324
+1325
+1326
+1327
def save(self, filename: Union[str, Path]) -> None:
+    """Save to netcdf file
+
+    Parameters
+    ----------
+    filename : str or Path
+        filename
+    """
+    ds = self.data
+
+    # add self.raw_mod_data to ds with prefix 'raw_' to avoid name conflicts
+    # an alternative strategy would be to use NetCDF groups
+    # https://docs.xarray.dev/en/stable/user-guide/io.html#groups
+
+    # There is no need to save raw data for track data, since it is identical to the matched data
+    if self.gtype == "point":
+        ds = self.data.copy()  # copy needed to avoid modifying self.data
+
+        for key, ts_mod in self.raw_mod_data.items():
+            ts_mod = ts_mod.copy()
+            #  rename time to unique name
+            ts_mod.data = ts_mod.data.rename({"time": "_time_raw_" + key})
+            # da = ds_mod.to_xarray()[key]
+            ds["_raw_" + key] = ts_mod.data[key]
+
+    ds.to_netcdf(filename)
 
@@ -5601,28 +5601,7 @@

Source code in modelskill/comparison/_comparison.py -
1076
-1077
-1078
-1079
-1080
-1081
-1082
-1083
-1084
-1085
-1086
-1087
-1088
-1089
-1090
-1091
-1092
-1093
-1094
-1095
-1096
-1097
+              
1097
 1098
 1099
 1100
@@ -5654,60 +5633,81 @@ 

1126 1127 1128 -1129

def score(
-    self,
-    metric: str | Callable = mtr.rmse,
-    **kwargs: Any,
-) -> Dict[str, float]:
-    """Model skill score
-
-    Parameters
-    ----------
-    metric : list, optional
-        a single metric from modelskill.metrics, by default rmse
-
-    Returns
-    -------
-    dict[str, float]
-        skill score as a single number (for each model)
-
-    See also
-    --------
-    skill
-        a method for skill assessment returning a pd.DataFrame
-
-    Examples
-    --------
-    >>> import modelskill as ms
-    >>> cmp = ms.match(c2, mod)
-    >>> cmp.score()
-    {'mod': 0.3517964910888918}
-
-    >>> cmp.score(metric="mape")
-    {'mod': 11.567399646108198}
-    """
-    metric = _parse_metric(metric)[0]
-    if not (callable(metric) or isinstance(metric, str)):
-        raise ValueError("metric must be a string or a function")
-
-    # TODO remove in v1.1
-    model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore
-    assert kwargs == {}, f"Unknown keyword arguments: {kwargs}"
-
-    sk = self.skill(
-        by=["model", "observation"],
-        metrics=[metric],
-        model=model,  # deprecated
-        start=start,  # deprecated
-        end=end,  # deprecated
-        area=area,  # deprecated
-    )
-    df = sk.to_dataframe()
+1129
+1130
+1131
+1132
+1133
+1134
+1135
+1136
+1137
+1138
+1139
+1140
+1141
+1142
+1143
+1144
+1145
+1146
+1147
+1148
+1149
+1150
def score(
+    self,
+    metric: str | Callable = mtr.rmse,
+    **kwargs: Any,
+) -> Dict[str, float]:
+    """Model skill score
+
+    Parameters
+    ----------
+    metric : list, optional
+        a single metric from modelskill.metrics, by default rmse
+
+    Returns
+    -------
+    dict[str, float]
+        skill score as a single number (for each model)
+
+    See also
+    --------
+    skill
+        a method for skill assessment returning a pd.DataFrame
+
+    Examples
+    --------
+    >>> import modelskill as ms
+    >>> cmp = ms.match(c2, mod)
+    >>> cmp.score()
+    {'mod': 0.3517964910888918}
 
-    metric_name = metric if isinstance(metric, str) else metric.__name__
-    ser = df.reset_index().groupby("model", observed=True)[metric_name].mean()
-    score = {str(k): float(v) for k, v in ser.items()}
-    return score
+    >>> cmp.score(metric="mape")
+    {'mod': 11.567399646108198}
+    """
+    metric = _parse_metric(metric)[0]
+    if not (callable(metric) or isinstance(metric, str)):
+        raise ValueError("metric must be a string or a function")
+
+    # TODO remove in v1.1
+    model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore
+    assert kwargs == {}, f"Unknown keyword arguments: {kwargs}"
+
+    sk = self.skill(
+        by=["model", "observation"],
+        metrics=[metric],
+        model=model,  # deprecated
+        start=start,  # deprecated
+        end=end,  # deprecated
+        area=area,  # deprecated
+    )
+    df = sk.to_dataframe()
+
+    metric_name = metric if isinstance(metric, str) else metric.__name__
+    ser = df.reset_index().groupby("model", observed=True)[metric_name].mean()
+    score = {str(k): float(v) for k, v in ser.items()}
+    return score
 
@@ -5839,28 +5839,7 @@

Source code in modelskill/comparison/_comparison.py -
838
-839
-840
-841
-842
-843
-844
-845
-846
-847
-848
-849
-850
-851
-852
-853
-854
-855
-856
-857
-858
-859
+              
859
 860
 861
 862
@@ -5909,77 +5888,98 @@ 

905 906 907 -908

def sel(
-    self,
-    model: Optional[IdxOrNameTypes] = None,
-    start: Optional[TimeTypes] = None,
-    end: Optional[TimeTypes] = None,
-    time: Optional[TimeTypes] = None,
-    area: Optional[List[float]] = None,
-) -> "Comparer":
-    """Select data based on model, time and/or area.
-
-    Parameters
-    ----------
-    model : str or int or list of str or list of int, optional
-        Model name or index. If None, all models are selected.
-    start : str or datetime, optional
-        Start time. If None, all times are selected.
-    end : str or datetime, optional
-        End time. If None, all times are selected.
-    time : str or datetime, optional
-        Time. If None, all times are selected.
-    area : list of float, optional
-        bbox: [x0, y0, x1, y1] or Polygon. If None, all areas are selected.
-
-    Returns
-    -------
-    Comparer
-        New Comparer with selected data.
-    """
-    if (time is not None) and ((start is not None) or (end is not None)):
-        raise ValueError("Cannot use both time and start/end")
+908
+909
+910
+911
+912
+913
+914
+915
+916
+917
+918
+919
+920
+921
+922
+923
+924
+925
+926
+927
+928
+929
def sel(
+    self,
+    model: Optional[IdxOrNameTypes] = None,
+    start: Optional[TimeTypes] = None,
+    end: Optional[TimeTypes] = None,
+    time: Optional[TimeTypes] = None,
+    area: Optional[List[float]] = None,
+) -> "Comparer":
+    """Select data based on model, time and/or area.
 
-    d = self.data
-    raw_mod_data = self.raw_mod_data
-    if model is not None:
-        if isinstance(model, (str, int)):
-            models = [model]
-        else:
-            models = list(model)
-        mod_names: List[str] = [_get_name(m, self.mod_names) for m in models]
-        dropped_models = [m for m in self.mod_names if m not in mod_names]
-        d = d.drop_vars(dropped_models)
-        raw_mod_data = {m: raw_mod_data[m] for m in mod_names}
-    if (start is not None) or (end is not None):
-        # TODO: can this be done without to_index? (simplify)
-        d = d.sel(time=d.time.to_index().to_frame().loc[start:end].index)  # type: ignore
-
-        # Note: if user asks for a specific time, we also filter raw
-        raw_mod_data = {
-            k: v.sel(time=slice(start, end)) for k, v in raw_mod_data.items()
-        }  # type: ignore
-    if time is not None:
-        d = d.sel(time=time)
-
-        # Note: if user asks for a specific time, we also filter raw
-        raw_mod_data = {k: v.sel(time=time) for k, v in raw_mod_data.items()}
-    if area is not None:
-        if _area_is_bbox(area):
-            x0, y0, x1, y1 = area
-            mask = (d.x > x0) & (d.x < x1) & (d.y > y0) & (d.y < y1)
-        elif _area_is_polygon(area):
-            polygon = np.array(area)
-            xy = np.column_stack((d.x, d.y))
-            mask = _inside_polygon(polygon, xy)
-        else:
-            raise ValueError("area supports bbox [x0,y0,x1,y1] and closed polygon")
-        if self.gtype == "point":
-            # if False, return empty data
-            d = d if mask else d.isel(time=slice(None, 0))
-        else:
-            d = d.isel(time=mask)
-    return Comparer.from_matched_data(data=d, raw_mod_data=raw_mod_data)
+    Parameters
+    ----------
+    model : str or int or list of str or list of int, optional
+        Model name or index. If None, all models are selected.
+    start : str or datetime, optional
+        Start time. If None, all times are selected.
+    end : str or datetime, optional
+        End time. If None, all times are selected.
+    time : str or datetime, optional
+        Time. If None, all times are selected.
+    area : list of float, optional
+        bbox: [x0, y0, x1, y1] or Polygon. If None, all areas are selected.
+
+    Returns
+    -------
+    Comparer
+        New Comparer with selected data.
+    """
+    if (time is not None) and ((start is not None) or (end is not None)):
+        raise ValueError("Cannot use both time and start/end")
+
+    d = self.data
+    raw_mod_data = self.raw_mod_data
+    if model is not None:
+        if isinstance(model, (str, int)):
+            models = [model]
+        else:
+            models = list(model)
+        mod_names: List[str] = [_get_name(m, self.mod_names) for m in models]
+        dropped_models = [m for m in self.mod_names if m not in mod_names]
+        d = d.drop_vars(dropped_models)
+        raw_mod_data = {m: raw_mod_data[m] for m in mod_names}
+    if (start is not None) or (end is not None):
+        # TODO: can this be done without to_index? (simplify)
+        d = d.sel(time=d.time.to_index().to_frame().loc[start:end].index)  # type: ignore
+
+        # Note: if user asks for a specific time, we also filter raw
+        raw_mod_data = {
+            k: v.sel(time=slice(start, end)) for k, v in raw_mod_data.items()
+        }  # type: ignore
+    if time is not None:
+        d = d.sel(time=time)
+
+        # Note: if user asks for a specific time, we also filter raw
+        raw_mod_data = {k: v.sel(time=time) for k, v in raw_mod_data.items()}
+    if area is not None:
+        if _area_is_bbox(area):
+            x0, y0, x1, y1 = area
+            mask = (d.x > x0) & (d.x < x1) & (d.y > y0) & (d.y < y1)
+        elif _area_is_polygon(area):
+            polygon = np.array(area)
+            xy = np.column_stack((d.x, d.y))
+            mask = _inside_polygon(polygon, xy)
+        else:
+            raise ValueError("area supports bbox [x0,y0,x1,y1] and closed polygon")
+        if self.gtype == "point":
+            # if False, return empty data
+            d = d if mask else d.isel(time=slice(None, 0))
+        else:
+            d = d.isel(time=mask)
+    return Comparer.from_matched_data(data=d, raw_mod_data=raw_mod_data)
 
@@ -6099,28 +6099,7 @@

Source code in modelskill/comparison/_comparison.py -
 989
- 990
- 991
- 992
- 993
- 994
- 995
- 996
- 997
- 998
- 999
-1000
-1001
-1002
-1003
-1004
-1005
-1006
-1007
-1008
-1009
-1010
+              
1010
 1011
 1012
 1013
@@ -6169,77 +6148,98 @@ 

1056 1057 1058 -1059

def skill(
-    self,
-    by: str | Iterable[str] | None = None,
-    metrics: Iterable[str] | Iterable[Callable] | str | Callable | None = None,
-    **kwargs: Any,
-) -> SkillTable:
-    """Skill assessment of model(s)
-
-    Parameters
-    ----------
-    by : str or List[str], optional
-        group by, by default ["model"]
-
-        - by column name
-        - by temporal bin of the DateTimeIndex via the freq-argument
-        (using pandas pd.Grouper(freq)), e.g.: 'freq:M' = monthly; 'freq:D' daily
-        - by the dt accessor of the DateTimeIndex (e.g. 'dt.month') using the
-        syntax 'dt:month'. The dt-argument is different from the freq-argument
-        in that it gives month-of-year rather than month-of-data.
-    metrics : list, optional
-        list of modelskill.metrics, by default modelskill.options.metrics.list
-
-    Returns
-    -------
-    SkillTable
-        skill assessment object
-
-    See also
-    --------
-    sel
-        a method for filtering/selecting data
-
-    Examples
-    --------
-    >>> import modelskill as ms
-    >>> cc = ms.match(c2, mod)
-    >>> cc['c2'].skill().round(2)
-                   n  bias  rmse  urmse   mae    cc    si    r2
-    observation
-    c2           113 -0.00  0.35   0.35  0.29  0.97  0.12  0.99
-
-    >>> cc['c2'].skill(by='freq:D').round(2)
-                 n  bias  rmse  urmse   mae    cc    si    r2
-    2017-10-27  72 -0.19  0.31   0.25  0.26  0.48  0.12  0.98
-    2017-10-28   0   NaN   NaN    NaN   NaN   NaN   NaN   NaN
-    2017-10-29  41  0.33  0.41   0.25  0.36  0.96  0.06  0.99
-    """
-    metrics = _parse_metric(metrics, directional=self.quantity.is_directional)
-
-    # TODO remove in v1.1
-    model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore
-    if kwargs != {}:
-        raise AttributeError(f"Unknown keyword arguments: {kwargs}")
-
-    cmp = self.sel(
-        model=model,
-        start=start,
-        end=end,
-        area=area,
-    )
-    if cmp.n_points == 0:
-        raise ValueError("No data selected for skill assessment")
-
-    by = _parse_groupby(by, n_mod=cmp.n_models, n_qnt=1)
-
-    df = cmp._to_long_dataframe()
-    res = _groupby_df(df, by=by, metrics=metrics)
-    res["x"] = np.nan if self.gtype == "track" else cmp.x
-    res["y"] = np.nan if self.gtype == "track" else cmp.y
-    res = self._add_as_col_if_not_in_index(df, skilldf=res)
-    return SkillTable(res)
+1059
+1060
+1061
+1062
+1063
+1064
+1065
+1066
+1067
+1068
+1069
+1070
+1071
+1072
+1073
+1074
+1075
+1076
+1077
+1078
+1079
+1080
def skill(
+    self,
+    by: str | Iterable[str] | None = None,
+    metrics: Iterable[str] | Iterable[Callable] | str | Callable | None = None,
+    **kwargs: Any,
+) -> SkillTable:
+    """Skill assessment of model(s)
+
+    Parameters
+    ----------
+    by : str or List[str], optional
+        group by, by default ["model"]
+
+        - by column name
+        - by temporal bin of the DateTimeIndex via the freq-argument
+        (using pandas pd.Grouper(freq)), e.g.: 'freq:M' = monthly; 'freq:D' daily
+        - by the dt accessor of the DateTimeIndex (e.g. 'dt.month') using the
+        syntax 'dt:month'. The dt-argument is different from the freq-argument
+        in that it gives month-of-year rather than month-of-data.
+    metrics : list, optional
+        list of modelskill.metrics, by default modelskill.options.metrics.list
+
+    Returns
+    -------
+    SkillTable
+        skill assessment object
+
+    See also
+    --------
+    sel
+        a method for filtering/selecting data
+
+    Examples
+    --------
+    >>> import modelskill as ms
+    >>> cc = ms.match(c2, mod)
+    >>> cc['c2'].skill().round(2)
+                   n  bias  rmse  urmse   mae    cc    si    r2
+    observation
+    c2           113 -0.00  0.35   0.35  0.29  0.97  0.12  0.99
+
+    >>> cc['c2'].skill(by='freq:D').round(2)
+                 n  bias  rmse  urmse   mae    cc    si    r2
+    2017-10-27  72 -0.19  0.31   0.25  0.26  0.48  0.12  0.98
+    2017-10-28   0   NaN   NaN    NaN   NaN   NaN   NaN   NaN
+    2017-10-29  41  0.33  0.41   0.25  0.36  0.96  0.06  0.99
+    """
+    metrics = _parse_metric(metrics, directional=self.quantity.is_directional)
+
+    # TODO remove in v1.1
+    model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore
+    if kwargs != {}:
+        raise AttributeError(f"Unknown keyword arguments: {kwargs}")
+
+    cmp = self.sel(
+        model=model,
+        start=start,
+        end=end,
+        area=area,
+    )
+    if cmp.n_points == 0:
+        raise ValueError("No data selected for skill assessment")
+
+    by = _parse_groupby(by, n_mod=cmp.n_models, n_qnt=1)
+
+    df = cmp._to_long_dataframe()
+    res = _groupby_df(df, by=by, metrics=metrics)
+    res["x"] = np.nan if self.gtype == "track" else cmp.x
+    res["y"] = np.nan if self.gtype == "track" else cmp.y
+    res = self._add_as_col_if_not_in_index(df, skilldf=res)
+    return SkillTable(res)
 
@@ -6287,47 +6287,47 @@

Source code in modelskill/comparison/_comparison.py -
def to_dataframe(self) -> pd.DataFrame:
-    """Convert matched data to pandas DataFrame
-
-    Include x, y coordinates only if gtype=track
-
-    Returns
-    -------
-    pd.DataFrame
-        data as a pandas DataFrame
-    """
-    if self.gtype == str(GeometryType.POINT):
-        # we remove the scalar coordinate variables as they
-        # will otherwise be columns in the dataframe
-        return self.data.drop_vars(["x", "y", "z"]).to_dataframe()
-    elif self.gtype == str(GeometryType.TRACK):
-        df = self.data.drop_vars(["z"]).to_dataframe()
-        # make sure that x, y cols are first
-        cols = ["x", "y"] + [c for c in df.columns if c not in ["x", "y"]]
-        return df[cols]
-    else:
-        raise NotImplementedError(f"Unknown gtype: {self.gtype}")
+              
def to_dataframe(self) -> pd.DataFrame:
+    """Convert matched data to pandas DataFrame
+
+    Include x, y coordinates only if gtype=track
+
+    Returns
+    -------
+    pd.DataFrame
+        data as a pandas DataFrame
+    """
+    if self.gtype == str(GeometryType.POINT):
+        # we remove the scalar coordinate variables as they
+        # will otherwise be columns in the dataframe
+        return self.data.drop_vars(["x", "y", "z"]).to_dataframe()
+    elif self.gtype == str(GeometryType.TRACK):
+        df = self.data.drop_vars(["z"]).to_dataframe()
+        # make sure that x, y cols are first
+        cols = ["x", "y"] + [c for c in df.columns if c not in ["x", "y"]]
+        return df[cols]
+    else:
+        raise NotImplementedError(f"Unknown gtype: {self.gtype}")
 
@@ -6408,51 +6408,51 @@

Source code in modelskill/comparison/_comparison.py -
def where(
-    self,
-    cond: Union[bool, np.ndarray, xr.DataArray],
-) -> "Comparer":
-    """Return a new Comparer with values where cond is True
-
-    Parameters
-    ----------
-    cond : bool, np.ndarray, xr.DataArray
-        This selects the values to return.
-
-    Returns
-    -------
-    Comparer
-        New Comparer with values where cond is True and other otherwise.
-
-    Examples
-    --------
-    >>> c2 = c.where(c.data.Observation > 0)
-    """
-    d = self.data.where(cond, other=np.nan)
-    d = d.dropna(dim="time", how="all")
-    return Comparer.from_matched_data(d, self.raw_mod_data)
+              
def where(
+    self,
+    cond: Union[bool, np.ndarray, xr.DataArray],
+) -> "Comparer":
+    """Return a new Comparer with values where cond is True
+
+    Parameters
+    ----------
+    cond : bool, np.ndarray, xr.DataArray
+        This selects the values to return.
+
+    Returns
+    -------
+    Comparer
+        New Comparer with values where cond is True and other otherwise.
+
+    Examples
+    --------
+    >>> c2 = c.where(c.data.Observation > 0)
+    """
+    d = self.data.where(cond, other=np.nan)
+    d = d.dropna(dim="time", how="all")
+    return Comparer.from_matched_data(d, self.raw_mod_data)
 
diff --git a/api/matching/index.html b/api/matching/index.html index eac4a827..8ad62aec 100644 --- a/api/matching/index.html +++ b/api/matching/index.html @@ -1440,9 +1440,7 @@

Source code in modelskill/matching.py -
190
-191
-192
+              
192
 193
 194
 195
@@ -1540,107 +1538,109 @@ 

287 288 289 -290

def match(
-    obs,
-    mod,
-    *,
-    obs_item=None,
-    mod_item=None,
-    gtype=None,
-    max_model_gap=None,
-    spatial_method: Optional[str] = None,
-):
-    """Match observation and model result data in space and time
-
-    NOTE: In case of multiple model results with different time coverage,
-    only the _overlapping_ time period will be used! (intersection)
-
-    NOTE: In case of multiple observations, multiple models can _only_
-    be matched if they are _all_ of SpatialField type, e.g. DfsuModelResult
-    or GridModelResult.
-
-    Parameters
-    ----------
-    obs : (str, Path, pd.DataFrame, Observation, Sequence[Observation])
-        Observation(s) to be compared
-    mod : (str, Path, pd.DataFrame, ModelResult, Sequence[ModelResult])
-        Model result(s) to be compared
-    obs_item : int or str, optional
-        observation item if obs is a file/dataframe, by default None
-    mod_item : (int, str), optional
-        model item if mod is a file/dataframe, by default None
-    gtype : (str, optional)
-        Geometry type of the model result (if mod is a file/dataframe).
-        If not specified, it will be guessed.
-    max_model_gap : (float, optional)
-        Maximum time gap (s) in the model result (e.g. for event-based
-        model results), by default None
-    spatial_method : str, optional
-        For Dfsu- and GridModelResult, spatial interpolation/selection method.
-
-        - For DfsuModelResult, one of: 'contained' (=isel), 'nearest',
-        'inverse_distance' (with 5 nearest points), by default "inverse_distance".
-        - For GridModelResult, passed to xarray.interp() as method argument,
-        by default 'linear'.
-
-    Returns
-    -------
-    Comparer
-        In case of a single observation
-    ComparerCollection
-        In case of multiple observations
-
-    See Also
-    --------
-    [from_matched][modelskill.from_matched]
-        Create a Comparer from observation and model results that are already matched
-    """
-    if isinstance(obs, get_args(ObsInputType)):
-        return _single_obs_compare(
-            obs,
-            mod,
-            obs_item=obs_item,
-            mod_item=mod_item,
-            gtype=gtype,
-            max_model_gap=max_model_gap,
-            spatial_method=spatial_method,
-        )
-
-    if isinstance(obs, Collection):
-        assert all(isinstance(o, get_args(ObsInputType)) for o in obs)
-    else:
-        raise TypeError(
-            f"Obs is not the correct type: it is {type(obs)}. Check the order of the arguments (obs, mod)."
-        )
-
-    if len(obs) > 1 and isinstance(mod, Collection) and len(mod) > 1:
-        if not all(isinstance(m, (DfsuModelResult, GridModelResult)) for m in mod):
-            raise ValueError(
-                """
-                In case of multiple observations, multiple models can _only_ 
-                be matched if they are _all_ of SpatialField type, e.g. DfsuModelResult 
-                or GridModelResult. 
-
-                If you want match multiple point observations with multiple point model results, 
-                please match one observation at a time and then create a collection of these 
-                using modelskill.ComparerCollection(cmp_list) afterwards. The same applies to track data.
-                """
-            )
-
-    clist = [
-        _single_obs_compare(
-            o,
-            mod,
-            obs_item=obs_item,
-            mod_item=mod_item,
-            gtype=gtype,
-            max_model_gap=max_model_gap,
-            spatial_method=spatial_method,
-        )
-        for o in obs
-    ]
-
-    return ComparerCollection(clist)
+290
+291
+292
def match(
+    obs,
+    mod,
+    *,
+    obs_item=None,
+    mod_item=None,
+    gtype=None,
+    max_model_gap=None,
+    spatial_method: Optional[str] = None,
+):
+    """Match observation and model result data in space and time
+
+    NOTE: In case of multiple model results with different time coverage,
+    only the _overlapping_ time period will be used! (intersection)
+
+    NOTE: In case of multiple observations, multiple models can _only_
+    be matched if they are _all_ of SpatialField type, e.g. DfsuModelResult
+    or GridModelResult.
+
+    Parameters
+    ----------
+    obs : (str, Path, pd.DataFrame, Observation, Sequence[Observation])
+        Observation(s) to be compared
+    mod : (str, Path, pd.DataFrame, ModelResult, Sequence[ModelResult])
+        Model result(s) to be compared
+    obs_item : int or str, optional
+        observation item if obs is a file/dataframe, by default None
+    mod_item : (int, str), optional
+        model item if mod is a file/dataframe, by default None
+    gtype : (str, optional)
+        Geometry type of the model result (if mod is a file/dataframe).
+        If not specified, it will be guessed.
+    max_model_gap : (float, optional)
+        Maximum time gap (s) in the model result (e.g. for event-based
+        model results), by default None
+    spatial_method : str, optional
+        For Dfsu- and GridModelResult, spatial interpolation/selection method.
+
+        - For DfsuModelResult, one of: 'contained' (=isel), 'nearest',
+        'inverse_distance' (with 5 nearest points), by default "inverse_distance".
+        - For GridModelResult, passed to xarray.interp() as method argument,
+        by default 'linear'.
+
+    Returns
+    -------
+    Comparer
+        In case of a single observation
+    ComparerCollection
+        In case of multiple observations
+
+    See Also
+    --------
+    [from_matched][modelskill.from_matched]
+        Create a Comparer from observation and model results that are already matched
+    """
+    if isinstance(obs, get_args(ObsInputType)):
+        return _single_obs_compare(
+            obs,
+            mod,
+            obs_item=obs_item,
+            mod_item=mod_item,
+            gtype=gtype,
+            max_model_gap=max_model_gap,
+            spatial_method=spatial_method,
+        )
+
+    if isinstance(obs, Collection):
+        assert all(isinstance(o, get_args(ObsInputType)) for o in obs)
+    else:
+        raise TypeError(
+            f"Obs is not the correct type: it is {type(obs)}. Check the order of the arguments (obs, mod)."
+        )
+
+    if len(obs) > 1 and isinstance(mod, Collection) and len(mod) > 1:
+        if not all(isinstance(m, (DfsuModelResult, GridModelResult)) for m in mod):
+            raise ValueError(
+                """
+                In case of multiple observations, multiple models can _only_ 
+                be matched if they are _all_ of SpatialField type, e.g. DfsuModelResult 
+                or GridModelResult. 
+
+                If you want match multiple point observations with multiple point model results, 
+                please match one observation at a time and then create a collection of these 
+                using modelskill.ComparerCollection(cmp_list) afterwards. The same applies to track data.
+                """
+            )
+
+    clist = [
+        _single_obs_compare(
+            o,
+            mod,
+            obs_item=obs_item,
+            mod_item=mod_item,
+            gtype=gtype,
+            max_model_gap=max_model_gap,
+            spatial_method=spatial_method,
+        )
+        for o in obs
+    ]
+
+    return ComparerCollection(clist)
 
@@ -1945,7 +1945,9 @@

158 159 160 -161

def from_matched(
+161
+162
+163
def from_matched(
     data: Union[str, Path, pd.DataFrame, mikeio.Dfs0, mikeio.Dataset],
     *,
     obs_item: str | int | None = 0,
@@ -1961,81 +1963,83 @@ 

y_item: str | int | None = None, ) -> Comparer: """Create a Comparer from observation and model results that are already matched (aligned) - Parameters - ---------- - data : [pd.DataFrame, str, Path, mikeio.Dfs0, mikeio.Dataset] - DataFrame (or object that can be converted to a DataFrame e.g. dfs0) - with columns obs_item, mod_items, aux_items - obs_item : [str, int], optional - Name or index of observation item, by default first item - mod_items : Iterable[str, int], optional - Names or indicies of model items, if None all remaining columns are model items, by default None - aux_items : Iterable[str, int], optional - Names or indicies of auxiliary items, by default None - quantity : Quantity, optional - Quantity of the observation and model results, by default Quantity(name="Undefined", unit="Undefined") - name : str, optional - Name of the comparer, by default None (will be set to obs_item) - x : float, optional - x-coordinate of observation, by default None - y : float, optional - y-coordinate of observation, by default None - z : float, optional - z-coordinate of observation, by default None - x_item: [str, int], optional, - Name of x item, only relevant for track data - y_item: [str, int], optional - Name of y item, only relevant for track data - - Examples - -------- - >>> import pandas as pd - >>> import modelskill as ms - >>> df = pd.DataFrame({'stn_a': [1,2,3], 'local': [1.1,2.1,3.1]}, index=pd.date_range('2010-01-01', periods=3)) - >>> cmp = ms.from_matched(df, obs_item='stn_a') # remaining columns are model results - >>> cmp - <Comparer> - Quantity: Undefined [Undefined] - Observation: stn_a, n_points=3 - Model: local, rmse=0.100 - >>> df = pd.DataFrame({'stn_a': [1,2,3], 'local': [1.1,2.1,3.1], 'global': [1.2,2.2,3.2], 'nonsense':[1,2,3]}, index=pd.date_range('2010-01-01', periods=3)) - >>> cmp = ms.from_matched(df, obs_item='stn_a', mod_items=['local', 'global']) - >>> cmp - <Comparer> - Quantity: Undefined [Undefined] - Observation: stn_a, n_points=3 
- Model: local, rmse=0.100 - Model: global, rmse=0.200 - """ - # pre-process if dfs0, or mikeio.Dataset - if isinstance(data, (str, Path)): - if Path(data).suffix != ".dfs0": - raise ValueError(f"File must be a dfs0 file, not {Path(data).suffix}") - data = mikeio.read(data) # now mikeio.Dataset - elif isinstance(data, mikeio.Dfs0): - data = data.read() # now mikeio.Dataset - if isinstance(data, mikeio.Dataset): - assert len(data.shape) == 1, "Only 0-dimensional data are supported" - if quantity is None: - quantity = Quantity.from_mikeio_iteminfo(data[obs_item].item) - data = data.to_dataframe() - - cmp = Comparer.from_matched_data( - data, - obs_item=obs_item, - mod_items=mod_items, - aux_items=aux_items, - name=name, - weight=weight, - x=x, - y=y, - z=z, - x_item=x_item, - y_item=y_item, - quantity=quantity, - ) - - return cmp + + Parameters + ---------- + data : [pd.DataFrame, str, Path, mikeio.Dfs0, mikeio.Dataset] + DataFrame (or object that can be converted to a DataFrame e.g. dfs0) + with columns obs_item, mod_items, aux_items + obs_item : [str, int], optional + Name or index of observation item, by default first item + mod_items : Iterable[str, int], optional + Names or indicies of model items, if None all remaining columns are model items, by default None + aux_items : Iterable[str, int], optional + Names or indicies of auxiliary items, by default None + quantity : Quantity, optional + Quantity of the observation and model results, by default Quantity(name="Undefined", unit="Undefined") + name : str, optional + Name of the comparer, by default None (will be set to obs_item) + x : float, optional + x-coordinate of observation, by default None + y : float, optional + y-coordinate of observation, by default None + z : float, optional + z-coordinate of observation, by default None + x_item: [str, int], optional, + Name of x item, only relevant for track data + y_item: [str, int], optional + Name of y item, only relevant for track data + + Examples + -------- + 
>>> import pandas as pd + >>> import modelskill as ms + >>> df = pd.DataFrame({'stn_a': [1,2,3], 'local': [1.1,2.1,3.1]}, index=pd.date_range('2010-01-01', periods=3)) + >>> cmp = ms.from_matched(df, obs_item='stn_a') # remaining columns are model results + >>> cmp + <Comparer> + Quantity: Undefined [Undefined] + Observation: stn_a, n_points=3 + Model: local, rmse=0.100 + >>> df = pd.DataFrame({'stn_a': [1,2,3], 'local': [1.1,2.1,3.1], 'global': [1.2,2.2,3.2], 'nonsense':[1,2,3]}, index=pd.date_range('2010-01-01', periods=3)) + >>> cmp = ms.from_matched(df, obs_item='stn_a', mod_items=['local', 'global']) + >>> cmp + <Comparer> + Quantity: Undefined [Undefined] + Observation: stn_a, n_points=3 + Model: local, rmse=0.100 + Model: global, rmse=0.200 + + """ + # pre-process if dfs0, or mikeio.Dataset + if isinstance(data, (str, Path)): + if Path(data).suffix != ".dfs0": + raise ValueError(f"File must be a dfs0 file, not {Path(data).suffix}") + data = mikeio.read(data) # now mikeio.Dataset + elif isinstance(data, mikeio.Dfs0): + data = data.read() # now mikeio.Dataset + if isinstance(data, mikeio.Dataset): + assert len(data.shape) == 1, "Only 0-dimensional data are supported" + if quantity is None: + quantity = Quantity.from_mikeio_iteminfo(data[obs_item].item) + data = data.to_dataframe() + + cmp = Comparer.from_matched_data( + data, + obs_item=obs_item, + mod_items=mod_items, + aux_items=aux_items, + name=name, + weight=weight, + x=x, + y=y, + z=z, + x_item=x_item, + y_item=y_item, + quantity=quantity, + ) + + return cmp

diff --git a/search/search_index.json b/search/search_index.json index 3cc86187..3dbc1ab7 100644 --- a/search/search_index.json +++ b/search/search_index.json @@ -1 +1 @@ -{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Home","text":""},{"location":"#modelskill-assess-the-skill-of-your-mike-model","title":"ModelSkill: Assess the skill of your MIKE model","text":"

Compare results from MIKE simulations with observations. ModelSkill would like to be your companion during the different phases of a MIKE modelling workflow.

Useful links: Terminology | Overview | Plotting | Issues | Discussion

  • Set up in 5 minutes

    Install ModelSkill with pip and get up and running in minutes

    Getting started

  • It's just Python

    Focus on your modelling and less on generate a validation report

    API Reference

  • Made to measure

    Choose between different skill metrics and customizable tables and charts

    Metrics

  • Open Source, MIT

    ModelSkill is licensed under MIT and available on GitHub

    License

"},{"location":"license/","title":"License","text":"

MIT License

Copyright (c) 2024 DHI

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the \"Software\"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

"},{"location":"api/","title":"API Documentation","text":"

Obtain a comparer object in one of the following ways:

  • From matched data with from_matched()
  • After defining observations and model results using the match() function.
  • From a config file with from_config()

Do analysis and plotting with the returned Comparer (a single observation) or ComparerCollection (multiple observations):

  • skill() - returns a SkillTable with the skill scores
  • plot using the various plot methods of the comparer objects
    • plot.scatter()
    • plot.timeseries()
    • plot.kde()
    • plot.qq()
    • plot.hist()
"},{"location":"api/comparer/","title":"Comparer","text":"

The Comparer class is the main class of the ModelSkill package. It is returned by match(), from_matched() or as an element in a ComparerCollection. It holds the matched observation and model data for a single observation and has methods for plotting and skill assessment.

Main functionality:

  • selecting/filtering data
    • sel()
    • query()
  • skill assessment
    • skill()
    • gridded_skill() (for track observations)
  • plotting
    • plot.timeseries()
    • plot.scatter()
    • plot.kde()
    • plot.qq()
    • plot.hist()
    • plot.box()
  • load/save/export data
    • load()
    • save()
    • to_dataframe()
"},{"location":"api/comparer/#modelskill.Comparer","title":"modelskill.Comparer","text":"

Bases: Scoreable

Comparer class for comparing model and observation data.

Typically, the Comparer is part of a ComparerCollection, created with the match function.

Parameters:

Name Type Description Default matched_data Dataset

Matched data

required raw_mod_data dict of modelskill.TimeSeries

Raw model data. If None, observation and modeldata must be provided.

None

Examples:

>>> import modelskill as ms\n>>> cmp1 = ms.match(observation, modeldata)\n>>> cmp2 = ms.from_matched(matched_data)\n
See Also

modelskill.match, modelskill.from_matched

Source code in modelskill/comparison/_comparison.py
class Comparer(Scoreable):\n    \"\"\"\n    Comparer class for comparing model and observation data.\n\n    Typically, the Comparer is part of a ComparerCollection,\n    created with the `match` function.\n\n    Parameters\n    ----------\n    matched_data : xr.Dataset\n        Matched data\n    raw_mod_data : dict of modelskill.TimeSeries, optional\n        Raw model data. If None, observation and modeldata must be provided.\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> cmp1 = ms.match(observation, modeldata)\n    >>> cmp2 = ms.from_matched(matched_data)\n\n    See Also\n    --------\n    modelskill.match, modelskill.from_matched\n    \"\"\"\n\n    data: xr.Dataset\n    raw_mod_data: Dict[str, TimeSeries]\n    _obs_str = \"Observation\"\n    plotter = ComparerPlotter\n\n    def __init__(\n        self,\n        matched_data: xr.Dataset,\n        raw_mod_data: Optional[Dict[str, TimeSeries]] = None,\n    ) -> None:\n        self.data = _parse_dataset(matched_data)\n        self.raw_mod_data = (\n            raw_mod_data\n            if raw_mod_data is not None\n            else {\n                # key: ModelResult(value, gtype=self.data.gtype, name=key, x=self.x, y=self.y)\n                key: TimeSeries(self.data[[key]])\n                for key, value in matched_data.data_vars.items()\n                if value.attrs[\"kind\"] == \"model\"\n            }\n        )\n        # TODO: validate that the names in raw_mod_data are the same as in matched_data\n        assert isinstance(self.raw_mod_data, dict)\n        for k in self.raw_mod_data.keys():\n            v = self.raw_mod_data[k]\n            if not isinstance(v, TimeSeries):\n                try:\n                    self.raw_mod_data[k] = TimeSeries(v)\n                except Exception:\n                    raise ValueError(\n                        f\"raw_mod_data[{k}] could not be converted to a TimeSeries object\"\n                    )\n            else:\n                
assert isinstance(\n                    v, TimeSeries\n                ), f\"raw_mod_data[{k}] must be a TimeSeries object\"\n\n        self.plot = Comparer.plotter(self)\n        \"\"\"Plot using the ComparerPlotter\n\n        Examples\n        --------\n        >>> cmp.plot.timeseries()\n        >>> cmp.plot.scatter()\n        >>> cmp.plot.qq()\n        >>> cmp.plot.hist()\n        >>> cmp.plot.kde()\n        >>> cmp.plot.box()\n        >>> cmp.plot.residual_hist()\n        >>> cmp.plot.taylor()        \n        \"\"\"\n\n    @staticmethod\n    def from_matched_data(\n        data: xr.Dataset | pd.DataFrame,\n        raw_mod_data: Optional[Dict[str, TimeSeries]] = None,\n        obs_item: str | int | None = None,\n        mod_items: Optional[Iterable[str | int]] = None,\n        aux_items: Optional[Iterable[str | int]] = None,\n        name: Optional[str] = None,\n        weight: float = 1.0,\n        x: Optional[float] = None,\n        y: Optional[float] = None,\n        z: Optional[float] = None,\n        x_item: str | int | None = None,\n        y_item: str | int | None = None,\n        quantity: Optional[Quantity] = None,\n    ) -> \"Comparer\":\n        \"\"\"Initialize from compared data\"\"\"\n        if not isinstance(data, xr.Dataset):\n            # TODO: handle raw_mod_data by accessing data.attrs[\"kind\"] and only remove nan after\n            data = _matched_data_to_xarray(\n                data,\n                obs_item=obs_item,\n                mod_items=mod_items,\n                aux_items=aux_items,\n                name=name,\n                x=x,\n                y=y,\n                z=z,\n                x_item=x_item,\n                y_item=y_item,\n                quantity=quantity,\n            )\n            data.attrs[\"weight\"] = weight\n        return Comparer(matched_data=data, raw_mod_data=raw_mod_data)\n\n    def __repr__(self):\n        out = [\n            \"<Comparer>\",\n            f\"Quantity: {self.quantity}\",\n        
    f\"Observation: {self.name}, n_points={self.n_points}\",\n            \"Model(s):\",\n        ]\n        for index, model in enumerate(self.mod_names):\n            out.append(f\"{index}: {model}\")\n\n        for var in self.aux_names:\n            out.append(f\" Auxiliary: {var}\")\n        return str.join(\"\\n\", out)\n\n    @property\n    def name(self) -> str:\n        \"\"\"Name of comparer (=name of observation)\"\"\"\n        return str(self.data.attrs[\"name\"])\n\n    @name.setter\n    def name(self, name: str) -> None:\n        if name in _RESERVED_NAMES:\n            raise ValueError(\n                f\"Cannot rename to any of {_RESERVED_NAMES}, these are reserved names!\"\n            )\n        self.data.attrs[\"name\"] = name\n\n    @property\n    def gtype(self) -> str:\n        \"\"\"Geometry type\"\"\"\n        return str(self.data.attrs[\"gtype\"])\n\n    @property\n    def quantity(self) -> Quantity:\n        \"\"\"Quantity object\"\"\"\n        return Quantity(\n            name=self.data[self._obs_str].attrs[\"long_name\"],\n            unit=self.data[self._obs_str].attrs[\"units\"],\n            is_directional=bool(\n                self.data[self._obs_str].attrs.get(\"is_directional\", False)\n            ),\n        )\n\n    @quantity.setter\n    def quantity(self, quantity: Quantity) -> None:\n        assert isinstance(quantity, Quantity), \"value must be a Quantity object\"\n        self.data[self._obs_str].attrs[\"long_name\"] = quantity.name\n        self.data[self._obs_str].attrs[\"units\"] = quantity.unit\n        self.data[self._obs_str].attrs[\"is_directional\"] = int(quantity.is_directional)\n\n    @property\n    def n_points(self) -> int:\n        \"\"\"number of compared points\"\"\"\n        return len(self.data[self._obs_str]) if self.data else 0\n\n    @property\n    def time(self) -> pd.DatetimeIndex:\n        \"\"\"time of compared data as pandas DatetimeIndex\"\"\"\n        return self.data.time.to_index()\n\n    # 
TODO: Should we keep these? (renamed to start_time and end_time)\n    # @property\n    # def start(self) -> pd.Timestamp:\n    #     \"\"\"start pd.Timestamp of compared data\"\"\"\n    #     return self.time[0]\n\n    # @property\n    # def end(self) -> pd.Timestamp:\n    #     \"\"\"end pd.Timestamp of compared data\"\"\"\n    #     return self.time[-1]\n\n    @property\n    def x(self) -> Any:\n        \"\"\"x-coordinate\"\"\"\n        return self._coordinate_values(\"x\")\n\n    @property\n    def y(self) -> Any:\n        \"\"\"y-coordinate\"\"\"\n        return self._coordinate_values(\"y\")\n\n    @property\n    def z(self) -> Any:\n        \"\"\"z-coordinate\"\"\"\n        return self._coordinate_values(\"z\")\n\n    def _coordinate_values(self, coord: str) -> Any:\n        vals = self.data[coord].values\n        return np.atleast_1d(vals)[0] if vals.ndim == 0 else vals\n\n    @property\n    def n_models(self) -> int:\n        \"\"\"Number of model results\"\"\"\n        return len(self.mod_names)\n\n    @property\n    def mod_names(self) -> List[str]:\n        \"\"\"List of model result names\"\"\"\n        return list(self.raw_mod_data.keys())\n\n    def __contains__(self, key: str) -> bool:\n        return key in self.data.data_vars\n\n    @property\n    def aux_names(self) -> List[str]:\n        \"\"\"List of auxiliary data names\"\"\"\n        # we don't require the kind attribute to be \"auxiliary\"\n        return list(\n            [\n                k\n                for k, v in self.data.data_vars.items()\n                if v.attrs[\"kind\"] not in [\"observation\", \"model\"]\n            ]\n        )\n\n    # TODO: always \"Observation\", necessary to have this property?\n    @property\n    def _obs_name(self) -> str:\n        return self._obs_str\n\n    @property\n    def weight(self) -> float:\n        \"\"\"Weight of observation (used in ComparerCollection score() and mean_skill())\"\"\"\n        return float(self.data.attrs[\"weight\"])\n\n 
   @weight.setter\n    def weight(self, value: float) -> None:\n        self.data.attrs[\"weight\"] = float(value)\n\n    @property\n    def _unit_text(self) -> str:\n        # Quantity name and unit as text suitable for plot labels\n        return f\"{self.quantity.name} [{self.quantity.unit}]\"\n\n    @property\n    def attrs(self) -> dict[str, Any]:\n        \"\"\"Attributes of the observation\"\"\"\n        return self.data.attrs\n\n    @attrs.setter\n    def attrs(self, value: dict[str, Serializable]) -> None:\n        self.data.attrs = value\n\n    # TODO: is this the best way to copy (self.data.copy.. )\n    def __copy__(self) -> \"Comparer\":\n        return deepcopy(self)\n\n    def copy(self) -> \"Comparer\":\n        return self.__copy__()\n\n    def rename(\n        self, mapping: Mapping[str, str], errors: Literal[\"raise\", \"ignore\"] = \"raise\"\n    ) -> \"Comparer\":\n        \"\"\"Rename observation, model or auxiliary data variables\n\n        Parameters\n        ----------\n        mapping : dict\n            mapping of old names to new names\n        errors : {'raise', 'ignore'}, optional\n            If 'raise', raise a KeyError if any of the old names\n            do not exist in the data. 
By default 'raise'.\n\n        Returns\n        -------\n        Comparer\n\n        Examples\n        --------\n        >>> cmp = ms.match(observation, modeldata)\n        >>> cmp.mod_names\n        ['model1']\n        >>> cmp2 = cmp.rename({'model1': 'model2'})\n        >>> cmp2.mod_names\n        ['model2']\n        \"\"\"\n        if errors not in [\"raise\", \"ignore\"]:\n            raise ValueError(\"errors must be 'raise' or 'ignore'\")\n\n        allowed_keys = [self.name] + self.mod_names + self.aux_names\n        if errors == \"raise\":\n            for k in mapping.keys():\n                if k not in allowed_keys:\n                    raise KeyError(f\"Unknown key: {k}; must be one of {allowed_keys}\")\n        else:\n            # \"ignore\": silently remove keys that are not in allowed_keys\n            mapping = {k: v for k, v in mapping.items() if k in allowed_keys}\n\n        if any([k in _RESERVED_NAMES for k in mapping.values()]):\n            # TODO: also check for duplicates\n            raise ValueError(\n                f\"Cannot rename to any of {_RESERVED_NAMES}, these are reserved names!\"\n            )\n\n        # rename observation\n        obs_name = mapping.get(self.name, self.name)\n        ma_mapping = {k: v for k, v in mapping.items() if k != self.name}\n\n        data = self.data.rename(ma_mapping)\n        data.attrs[\"name\"] = obs_name\n        raw_mod_data = dict()\n        for k, v in self.raw_mod_data.items():\n            if k in ma_mapping:\n                # copy is needed here as the same raw data could be\n                # used for multiple Comparers!\n                v2 = v.copy()\n                v2.data = v2.data.rename({k: ma_mapping[k]})\n                raw_mod_data[ma_mapping[k]] = v2\n            else:\n                raw_mod_data[k] = v\n\n        return Comparer(matched_data=data, raw_mod_data=raw_mod_data)\n\n    def _to_observation(self) -> PointObservation | TrackObservation:\n        \"\"\"Convert to 
Observation\"\"\"\n        if self.gtype == \"point\":\n            df = self.data.drop_vars([\"x\", \"y\", \"z\"])[self._obs_str].to_dataframe()\n            return PointObservation(\n                data=df,\n                name=self.name,\n                x=self.x,\n                y=self.y,\n                z=self.z,\n                quantity=self.quantity,\n                # TODO: add attrs\n            )\n        elif self.gtype == \"track\":\n            df = self.data.drop_vars([\"z\"])[[self._obs_str]].to_dataframe()\n            return TrackObservation(\n                data=df,\n                item=0,\n                x_item=1,\n                y_item=2,\n                name=self.name,\n                quantity=self.quantity,\n                # TODO: add attrs\n            )\n        else:\n            raise NotImplementedError(f\"Unknown gtype: {self.gtype}\")\n\n    def __iadd__(self, other: Comparer):  # type: ignore\n        from ..matching import match_space_time\n\n        missing_models = set(self.mod_names) - set(other.mod_names)\n        if len(missing_models) == 0:\n            # same obs name and same model names\n            self.data = xr.concat([self.data, other.data], dim=\"time\").drop_duplicates(\n                \"time\"\n            )\n        else:\n            self.raw_mod_data.update(other.raw_mod_data)\n            matched = match_space_time(\n                observation=self._to_observation(),\n                raw_mod_data=self.raw_mod_data,  # type: ignore\n            )\n            self.data = matched\n\n        return self\n\n    def __add__(\n        self, other: Union[\"Comparer\", \"ComparerCollection\"]\n    ) -> \"ComparerCollection\" | \"Comparer\":\n        from ._collection import ComparerCollection\n        from ..matching import match_space_time\n\n        if not isinstance(other, (Comparer, ComparerCollection)):\n            raise TypeError(f\"Cannot add {type(other)} to {type(self)}\")\n\n        if 
isinstance(other, Comparer) and (self.name == other.name):\n            missing_models = set(self.mod_names) - set(other.mod_names)\n            if len(missing_models) == 0:\n                # same obs name and same model names\n                cmp = self.copy()\n                cmp.data = xr.concat(\n                    [cmp.data, other.data], dim=\"time\"\n                ).drop_duplicates(\"time\")\n\n            else:\n                raw_mod_data = self.raw_mod_data.copy()\n                raw_mod_data.update(other.raw_mod_data)  # TODO!\n                matched = match_space_time(\n                    observation=self._to_observation(),\n                    raw_mod_data=raw_mod_data,  # type: ignore\n                )\n                cmp = Comparer(matched_data=matched, raw_mod_data=raw_mod_data)\n\n            return cmp\n        else:\n            if isinstance(other, Comparer):\n                return ComparerCollection([self, other])\n            elif isinstance(other, ComparerCollection):\n                return ComparerCollection([self, *other])\n\n    def sel(\n        self,\n        model: Optional[IdxOrNameTypes] = None,\n        start: Optional[TimeTypes] = None,\n        end: Optional[TimeTypes] = None,\n        time: Optional[TimeTypes] = None,\n        area: Optional[List[float]] = None,\n    ) -> \"Comparer\":\n        \"\"\"Select data based on model, time and/or area.\n\n        Parameters\n        ----------\n        model : str or int or list of str or list of int, optional\n            Model name or index. If None, all models are selected.\n        start : str or datetime, optional\n            Start time. If None, all times are selected.\n        end : str or datetime, optional\n            End time. If None, all times are selected.\n        time : str or datetime, optional\n            Time. If None, all times are selected.\n        area : list of float, optional\n            bbox: [x0, y0, x1, y1] or Polygon. 
If None, all areas are selected.\n\n        Returns\n        -------\n        Comparer\n            New Comparer with selected data.\n        \"\"\"\n        if (time is not None) and ((start is not None) or (end is not None)):\n            raise ValueError(\"Cannot use both time and start/end\")\n\n        d = self.data\n        raw_mod_data = self.raw_mod_data\n        if model is not None:\n            if isinstance(model, (str, int)):\n                models = [model]\n            else:\n                models = list(model)\n            mod_names: List[str] = [_get_name(m, self.mod_names) for m in models]\n            dropped_models = [m for m in self.mod_names if m not in mod_names]\n            d = d.drop_vars(dropped_models)\n            raw_mod_data = {m: raw_mod_data[m] for m in mod_names}\n        if (start is not None) or (end is not None):\n            # TODO: can this be done without to_index? (simplify)\n            d = d.sel(time=d.time.to_index().to_frame().loc[start:end].index)  # type: ignore\n\n            # Note: if user asks for a specific time, we also filter raw\n            raw_mod_data = {\n                k: v.sel(time=slice(start, end)) for k, v in raw_mod_data.items()\n            }  # type: ignore\n        if time is not None:\n            d = d.sel(time=time)\n\n            # Note: if user asks for a specific time, we also filter raw\n            raw_mod_data = {k: v.sel(time=time) for k, v in raw_mod_data.items()}\n        if area is not None:\n            if _area_is_bbox(area):\n                x0, y0, x1, y1 = area\n                mask = (d.x > x0) & (d.x < x1) & (d.y > y0) & (d.y < y1)\n            elif _area_is_polygon(area):\n                polygon = np.array(area)\n                xy = np.column_stack((d.x, d.y))\n                mask = _inside_polygon(polygon, xy)\n            else:\n                raise ValueError(\"area supports bbox [x0,y0,x1,y1] and closed polygon\")\n            if self.gtype == \"point\":\n            
    # if False, return empty data\n                d = d if mask else d.isel(time=slice(None, 0))\n            else:\n                d = d.isel(time=mask)\n        return Comparer.from_matched_data(data=d, raw_mod_data=raw_mod_data)\n\n    def where(\n        self,\n        cond: Union[bool, np.ndarray, xr.DataArray],\n    ) -> \"Comparer\":\n        \"\"\"Return a new Comparer with values where cond is True\n\n        Parameters\n        ----------\n        cond : bool, np.ndarray, xr.DataArray\n            This selects the values to return.\n\n        Returns\n        -------\n        Comparer\n            New Comparer with values where cond is True and other otherwise.\n\n        Examples\n        --------\n        >>> c2 = c.where(c.data.Observation > 0)\n        \"\"\"\n        d = self.data.where(cond, other=np.nan)\n        d = d.dropna(dim=\"time\", how=\"all\")\n        return Comparer.from_matched_data(d, self.raw_mod_data)\n\n    def query(self, query: str) -> \"Comparer\":\n        \"\"\"Return a new Comparer with values where query cond is True\n\n        Parameters\n        ----------\n        query : str\n            Query string, see pandas.DataFrame.query\n\n        Returns\n        -------\n        Comparer\n            New Comparer with values where cond is True and other otherwise.\n\n        Examples\n        --------\n        >>> c2 = c.query(\"Observation > 0\")\n        \"\"\"\n        d = self.data.query({\"time\": query})\n        d = d.dropna(dim=\"time\", how=\"all\")\n        return Comparer.from_matched_data(d, self.raw_mod_data)\n\n    def _to_long_dataframe(\n        self, attrs_keys: Iterable[str] | None = None\n    ) -> pd.DataFrame:\n        \"\"\"Return a copy of the data as a long-format pandas DataFrame (for groupby operations)\"\"\"\n\n        data = self.data.drop_vars(\"z\", errors=\"ignore\")\n\n        # this step is necessary since we keep arbitrary derived data in the dataset, but not z\n        # i.e. 
using a hardcoded whitelist of variables to keep is less flexible\n        id_vars = [v for v in data.variables if v not in self.mod_names]\n\n        attrs = (\n            {key: data.attrs.get(key, False) for key in attrs_keys}\n            if attrs_keys\n            else {}\n        )\n\n        df = (\n            data.to_dataframe()\n            .reset_index()\n            .melt(\n                value_vars=self.mod_names,\n                var_name=\"model\",\n                value_name=\"mod_val\",\n                id_vars=id_vars,\n            )\n            .rename(columns={self._obs_str: \"obs_val\"})\n            .assign(observation=self.name)\n            .assign(**attrs)\n            .astype({\"model\": \"category\", \"observation\": \"category\"})\n        )\n\n        return df\n\n    def skill(\n        self,\n        by: str | Iterable[str] | None = None,\n        metrics: Iterable[str] | Iterable[Callable] | str | Callable | None = None,\n        **kwargs: Any,\n    ) -> SkillTable:\n        \"\"\"Skill assessment of model(s)\n\n        Parameters\n        ----------\n        by : str or List[str], optional\n            group by, by default [\"model\"]\n\n            - by column name\n            - by temporal bin of the DateTimeIndex via the freq-argument\n            (using pandas pd.Grouper(freq)), e.g.: 'freq:M' = monthly; 'freq:D' daily\n            - by the dt accessor of the DateTimeIndex (e.g. 'dt.month') using the\n            syntax 'dt:month'. 
The dt-argument is different from the freq-argument\n            in that it gives month-of-year rather than month-of-data.\n        metrics : list, optional\n            list of modelskill.metrics, by default modelskill.options.metrics.list\n\n        Returns\n        -------\n        SkillTable\n            skill assessment object\n\n        See also\n        --------\n        sel\n            a method for filtering/selecting data\n\n        Examples\n        --------\n        >>> import modelskill as ms\n        >>> cc = ms.match(c2, mod)\n        >>> cc['c2'].skill().round(2)\n                       n  bias  rmse  urmse   mae    cc    si    r2\n        observation\n        c2           113 -0.00  0.35   0.35  0.29  0.97  0.12  0.99\n\n        >>> cc['c2'].skill(by='freq:D').round(2)\n                     n  bias  rmse  urmse   mae    cc    si    r2\n        2017-10-27  72 -0.19  0.31   0.25  0.26  0.48  0.12  0.98\n        2017-10-28   0   NaN   NaN    NaN   NaN   NaN   NaN   NaN\n        2017-10-29  41  0.33  0.41   0.25  0.36  0.96  0.06  0.99\n        \"\"\"\n        metrics = _parse_metric(metrics, directional=self.quantity.is_directional)\n\n        # TODO remove in v1.1\n        model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore\n        if kwargs != {}:\n            raise AttributeError(f\"Unknown keyword arguments: {kwargs}\")\n\n        cmp = self.sel(\n            model=model,\n            start=start,\n            end=end,\n            area=area,\n        )\n        if cmp.n_points == 0:\n            raise ValueError(\"No data selected for skill assessment\")\n\n        by = _parse_groupby(by, n_mod=cmp.n_models, n_qnt=1)\n\n        df = cmp._to_long_dataframe()\n        res = _groupby_df(df, by=by, metrics=metrics)\n        res[\"x\"] = np.nan if self.gtype == \"track\" else cmp.x\n        res[\"y\"] = np.nan if self.gtype == \"track\" else cmp.y\n        res = self._add_as_col_if_not_in_index(df, skilldf=res)\n        return 
SkillTable(res)\n\n    def _add_as_col_if_not_in_index(\n        self, df: pd.DataFrame, skilldf: pd.DataFrame\n    ) -> pd.DataFrame:\n        \"\"\"Add a field to skilldf if unique in df\"\"\"\n        FIELDS = (\"observation\", \"model\")\n\n        for field in FIELDS:\n            if (field == \"model\") and (self.n_models <= 1):\n                continue\n            if field not in skilldf.index.names:\n                unames = df[field].unique()\n                if len(unames) == 1:\n                    skilldf.insert(loc=0, column=field, value=unames[0])\n        return skilldf\n\n    def score(\n        self,\n        metric: str | Callable = mtr.rmse,\n        **kwargs: Any,\n    ) -> Dict[str, float]:\n        \"\"\"Model skill score\n\n        Parameters\n        ----------\n        metric : list, optional\n            a single metric from modelskill.metrics, by default rmse\n\n        Returns\n        -------\n        dict[str, float]\n            skill score as a single number (for each model)\n\n        See also\n        --------\n        skill\n            a method for skill assessment returning a pd.DataFrame\n\n        Examples\n        --------\n        >>> import modelskill as ms\n        >>> cmp = ms.match(c2, mod)\n        >>> cmp.score()\n        {'mod': 0.3517964910888918}\n\n        >>> cmp.score(metric=\"mape\")\n        {'mod': 11.567399646108198}\n        \"\"\"\n        metric = _parse_metric(metric)[0]\n        if not (callable(metric) or isinstance(metric, str)):\n            raise ValueError(\"metric must be a string or a function\")\n\n        # TODO remove in v1.1\n        model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore\n        assert kwargs == {}, f\"Unknown keyword arguments: {kwargs}\"\n\n        sk = self.skill(\n            by=[\"model\", \"observation\"],\n            metrics=[metric],\n            model=model,  # deprecated\n            start=start,  # deprecated\n            end=end,  # deprecated\n 
           area=area,  # deprecated\n        )\n        df = sk.to_dataframe()\n\n        metric_name = metric if isinstance(metric, str) else metric.__name__\n        ser = df.reset_index().groupby(\"model\", observed=True)[metric_name].mean()\n        score = {str(k): float(v) for k, v in ser.items()}\n        return score\n\n    def gridded_skill(\n        self,\n        bins: int = 5,\n        binsize: float | None = None,\n        by: str | Iterable[str] | None = None,\n        metrics: Iterable[str] | Iterable[Callable] | str | Callable | None = None,\n        n_min: int | None = None,\n        **kwargs: Any,\n    ):\n        \"\"\"Aggregated spatial skill assessment of model(s) on a regular spatial grid.\n\n        Parameters\n        ----------\n        bins: int, list of scalars, or IntervalIndex, or tuple of, optional\n            criteria to bin x and y by, argument bins to pd.cut(), default 5\n            define different bins for x and y a tuple\n            e.g.: bins = 5, bins = (5,[2,3,5])\n        binsize : float, optional\n            bin size for x and y dimension, overwrites bins\n            creates bins with reference to round(mean(x)), round(mean(y))\n        by : (str, List[str]), optional\n            group by column name or by temporal bin via the freq-argument\n            (using pandas pd.Grouper(freq)),\n            e.g.: 'freq:M' = monthly; 'freq:D' daily\n            by default [\"model\",\"observation\"]\n        metrics : list, optional\n            list of modelskill.metrics, by default modelskill.options.metrics.list\n        n_min : int, optional\n            minimum number of observations in a grid cell;\n            cells with fewer observations get a score of `np.nan`\n\n        Returns\n        -------\n        SkillGrid\n            skill assessment as a SkillGrid object\n\n        See also\n        --------\n        skill\n            a method for aggregated skill assessment\n\n        Examples\n        --------\n        
>>> import modelskill as ms\n        >>> cmp = ms.match(c2, mod)   # satellite altimeter vs. model\n        >>> cmp.gridded_skill(metrics='bias')\n        <xarray.Dataset>\n        Dimensions:      (x: 5, y: 5)\n        Coordinates:\n            observation   'alti'\n        * x            (x) float64 -0.436 1.543 3.517 5.492 7.466\n        * y            (y) float64 50.6 51.66 52.7 53.75 54.8\n        Data variables:\n            n            (x, y) int32 3 0 0 14 37 17 50 36 72 ... 0 0 15 20 0 0 0 28 76\n            bias         (x, y) float64 -0.02626 nan nan ... nan 0.06785 -0.1143\n\n        >>> gs = cc.gridded_skill(binsize=0.5)\n        >>> gs.data.coords\n        Coordinates:\n            observation   'alti'\n        * x            (x) float64 -1.5 -0.5 0.5 1.5 2.5 3.5 4.5 5.5 6.5 7.5\n        * y            (y) float64 51.5 52.5 53.5 54.5 55.5 56.5\n        \"\"\"\n\n        # TODO remove in v1.1\n        model, start, end, area = _get_deprecated_args(kwargs)\n        assert kwargs == {}, f\"Unknown keyword arguments: {kwargs}\"\n\n        cmp = self.sel(\n            model=model,\n            start=start,\n            end=end,\n            area=area,\n        )\n\n        metrics = _parse_metric(metrics)\n        if cmp.n_points == 0:\n            raise ValueError(\"No data to compare\")\n\n        df = cmp._to_long_dataframe()\n        df = _add_spatial_grid_to_df(df=df, bins=bins, binsize=binsize)\n\n        agg_cols = _parse_groupby(by=by, n_mod=cmp.n_models, n_qnt=1)\n        if \"x\" not in agg_cols:\n            agg_cols.insert(0, \"x\")\n        if \"y\" not in agg_cols:\n            agg_cols.insert(0, \"y\")\n\n        df = df.drop(columns=[\"x\", \"y\"]).rename(columns=dict(xBin=\"x\", yBin=\"y\"))\n        res = _groupby_df(df, by=agg_cols, metrics=metrics, n_min=n_min)\n        ds = res.to_xarray().squeeze()\n\n        # change categorial index to coordinates\n        for dim in (\"x\", \"y\"):\n            ds[dim] = ds[dim].astype(float)\n\n  
      return SkillGrid(ds)\n\n    @property\n    def _residual(self) -> np.ndarray:\n        df = self.data.drop_vars([\"x\", \"y\", \"z\"]).to_dataframe()\n        obs = df[self._obs_str].values\n        mod = df[self.mod_names].values\n        return mod - np.vstack(obs)\n\n    def remove_bias(\n        self, correct: Literal[\"Model\", \"Observation\"] = \"Model\"\n    ) -> Comparer:\n        cmp = self.copy()\n\n        bias = cmp._residual.mean(axis=0)\n        if correct == \"Model\":\n            for j in range(cmp.n_models):\n                mod_name = cmp.mod_names[j]\n                mod_ts = cmp.raw_mod_data[mod_name]\n                with xr.set_options(keep_attrs=True):  # type: ignore\n                    mod_ts.data[mod_name].values = mod_ts.values - bias[j]\n                    cmp.data[mod_name].values = cmp.data[mod_name].values - bias[j]\n        elif correct == \"Observation\":\n            # what if multiple models?\n            with xr.set_options(keep_attrs=True):  # type: ignore\n                cmp.data[cmp._obs_str].values = cmp.data[cmp._obs_str].values + bias\n        else:\n            raise ValueError(\n                f\"Unknown correct={correct}. 
Only know 'Model' and 'Observation'\"\n            )\n        return cmp\n\n    def to_dataframe(self) -> pd.DataFrame:\n        \"\"\"Convert matched data to pandas DataFrame\n\n        Include x, y coordinates only if gtype=track\n\n        Returns\n        -------\n        pd.DataFrame\n            data as a pandas DataFrame\n        \"\"\"\n        if self.gtype == str(GeometryType.POINT):\n            # we remove the scalar coordinate variables as they\n            # will otherwise be columns in the dataframe\n            return self.data.drop_vars([\"x\", \"y\", \"z\"]).to_dataframe()\n        elif self.gtype == str(GeometryType.TRACK):\n            df = self.data.drop_vars([\"z\"]).to_dataframe()\n            # make sure that x, y cols are first\n            cols = [\"x\", \"y\"] + [c for c in df.columns if c not in [\"x\", \"y\"]]\n            return df[cols]\n        else:\n            raise NotImplementedError(f\"Unknown gtype: {self.gtype}\")\n\n    def save(self, filename: Union[str, Path]) -> None:\n        \"\"\"Save to netcdf file\n\n        Parameters\n        ----------\n        filename : str or Path\n            filename\n        \"\"\"\n        ds = self.data\n\n        # add self.raw_mod_data to ds with prefix 'raw_' to avoid name conflicts\n        # an alternative strategy would be to use NetCDF groups\n        # https://docs.xarray.dev/en/stable/user-guide/io.html#groups\n\n        # There is no need to save raw data for track data, since it is identical to the matched data\n        if self.gtype == \"point\":\n            ds = self.data.copy()  # copy needed to avoid modifying self.data\n\n            for key, ts_mod in self.raw_mod_data.items():\n                ts_mod = ts_mod.copy()\n                #  rename time to unique name\n                ts_mod.data = ts_mod.data.rename({\"time\": \"_time_raw_\" + key})\n                # da = ds_mod.to_xarray()[key]\n                ds[\"_raw_\" + key] = ts_mod.data[key]\n\n        
ds.to_netcdf(filename)\n\n    @staticmethod\n    def load(filename: Union[str, Path]) -> \"Comparer\":\n        \"\"\"Load from netcdf file\n\n        Parameters\n        ----------\n        filename : str or Path\n            filename\n\n        Returns\n        -------\n        Comparer\n        \"\"\"\n        with xr.open_dataset(filename) as ds:\n            data = ds.load()\n\n        if data.gtype == \"track\":\n            return Comparer(matched_data=data)\n\n        if data.gtype == \"point\":\n            raw_mod_data: Dict[str, TimeSeries] = {}\n\n            for var in data.data_vars:\n                var_name = str(var)\n                if var_name[:5] == \"_raw_\":\n                    new_key = var_name[5:]  # remove prefix '_raw_'\n                    ds = data[[var_name]].rename(\n                        {\"_time_raw_\" + new_key: \"time\", var_name: new_key}\n                    )\n                    ts = PointObservation(data=ds, name=new_key)\n                    # TODO: name of time?\n                    # ts.name = new_key\n                    # df = (\n                    #     data[var_name]\n                    #     .to_dataframe()\n                    #     .rename(\n                    #         columns={\"_time_raw_\" + new_key: \"time\", var_name: new_key}\n                    #     )\n                    # )\n                    raw_mod_data[new_key] = ts\n\n                    # data = data.drop(var_name).drop(\"_time_raw_\" + new_key)\n\n            # filter variables, only keep the ones with a 'time' dimension\n            data = data[[v for v in data.data_vars if \"time\" in data[v].dims]]\n\n            return Comparer(matched_data=data, raw_mod_data=raw_mod_data)\n\n        else:\n            raise NotImplementedError(f\"Unknown gtype: {data.gtype}\")\n\n    # =============== Deprecated methods ===============\n\n    def spatial_skill(\n        self,\n        bins=5,\n        binsize=None,\n        by=None,\n        
metrics=None,\n        n_min=None,\n        **kwargs,\n    ):\n        # deprecated\n        warnings.warn(\n            \"spatial_skill is deprecated, use gridded_skill instead\", FutureWarning\n        )\n        return self.gridded_skill(\n            bins=bins,\n            binsize=binsize,\n            by=by,\n            metrics=metrics,\n            n_min=n_min,\n            **kwargs,\n        )\n\n    # TODO remove plotting methods in v1.1\n    def scatter(\n        self,\n        *,\n        bins=120,\n        quantiles=None,\n        fit_to_quantiles=False,\n        show_points=None,\n        show_hist=None,\n        show_density=None,\n        norm=None,\n        backend=\"matplotlib\",\n        figsize=(8, 8),\n        xlim=None,\n        ylim=None,\n        reg_method=\"ols\",\n        title=None,\n        xlabel=None,\n        ylabel=None,\n        skill_table=None,\n        **kwargs,\n    ):\n        warnings.warn(\n            \"This method is deprecated, use plot.scatter instead\", FutureWarning\n        )\n\n        # TODO remove in v1.1\n        model, start, end, area = _get_deprecated_args(kwargs)\n\n        # self.plot.scatter(\n        self.sel(\n            model=model,\n            start=start,\n            end=end,\n            area=area,\n        ).plot.scatter(\n            bins=bins,\n            quantiles=quantiles,\n            fit_to_quantiles=fit_to_quantiles,\n            show_points=show_points,\n            show_hist=show_hist,\n            show_density=show_density,\n            norm=norm,\n            backend=backend,\n            figsize=figsize,\n            xlim=xlim,\n            ylim=ylim,\n            reg_method=reg_method,\n            title=title,\n            xlabel=xlabel,\n            ylabel=ylabel,\n            **kwargs,\n        )\n\n    def taylor(\n        self,\n        normalize_std=False,\n        figsize=(7, 7),\n        marker=\"o\",\n        marker_size=6.0,\n        title=\"Taylor diagram\",\n        
**kwargs,\n    ):\n        warnings.warn(\"taylor is deprecated, use plot.taylor instead\", FutureWarning)\n\n        self.plot.taylor(\n            normalize_std=normalize_std,\n            figsize=figsize,\n            marker=marker,\n            marker_size=marker_size,\n            title=title,\n            **kwargs,\n        )\n\n    def hist(\n        self, *, model=None, bins=100, title=None, density=True, alpha=0.5, **kwargs\n    ):\n        warnings.warn(\"hist is deprecated. Use plot.hist instead.\", FutureWarning)\n        return self.plot.hist(\n            model=model, bins=bins, title=title, density=density, alpha=alpha, **kwargs\n        )\n\n    def kde(self, ax=None, **kwargs) -> Axes:\n        warnings.warn(\"kde is deprecated. Use plot.kde instead.\", FutureWarning)\n\n        return self.plot.kde(ax=ax, **kwargs)\n\n    def plot_timeseries(\n        self, title=None, *, ylim=None, figsize=None, backend=\"matplotlib\", **kwargs\n    ):\n        warnings.warn(\n            \"plot_timeseries is deprecated. Use plot.timeseries instead.\", FutureWarning\n        )\n\n        return self.plot.timeseries(\n            title=title, ylim=ylim, figsize=figsize, backend=backend, **kwargs\n        )\n\n    def residual_hist(self, bins=100, title=None, color=None, **kwargs):\n        warnings.warn(\n            \"residual_hist is deprecated. Use plot.residual_hist instead.\",\n            FutureWarning,\n        )\n\n        return self.plot.residual_hist(bins=bins, title=title, color=color, **kwargs)\n
"},{"location":"api/comparer/#modelskill.Comparer.attrs","title":"attrs property writable","text":"
attrs\n

Attributes of the observation

"},{"location":"api/comparer/#modelskill.Comparer.aux_names","title":"aux_names property","text":"
aux_names\n

List of auxiliary data names

"},{"location":"api/comparer/#modelskill.Comparer.gtype","title":"gtype property","text":"
gtype\n

Geometry type

"},{"location":"api/comparer/#modelskill.Comparer.mod_names","title":"mod_names property","text":"
mod_names\n

List of model result names

"},{"location":"api/comparer/#modelskill.Comparer.n_models","title":"n_models property","text":"
n_models\n

Number of model results

"},{"location":"api/comparer/#modelskill.Comparer.n_points","title":"n_points property","text":"
n_points\n

number of compared points

"},{"location":"api/comparer/#modelskill.Comparer.name","title":"name property writable","text":"
name\n

Name of comparer (=name of observation)

"},{"location":"api/comparer/#modelskill.Comparer.plot","title":"plot instance-attribute","text":"
plot = plotter(self)\n

Plot using the ComparerPlotter

Examples:

>>> cmp.plot.timeseries()\n>>> cmp.plot.scatter()\n>>> cmp.plot.qq()\n>>> cmp.plot.hist()\n>>> cmp.plot.kde()\n>>> cmp.plot.box()\n>>> cmp.plot.residual_hist()\n>>> cmp.plot.taylor()\n
"},{"location":"api/comparer/#modelskill.Comparer.quantity","title":"quantity property writable","text":"
quantity\n

Quantity object

"},{"location":"api/comparer/#modelskill.Comparer.time","title":"time property","text":"
time\n

time of compared data as pandas DatetimeIndex

"},{"location":"api/comparer/#modelskill.Comparer.weight","title":"weight property writable","text":"
weight\n

Weight of observation (used in ComparerCollection score() and mean_skill())

"},{"location":"api/comparer/#modelskill.Comparer.x","title":"x property","text":"
x\n

x-coordinate

"},{"location":"api/comparer/#modelskill.Comparer.y","title":"y property","text":"
y\n

y-coordinate

"},{"location":"api/comparer/#modelskill.Comparer.z","title":"z property","text":"
z\n

z-coordinate

"},{"location":"api/comparer/#modelskill.Comparer.from_matched_data","title":"from_matched_data staticmethod","text":"
from_matched_data(data, raw_mod_data=None, obs_item=None, mod_items=None, aux_items=None, name=None, weight=1.0, x=None, y=None, z=None, x_item=None, y_item=None, quantity=None)\n

Initialize from compared data

Source code in modelskill/comparison/_comparison.py
@staticmethod\ndef from_matched_data(\n    data: xr.Dataset | pd.DataFrame,\n    raw_mod_data: Optional[Dict[str, TimeSeries]] = None,\n    obs_item: str | int | None = None,\n    mod_items: Optional[Iterable[str | int]] = None,\n    aux_items: Optional[Iterable[str | int]] = None,\n    name: Optional[str] = None,\n    weight: float = 1.0,\n    x: Optional[float] = None,\n    y: Optional[float] = None,\n    z: Optional[float] = None,\n    x_item: str | int | None = None,\n    y_item: str | int | None = None,\n    quantity: Optional[Quantity] = None,\n) -> \"Comparer\":\n    \"\"\"Initialize from compared data\"\"\"\n    if not isinstance(data, xr.Dataset):\n        # TODO: handle raw_mod_data by accessing data.attrs[\"kind\"] and only remove nan after\n        data = _matched_data_to_xarray(\n            data,\n            obs_item=obs_item,\n            mod_items=mod_items,\n            aux_items=aux_items,\n            name=name,\n            x=x,\n            y=y,\n            z=z,\n            x_item=x_item,\n            y_item=y_item,\n            quantity=quantity,\n        )\n        data.attrs[\"weight\"] = weight\n    return Comparer(matched_data=data, raw_mod_data=raw_mod_data)\n
"},{"location":"api/comparer/#modelskill.Comparer.gridded_skill","title":"gridded_skill","text":"
gridded_skill(bins=5, binsize=None, by=None, metrics=None, n_min=None, **kwargs)\n

Aggregated spatial skill assessment of model(s) on a regular spatial grid.

Parameters:

Name Type Description Default bins int

criteria to bin x and y by, argument bins to pd.cut(), default 5 define different bins for x and y a tuple e.g.: bins = 5, bins = (5,[2,3,5])

5 binsize float

bin size for x and y dimension, overwrites bins creates bins with reference to round(mean(x)), round(mean(y))

None by (str, List[str])

group by column name or by temporal bin via the freq-argument (using pandas pd.Grouper(freq)), e.g.: 'freq:M' = monthly; 'freq:D' daily by default [\"model\",\"observation\"]

None metrics list

list of modelskill.metrics, by default modelskill.options.metrics.list

None n_min int

minimum number of observations in a grid cell; cells with fewer observations get a score of np.nan

None

Returns:

Type Description SkillGrid

skill assessment as a SkillGrid object

See also

skill a method for aggregated skill assessment

Examples:

>>> import modelskill as ms\n>>> cmp = ms.match(c2, mod)   # satellite altimeter vs. model\n>>> cmp.gridded_skill(metrics='bias')\n<xarray.Dataset>\nDimensions:      (x: 5, y: 5)\nCoordinates:\n    observation   'alti'\n* x            (x) float64 -0.436 1.543 3.517 5.492 7.466\n* y            (y) float64 50.6 51.66 52.7 53.75 54.8\nData variables:\n    n            (x, y) int32 3 0 0 14 37 17 50 36 72 ... 0 0 15 20 0 0 0 28 76\n    bias         (x, y) float64 -0.02626 nan nan ... nan 0.06785 -0.1143\n
>>> gs = cc.gridded_skill(binsize=0.5)\n>>> gs.data.coords\nCoordinates:\n    observation   'alti'\n* x            (x) float64 -1.5 -0.5 0.5 1.5 2.5 3.5 4.5 5.5 6.5 7.5\n* y            (y) float64 51.5 52.5 53.5 54.5 55.5 56.5\n
Source code in modelskill/comparison/_comparison.py
def gridded_skill(\n    self,\n    bins: int = 5,\n    binsize: float | None = None,\n    by: str | Iterable[str] | None = None,\n    metrics: Iterable[str] | Iterable[Callable] | str | Callable | None = None,\n    n_min: int | None = None,\n    **kwargs: Any,\n):\n    \"\"\"Aggregated spatial skill assessment of model(s) on a regular spatial grid.\n\n    Parameters\n    ----------\n    bins: int, list of scalars, or IntervalIndex, or tuple of, optional\n        criteria to bin x and y by, argument bins to pd.cut(), default 5\n        define different bins for x and y a tuple\n        e.g.: bins = 5, bins = (5,[2,3,5])\n    binsize : float, optional\n        bin size for x and y dimension, overwrites bins\n        creates bins with reference to round(mean(x)), round(mean(y))\n    by : (str, List[str]), optional\n        group by column name or by temporal bin via the freq-argument\n        (using pandas pd.Grouper(freq)),\n        e.g.: 'freq:M' = monthly; 'freq:D' daily\n        by default [\"model\",\"observation\"]\n    metrics : list, optional\n        list of modelskill.metrics, by default modelskill.options.metrics.list\n    n_min : int, optional\n        minimum number of observations in a grid cell;\n        cells with fewer observations get a score of `np.nan`\n\n    Returns\n    -------\n    SkillGrid\n        skill assessment as a SkillGrid object\n\n    See also\n    --------\n    skill\n        a method for aggregated skill assessment\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> cmp = ms.match(c2, mod)   # satellite altimeter vs. model\n    >>> cmp.gridded_skill(metrics='bias')\n    <xarray.Dataset>\n    Dimensions:      (x: 5, y: 5)\n    Coordinates:\n        observation   'alti'\n    * x            (x) float64 -0.436 1.543 3.517 5.492 7.466\n    * y            (y) float64 50.6 51.66 52.7 53.75 54.8\n    Data variables:\n        n            (x, y) int32 3 0 0 14 37 17 50 36 72 ... 
0 0 15 20 0 0 0 28 76\n        bias         (x, y) float64 -0.02626 nan nan ... nan 0.06785 -0.1143\n\n    >>> gs = cc.gridded_skill(binsize=0.5)\n    >>> gs.data.coords\n    Coordinates:\n        observation   'alti'\n    * x            (x) float64 -1.5 -0.5 0.5 1.5 2.5 3.5 4.5 5.5 6.5 7.5\n    * y            (y) float64 51.5 52.5 53.5 54.5 55.5 56.5\n    \"\"\"\n\n    # TODO remove in v1.1\n    model, start, end, area = _get_deprecated_args(kwargs)\n    assert kwargs == {}, f\"Unknown keyword arguments: {kwargs}\"\n\n    cmp = self.sel(\n        model=model,\n        start=start,\n        end=end,\n        area=area,\n    )\n\n    metrics = _parse_metric(metrics)\n    if cmp.n_points == 0:\n        raise ValueError(\"No data to compare\")\n\n    df = cmp._to_long_dataframe()\n    df = _add_spatial_grid_to_df(df=df, bins=bins, binsize=binsize)\n\n    agg_cols = _parse_groupby(by=by, n_mod=cmp.n_models, n_qnt=1)\n    if \"x\" not in agg_cols:\n        agg_cols.insert(0, \"x\")\n    if \"y\" not in agg_cols:\n        agg_cols.insert(0, \"y\")\n\n    df = df.drop(columns=[\"x\", \"y\"]).rename(columns=dict(xBin=\"x\", yBin=\"y\"))\n    res = _groupby_df(df, by=agg_cols, metrics=metrics, n_min=n_min)\n    ds = res.to_xarray().squeeze()\n\n    # change categorial index to coordinates\n    for dim in (\"x\", \"y\"):\n        ds[dim] = ds[dim].astype(float)\n\n    return SkillGrid(ds)\n
"},{"location":"api/comparer/#modelskill.Comparer.load","title":"load staticmethod","text":"
load(filename)\n

Load from netcdf file

Parameters:

Name Type Description Default filename str or Path

filename

required

Returns:

Type Description Comparer Source code in modelskill/comparison/_comparison.py
@staticmethod\ndef load(filename: Union[str, Path]) -> \"Comparer\":\n    \"\"\"Load from netcdf file\n\n    Parameters\n    ----------\n    filename : str or Path\n        filename\n\n    Returns\n    -------\n    Comparer\n    \"\"\"\n    with xr.open_dataset(filename) as ds:\n        data = ds.load()\n\n    if data.gtype == \"track\":\n        return Comparer(matched_data=data)\n\n    if data.gtype == \"point\":\n        raw_mod_data: Dict[str, TimeSeries] = {}\n\n        for var in data.data_vars:\n            var_name = str(var)\n            if var_name[:5] == \"_raw_\":\n                new_key = var_name[5:]  # remove prefix '_raw_'\n                ds = data[[var_name]].rename(\n                    {\"_time_raw_\" + new_key: \"time\", var_name: new_key}\n                )\n                ts = PointObservation(data=ds, name=new_key)\n                # TODO: name of time?\n                # ts.name = new_key\n                # df = (\n                #     data[var_name]\n                #     .to_dataframe()\n                #     .rename(\n                #         columns={\"_time_raw_\" + new_key: \"time\", var_name: new_key}\n                #     )\n                # )\n                raw_mod_data[new_key] = ts\n\n                # data = data.drop(var_name).drop(\"_time_raw_\" + new_key)\n\n        # filter variables, only keep the ones with a 'time' dimension\n        data = data[[v for v in data.data_vars if \"time\" in data[v].dims]]\n\n        return Comparer(matched_data=data, raw_mod_data=raw_mod_data)\n\n    else:\n        raise NotImplementedError(f\"Unknown gtype: {data.gtype}\")\n
"},{"location":"api/comparer/#modelskill.Comparer.query","title":"query","text":"
query(query)\n

Return a new Comparer with values where query cond is True

Parameters:

Name Type Description Default query str

Query string, see pandas.DataFrame.query

required

Returns:

Type Description Comparer

New Comparer with values where cond is True and other otherwise.

Examples:

>>> c2 = c.query(\"Observation > 0\")\n
Source code in modelskill/comparison/_comparison.py
def query(self, query: str) -> \"Comparer\":\n    \"\"\"Return a new Comparer with values where query cond is True\n\n    Parameters\n    ----------\n    query : str\n        Query string, see pandas.DataFrame.query\n\n    Returns\n    -------\n    Comparer\n        New Comparer with values where cond is True and other otherwise.\n\n    Examples\n    --------\n    >>> c2 = c.query(\"Observation > 0\")\n    \"\"\"\n    d = self.data.query({\"time\": query})\n    d = d.dropna(dim=\"time\", how=\"all\")\n    return Comparer.from_matched_data(d, self.raw_mod_data)\n
"},{"location":"api/comparer/#modelskill.Comparer.rename","title":"rename","text":"
rename(mapping, errors='raise')\n

Rename observation, model or auxiliary data variables

Parameters:

Name Type Description Default mapping dict

mapping of old names to new names

required errors ('raise', 'ignore')

If 'raise', raise a KeyError if any of the old names do not exist in the data. By default 'raise'.

'raise'

Returns:

Type Description Comparer

Examples:

>>> cmp = ms.match(observation, modeldata)\n>>> cmp.mod_names\n['model1']\n>>> cmp2 = cmp.rename({'model1': 'model2'})\n>>> cmp2.mod_names\n['model2']\n
Source code in modelskill/comparison/_comparison.py
def rename(\n    self, mapping: Mapping[str, str], errors: Literal[\"raise\", \"ignore\"] = \"raise\"\n) -> \"Comparer\":\n    \"\"\"Rename observation, model or auxiliary data variables\n\n    Parameters\n    ----------\n    mapping : dict\n        mapping of old names to new names\n    errors : {'raise', 'ignore'}, optional\n        If 'raise', raise a KeyError if any of the old names\n        do not exist in the data. By default 'raise'.\n\n    Returns\n    -------\n    Comparer\n\n    Examples\n    --------\n    >>> cmp = ms.match(observation, modeldata)\n    >>> cmp.mod_names\n    ['model1']\n    >>> cmp2 = cmp.rename({'model1': 'model2'})\n    >>> cmp2.mod_names\n    ['model2']\n    \"\"\"\n    if errors not in [\"raise\", \"ignore\"]:\n        raise ValueError(\"errors must be 'raise' or 'ignore'\")\n\n    allowed_keys = [self.name] + self.mod_names + self.aux_names\n    if errors == \"raise\":\n        for k in mapping.keys():\n            if k not in allowed_keys:\n                raise KeyError(f\"Unknown key: {k}; must be one of {allowed_keys}\")\n    else:\n        # \"ignore\": silently remove keys that are not in allowed_keys\n        mapping = {k: v for k, v in mapping.items() if k in allowed_keys}\n\n    if any([k in _RESERVED_NAMES for k in mapping.values()]):\n        # TODO: also check for duplicates\n        raise ValueError(\n            f\"Cannot rename to any of {_RESERVED_NAMES}, these are reserved names!\"\n        )\n\n    # rename observation\n    obs_name = mapping.get(self.name, self.name)\n    ma_mapping = {k: v for k, v in mapping.items() if k != self.name}\n\n    data = self.data.rename(ma_mapping)\n    data.attrs[\"name\"] = obs_name\n    raw_mod_data = dict()\n    for k, v in self.raw_mod_data.items():\n        if k in ma_mapping:\n            # copy is needed here as the same raw data could be\n            # used for multiple Comparers!\n            v2 = v.copy()\n            v2.data = v2.data.rename({k: ma_mapping[k]})\n          
  raw_mod_data[ma_mapping[k]] = v2\n        else:\n            raw_mod_data[k] = v\n\n    return Comparer(matched_data=data, raw_mod_data=raw_mod_data)\n
"},{"location":"api/comparer/#modelskill.Comparer.save","title":"save","text":"
save(filename)\n

Save to netcdf file

Parameters:

Name Type Description Default filename str or Path

filename

required Source code in modelskill/comparison/_comparison.py
def save(self, filename: Union[str, Path]) -> None:\n    \"\"\"Save to netcdf file\n\n    Parameters\n    ----------\n    filename : str or Path\n        filename\n    \"\"\"\n    ds = self.data\n\n    # add self.raw_mod_data to ds with prefix 'raw_' to avoid name conflicts\n    # an alternative strategy would be to use NetCDF groups\n    # https://docs.xarray.dev/en/stable/user-guide/io.html#groups\n\n    # There is no need to save raw data for track data, since it is identical to the matched data\n    if self.gtype == \"point\":\n        ds = self.data.copy()  # copy needed to avoid modifying self.data\n\n        for key, ts_mod in self.raw_mod_data.items():\n            ts_mod = ts_mod.copy()\n            #  rename time to unique name\n            ts_mod.data = ts_mod.data.rename({\"time\": \"_time_raw_\" + key})\n            # da = ds_mod.to_xarray()[key]\n            ds[\"_raw_\" + key] = ts_mod.data[key]\n\n    ds.to_netcdf(filename)\n
"},{"location":"api/comparer/#modelskill.Comparer.score","title":"score","text":"
score(metric=mtr.rmse, **kwargs)\n

Model skill score

Parameters:

Name Type Description Default metric list

a single metric from modelskill.metrics, by default rmse

rmse

Returns:

Type Description dict[str, float]

skill score as a single number (for each model)

See also

skill a method for skill assessment returning a pd.DataFrame

Examples:

>>> import modelskill as ms\n>>> cmp = ms.match(c2, mod)\n>>> cmp.score()\n{'mod': 0.3517964910888918}\n
>>> cmp.score(metric=\"mape\")\n{'mod': 11.567399646108198}\n
Source code in modelskill/comparison/_comparison.py
def score(\n    self,\n    metric: str | Callable = mtr.rmse,\n    **kwargs: Any,\n) -> Dict[str, float]:\n    \"\"\"Model skill score\n\n    Parameters\n    ----------\n    metric : list, optional\n        a single metric from modelskill.metrics, by default rmse\n\n    Returns\n    -------\n    dict[str, float]\n        skill score as a single number (for each model)\n\n    See also\n    --------\n    skill\n        a method for skill assessment returning a pd.DataFrame\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> cmp = ms.match(c2, mod)\n    >>> cmp.score()\n    {'mod': 0.3517964910888918}\n\n    >>> cmp.score(metric=\"mape\")\n    {'mod': 11.567399646108198}\n    \"\"\"\n    metric = _parse_metric(metric)[0]\n    if not (callable(metric) or isinstance(metric, str)):\n        raise ValueError(\"metric must be a string or a function\")\n\n    # TODO remove in v1.1\n    model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore\n    assert kwargs == {}, f\"Unknown keyword arguments: {kwargs}\"\n\n    sk = self.skill(\n        by=[\"model\", \"observation\"],\n        metrics=[metric],\n        model=model,  # deprecated\n        start=start,  # deprecated\n        end=end,  # deprecated\n        area=area,  # deprecated\n    )\n    df = sk.to_dataframe()\n\n    metric_name = metric if isinstance(metric, str) else metric.__name__\n    ser = df.reset_index().groupby(\"model\", observed=True)[metric_name].mean()\n    score = {str(k): float(v) for k, v in ser.items()}\n    return score\n
"},{"location":"api/comparer/#modelskill.Comparer.sel","title":"sel","text":"
sel(model=None, start=None, end=None, time=None, area=None)\n

Select data based on model, time and/or area.

Parameters:

Name Type Description Default model str or int or list of str or list of int

Model name or index. If None, all models are selected.

None start str or datetime

Start time. If None, all times are selected.

None end str or datetime

End time. If None, all times are selected.

None time str or datetime

Time. If None, all times are selected.

None area list of float

bbox: [x0, y0, x1, y1] or Polygon. If None, all areas are selected.

None

Returns:

Type Description Comparer

New Comparer with selected data.

Source code in modelskill/comparison/_comparison.py
def sel(\n    self,\n    model: Optional[IdxOrNameTypes] = None,\n    start: Optional[TimeTypes] = None,\n    end: Optional[TimeTypes] = None,\n    time: Optional[TimeTypes] = None,\n    area: Optional[List[float]] = None,\n) -> \"Comparer\":\n    \"\"\"Select data based on model, time and/or area.\n\n    Parameters\n    ----------\n    model : str or int or list of str or list of int, optional\n        Model name or index. If None, all models are selected.\n    start : str or datetime, optional\n        Start time. If None, all times are selected.\n    end : str or datetime, optional\n        End time. If None, all times are selected.\n    time : str or datetime, optional\n        Time. If None, all times are selected.\n    area : list of float, optional\n        bbox: [x0, y0, x1, y1] or Polygon. If None, all areas are selected.\n\n    Returns\n    -------\n    Comparer\n        New Comparer with selected data.\n    \"\"\"\n    if (time is not None) and ((start is not None) or (end is not None)):\n        raise ValueError(\"Cannot use both time and start/end\")\n\n    d = self.data\n    raw_mod_data = self.raw_mod_data\n    if model is not None:\n        if isinstance(model, (str, int)):\n            models = [model]\n        else:\n            models = list(model)\n        mod_names: List[str] = [_get_name(m, self.mod_names) for m in models]\n        dropped_models = [m for m in self.mod_names if m not in mod_names]\n        d = d.drop_vars(dropped_models)\n        raw_mod_data = {m: raw_mod_data[m] for m in mod_names}\n    if (start is not None) or (end is not None):\n        # TODO: can this be done without to_index? 
(simplify)\n        d = d.sel(time=d.time.to_index().to_frame().loc[start:end].index)  # type: ignore\n\n        # Note: if user asks for a specific time, we also filter raw\n        raw_mod_data = {\n            k: v.sel(time=slice(start, end)) for k, v in raw_mod_data.items()\n        }  # type: ignore\n    if time is not None:\n        d = d.sel(time=time)\n\n        # Note: if user asks for a specific time, we also filter raw\n        raw_mod_data = {k: v.sel(time=time) for k, v in raw_mod_data.items()}\n    if area is not None:\n        if _area_is_bbox(area):\n            x0, y0, x1, y1 = area\n            mask = (d.x > x0) & (d.x < x1) & (d.y > y0) & (d.y < y1)\n        elif _area_is_polygon(area):\n            polygon = np.array(area)\n            xy = np.column_stack((d.x, d.y))\n            mask = _inside_polygon(polygon, xy)\n        else:\n            raise ValueError(\"area supports bbox [x0,y0,x1,y1] and closed polygon\")\n        if self.gtype == \"point\":\n            # if False, return empty data\n            d = d if mask else d.isel(time=slice(None, 0))\n        else:\n            d = d.isel(time=mask)\n    return Comparer.from_matched_data(data=d, raw_mod_data=raw_mod_data)\n
"},{"location":"api/comparer/#modelskill.Comparer.skill","title":"skill","text":"
skill(by=None, metrics=None, **kwargs)\n

Skill assessment of model(s)

Parameters:

Name Type Description Default by str or List[str]

group by, by default [\"model\"]

  • by column name
  • by temporal bin of the DateTimeIndex via the freq-argument (using pandas pd.Grouper(freq)), e.g.: 'freq:M' = monthly; 'freq:D' daily
  • by the dt accessor of the DateTimeIndex (e.g. 'dt.month') using the syntax 'dt:month'. The dt-argument is different from the freq-argument in that it gives month-of-year rather than month-of-data.
None metrics list

list of modelskill.metrics, by default modelskill.options.metrics.list

None

Returns:

Type Description SkillTable

skill assessment object

See also

sel a method for filtering/selecting data

Examples:

>>> import modelskill as ms\n>>> cc = ms.match(c2, mod)\n>>> cc['c2'].skill().round(2)\n               n  bias  rmse  urmse   mae    cc    si    r2\nobservation\nc2           113 -0.00  0.35   0.35  0.29  0.97  0.12  0.99\n
>>> cc['c2'].skill(by='freq:D').round(2)\n             n  bias  rmse  urmse   mae    cc    si    r2\n2017-10-27  72 -0.19  0.31   0.25  0.26  0.48  0.12  0.98\n2017-10-28   0   NaN   NaN    NaN   NaN   NaN   NaN   NaN\n2017-10-29  41  0.33  0.41   0.25  0.36  0.96  0.06  0.99\n
Source code in modelskill/comparison/_comparison.py
def skill(\n    self,\n    by: str | Iterable[str] | None = None,\n    metrics: Iterable[str] | Iterable[Callable] | str | Callable | None = None,\n    **kwargs: Any,\n) -> SkillTable:\n    \"\"\"Skill assessment of model(s)\n\n    Parameters\n    ----------\n    by : str or List[str], optional\n        group by, by default [\"model\"]\n\n        - by column name\n        - by temporal bin of the DateTimeIndex via the freq-argument\n        (using pandas pd.Grouper(freq)), e.g.: 'freq:M' = monthly; 'freq:D' daily\n        - by the dt accessor of the DateTimeIndex (e.g. 'dt.month') using the\n        syntax 'dt:month'. The dt-argument is different from the freq-argument\n        in that it gives month-of-year rather than month-of-data.\n    metrics : list, optional\n        list of modelskill.metrics, by default modelskill.options.metrics.list\n\n    Returns\n    -------\n    SkillTable\n        skill assessment object\n\n    See also\n    --------\n    sel\n        a method for filtering/selecting data\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> cc = ms.match(c2, mod)\n    >>> cc['c2'].skill().round(2)\n                   n  bias  rmse  urmse   mae    cc    si    r2\n    observation\n    c2           113 -0.00  0.35   0.35  0.29  0.97  0.12  0.99\n\n    >>> cc['c2'].skill(by='freq:D').round(2)\n                 n  bias  rmse  urmse   mae    cc    si    r2\n    2017-10-27  72 -0.19  0.31   0.25  0.26  0.48  0.12  0.98\n    2017-10-28   0   NaN   NaN    NaN   NaN   NaN   NaN   NaN\n    2017-10-29  41  0.33  0.41   0.25  0.36  0.96  0.06  0.99\n    \"\"\"\n    metrics = _parse_metric(metrics, directional=self.quantity.is_directional)\n\n    # TODO remove in v1.1\n    model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore\n    if kwargs != {}:\n        raise AttributeError(f\"Unknown keyword arguments: {kwargs}\")\n\n    cmp = self.sel(\n        model=model,\n        start=start,\n        end=end,\n        area=area,\n    )\n 
   if cmp.n_points == 0:\n        raise ValueError(\"No data selected for skill assessment\")\n\n    by = _parse_groupby(by, n_mod=cmp.n_models, n_qnt=1)\n\n    df = cmp._to_long_dataframe()\n    res = _groupby_df(df, by=by, metrics=metrics)\n    res[\"x\"] = np.nan if self.gtype == \"track\" else cmp.x\n    res[\"y\"] = np.nan if self.gtype == \"track\" else cmp.y\n    res = self._add_as_col_if_not_in_index(df, skilldf=res)\n    return SkillTable(res)\n
"},{"location":"api/comparer/#modelskill.Comparer.to_dataframe","title":"to_dataframe","text":"
to_dataframe()\n

Convert matched data to pandas DataFrame

Include x, y coordinates only if gtype=track

Returns:

Type Description DataFrame

data as a pandas DataFrame

Source code in modelskill/comparison/_comparison.py
def to_dataframe(self) -> pd.DataFrame:\n    \"\"\"Convert matched data to pandas DataFrame\n\n    Include x, y coordinates only if gtype=track\n\n    Returns\n    -------\n    pd.DataFrame\n        data as a pandas DataFrame\n    \"\"\"\n    if self.gtype == str(GeometryType.POINT):\n        # we remove the scalar coordinate variables as they\n        # will otherwise be columns in the dataframe\n        return self.data.drop_vars([\"x\", \"y\", \"z\"]).to_dataframe()\n    elif self.gtype == str(GeometryType.TRACK):\n        df = self.data.drop_vars([\"z\"]).to_dataframe()\n        # make sure that x, y cols are first\n        cols = [\"x\", \"y\"] + [c for c in df.columns if c not in [\"x\", \"y\"]]\n        return df[cols]\n    else:\n        raise NotImplementedError(f\"Unknown gtype: {self.gtype}\")\n
"},{"location":"api/comparer/#modelskill.Comparer.where","title":"where","text":"
where(cond)\n

Return a new Comparer with values where cond is True

Parameters:

Name Type Description Default cond (bool, ndarray, DataArray)

This selects the values to return.

required

Returns:

Type Description Comparer

New Comparer with values where cond is True and other otherwise.

Examples:

>>> c2 = c.where(c.data.Observation > 0)\n
Source code in modelskill/comparison/_comparison.py
def where(\n    self,\n    cond: Union[bool, np.ndarray, xr.DataArray],\n) -> \"Comparer\":\n    \"\"\"Return a new Comparer with values where cond is True\n\n    Parameters\n    ----------\n    cond : bool, np.ndarray, xr.DataArray\n        This selects the values to return.\n\n    Returns\n    -------\n    Comparer\n        New Comparer with values where cond is True and other otherwise.\n\n    Examples\n    --------\n    >>> c2 = c.where(c.data.Observation > 0)\n    \"\"\"\n    d = self.data.where(cond, other=np.nan)\n    d = d.dropna(dim=\"time\", how=\"all\")\n    return Comparer.from_matched_data(d, self.raw_mod_data)\n
"},{"location":"api/comparer/#modelskill.comparison._comparer_plotter.ComparerPlotter","title":"modelskill.comparison._comparer_plotter.ComparerPlotter","text":"

Plotter class for Comparer

Examples:

>>> cmp.plot.scatter()\n>>> cmp.plot.timeseries()\n>>> cmp.plot.hist()\n>>> cmp.plot.kde()\n>>> cmp.plot.qq()\n>>> cmp.plot.box()\n
Source code in modelskill/comparison/_comparer_plotter.py
class ComparerPlotter:\n    \"\"\"Plotter class for Comparer\n\n    Examples\n    --------\n    >>> cmp.plot.scatter()\n    >>> cmp.plot.timeseries()\n    >>> cmp.plot.hist()\n    >>> cmp.plot.kde()\n    >>> cmp.plot.qq()\n    >>> cmp.plot.box()\n    \"\"\"\n\n    def __init__(self, comparer: Comparer) -> None:\n        self.comparer = comparer\n        self.is_directional = comparer.quantity.is_directional\n\n    def __call__(\n        self, *args, **kwargs\n    ) -> matplotlib.axes.Axes | list[matplotlib.axes.Axes]:\n        \"\"\"Plot scatter plot of modelled vs observed data\"\"\"\n        return self.scatter(*args, **kwargs)\n\n    def timeseries(\n        self,\n        *,\n        title: str | None = None,\n        ylim: Tuple[float, float] | None = None,\n        ax=None,\n        figsize: Tuple[float, float] | None = None,\n        backend: str = \"matplotlib\",\n        **kwargs,\n    ):\n        \"\"\"Timeseries plot showing compared data: observation vs modelled\n\n        Parameters\n        ----------\n        title : str, optional\n            plot title, by default None\n        ylim : (float, float), optional\n            plot range for the model (ymin, ymax), by default None\n        ax : matplotlib.axes.Axes, optional\n            axes to plot on, by default None\n        figsize : (float, float), optional\n            figure size, by default None\n        backend : str, optional\n            use \"plotly\" (interactive) or \"matplotlib\" backend,\n            by default \"matplotlib\"\n        **kwargs\n            other keyword arguments to fig.update_layout (plotly backend)\n\n        Returns\n        -------\n        matplotlib.axes.Axes or plotly.graph_objects.Figure\n        \"\"\"\n        from ._comparison import MOD_COLORS\n\n        cmp = self.comparer\n\n        if title is None:\n            title = cmp.name\n\n        if backend == \"matplotlib\":\n            fig, ax = _get_fig_ax(ax, figsize)\n            for j in 
range(cmp.n_models):\n                key = cmp.mod_names[j]\n                mod = cmp.raw_mod_data[key]._values_as_series\n                mod.plot(ax=ax, color=MOD_COLORS[j])\n\n            ax.scatter(\n                cmp.time,\n                cmp.data[cmp._obs_name].values,\n                marker=\".\",\n                color=cmp.data[cmp._obs_name].attrs[\"color\"],\n            )\n            ax.set_ylabel(cmp._unit_text)\n            ax.legend([*cmp.mod_names, cmp._obs_name])\n            ax.set_ylim(ylim)\n            if self.is_directional:\n                _ytick_directional(ax, ylim)\n            ax.set_title(title)\n            return ax\n\n        elif backend == \"plotly\":  # pragma: no cover\n            import plotly.graph_objects as go  # type: ignore\n\n            mod_scatter_list = []\n            for j in range(cmp.n_models):\n                key = cmp.mod_names[j]\n                mod = cmp.raw_mod_data[key]._values_as_series\n                mod_scatter_list.append(\n                    go.Scatter(\n                        x=mod.index,\n                        y=mod.values,\n                        name=key,\n                        line=dict(color=MOD_COLORS[j]),\n                    )\n                )\n\n            fig = go.Figure(\n                [\n                    *mod_scatter_list,\n                    go.Scatter(\n                        x=cmp.time,\n                        y=cmp.data[cmp._obs_name].values,\n                        name=cmp._obs_name,\n                        mode=\"markers\",\n                        marker=dict(color=cmp.data[cmp._obs_name].attrs[\"color\"]),\n                    ),\n                ]\n            )\n\n            fig.update_layout(title=title, yaxis_title=cmp._unit_text, **kwargs)\n            fig.update_yaxes(range=ylim)\n\n            return fig\n        else:\n            raise ValueError(f\"Plotting backend: {backend} not supported\")\n\n    def hist(\n        self,\n        bins: int 
| Sequence = 100,\n        *,\n        model: str | int | None = None,\n        title: str | None = None,\n        ax=None,\n        figsize: Tuple[float, float] | None = None,\n        density: bool = True,\n        alpha: float = 0.5,\n        **kwargs,\n    ):\n        \"\"\"Plot histogram of model data and observations.\n\n        Wraps pandas.DataFrame hist() method.\n\n        Parameters\n        ----------\n        bins : int, optional\n            number of bins, by default 100\n        title : str, optional\n            plot title, default: [model name] vs [observation name]\n        ax : matplotlib.axes.Axes, optional\n            axes to plot on, by default None\n        figsize : tuple, optional\n            figure size, by default None\n        density: bool, optional\n            If True, draw and return a probability density\n        alpha : float, optional\n            alpha transparency fraction, by default 0.5\n        **kwargs\n            other keyword arguments to df.plot.hist()\n\n        Returns\n        -------\n        matplotlib axes\n\n        See also\n        --------\n        pandas.Series.plot.hist\n        matplotlib.axes.Axes.hist\n        \"\"\"\n        cmp = self.comparer\n\n        if model is None:\n            mod_names = cmp.mod_names\n        else:\n            warnings.warn(\n                \"The 'model' keyword is deprecated! 
Instead, filter comparer before plotting cmp.sel(model=...).plot.hist()\",\n                FutureWarning,\n            )\n            model_list = [model] if isinstance(model, (str, int)) else model\n            mod_names = [cmp.mod_names[_get_idx(m, cmp.mod_names)] for m in model_list]\n\n        axes = []\n        for mod_name in mod_names:\n            ax_mod = self._hist_one_model(\n                mod_name=mod_name,\n                bins=bins,\n                title=title,\n                ax=ax,\n                figsize=figsize,\n                density=density,\n                alpha=alpha,\n                **kwargs,\n            )\n            axes.append(ax_mod)\n\n        return axes[0] if len(axes) == 1 else axes\n\n    def _hist_one_model(\n        self,\n        *,\n        mod_name: str,\n        bins: int | Sequence | None,\n        title: str | None,\n        ax,\n        figsize: Tuple[float, float] | None,\n        density: bool | None,\n        alpha: float | None,\n        **kwargs,\n    ):\n        from ._comparison import MOD_COLORS  # TODO move to here\n\n        cmp = self.comparer\n        assert mod_name in cmp.mod_names, f\"Model {mod_name} not found in comparer\"\n        mod_idx = _get_idx(mod_name, cmp.mod_names)\n\n        title = f\"{mod_name} vs {cmp.name}\" if title is None else title\n\n        _, ax = _get_fig_ax(ax, figsize)\n\n        kwargs[\"alpha\"] = alpha\n        kwargs[\"density\"] = density\n        kwargs[\"ax\"] = ax\n\n        ax = (\n            cmp.data[mod_name]\n            .to_series()\n            .hist(bins=bins, color=MOD_COLORS[mod_idx], **kwargs)\n        )\n\n        cmp.data[cmp._obs_name].to_series().hist(\n            bins=bins, color=cmp.data[cmp._obs_name].attrs[\"color\"], **kwargs\n        )\n        ax.legend([mod_name, cmp._obs_name])\n        ax.set_title(title)\n        ax.set_xlabel(f\"{cmp._unit_text}\")\n        if density:\n            ax.set_ylabel(\"density\")\n        else:\n            
ax.set_ylabel(\"count\")\n\n        if self.is_directional:\n            _xtick_directional(ax)\n\n        return ax\n\n    def kde(self, ax=None, title=None, figsize=None, **kwargs) -> matplotlib.axes.Axes:\n        \"\"\"Plot kde (kernel density estimates of distributions) of model data and observations.\n\n        Wraps pandas.DataFrame kde() method.\n\n        Parameters\n        ----------\n        ax : matplotlib.axes.Axes, optional\n            axes to plot on, by default None\n        title : str, optional\n            plot title, default: \"KDE plot for [observation name]\"\n        figsize : tuple, optional\n            figure size, by default None\n        **kwargs\n            other keyword arguments to df.plot.kde()\n\n        Returns\n        -------\n        matplotlib.axes.Axes\n\n        Examples\n        --------\n        >>> cmp.plot.kde()\n        >>> cmp.plot.kde(bw_method=0.3)\n        >>> cmp.plot.kde(ax=ax, bw_method='silverman')\n        >>> cmp.plot.kde(xlim=[0,None], title=\"Density plot\");\n\n        See also\n        --------\n        pandas.Series.plot.kde\n        \"\"\"\n        cmp = self.comparer\n\n        _, ax = _get_fig_ax(ax, figsize)\n\n        cmp.data.Observation.to_series().plot.kde(\n            ax=ax, linestyle=\"dashed\", label=\"Observation\", **kwargs\n        )\n\n        for model in cmp.mod_names:\n            cmp.data[model].to_series().plot.kde(ax=ax, label=model, **kwargs)\n\n        ax.set_xlabel(cmp._unit_text)  # TODO\n\n        ax.legend()\n\n        # remove y-axis, ticks and label\n        ax.yaxis.set_visible(False)\n        ax.tick_params(axis=\"y\", which=\"both\", length=0)\n        ax.set_ylabel(\"\")\n        title = f\"KDE plot for {cmp.name}\" if title is None else title\n        ax.set_title(title)\n\n        # remove box around plot\n        ax.spines[\"top\"].set_visible(False)\n        ax.spines[\"right\"].set_visible(False)\n        ax.spines[\"left\"].set_visible(False)\n\n        if 
self.is_directional:\n            _xtick_directional(ax)\n\n        return ax\n\n    def qq(\n        self,\n        quantiles: int | Sequence[float] | None = None,\n        *,\n        title=None,\n        ax=None,\n        figsize=None,\n        **kwargs,\n    ):\n        \"\"\"Make quantile-quantile (q-q) plot of model data and observations.\n\n        Primarily used to compare multiple models.\n\n        Parameters\n        ----------\n        quantiles: (int, sequence), optional\n            number of quantiles for QQ-plot, by default None and will depend on the scatter data length (10, 100 or 1000)\n            if int, this is the number of points\n            if sequence (list of floats), represents the desired quantiles (from 0 to 1)\n        title : str, optional\n            plot title, default: \"Q-Q plot for [observation name]\"\n        ax : matplotlib.axes.Axes, optional\n            axes to plot on, by default None\n        figsize : tuple, optional\n            figure size, by default None\n        **kwargs\n            other keyword arguments to plt.plot()\n\n        Returns\n        -------\n        matplotlib axes\n\n        Examples\n        --------\n        >>> cmp.plot.qq()\n\n        \"\"\"\n        cmp = self.comparer\n\n        _, ax = _get_fig_ax(ax, figsize)\n\n        x = cmp.data.Observation.values\n        xmin, xmax = x.min(), x.max()\n        ymin, ymax = np.inf, -np.inf\n\n        for mod_name in cmp.mod_names:\n            y = cmp.data[mod_name].values\n            ymin = min([y.min(), ymin])\n            ymax = max([y.max(), ymax])\n            xq, yq = quantiles_xy(x, y, quantiles)\n            ax.plot(\n                xq,\n                yq,\n                \".-\",\n                label=mod_name,\n                zorder=4,\n                **kwargs,\n            )\n\n        xymin = min([xmin, ymin])\n        xymax = max([xmax, ymax])\n\n        # 1:1 line\n        ax.plot(\n            [xymin, xymax],\n            [xymin, 
xymax],\n            label=options.plot.scatter.oneone_line.label,\n            c=options.plot.scatter.oneone_line.color,\n            zorder=3,\n        )\n\n        ax.axis(\"square\")\n        ax.set_xlim([xymin, xymax])\n        ax.set_ylim([xymin, xymax])\n        ax.minorticks_on()\n        ax.grid(which=\"both\", axis=\"both\", linewidth=\"0.2\", color=\"k\", alpha=0.6)\n\n        ax.legend()\n        ax.set_xlabel(\"Observation, \" + cmp._unit_text)\n        ax.set_ylabel(\"Model, \" + cmp._unit_text)\n        ax.set_title(title or f\"Q-Q plot for {cmp.name}\")\n\n        if self.is_directional:\n            _xtick_directional(ax)\n            _ytick_directional(ax)\n\n        return ax\n\n    def box(self, *, ax=None, title=None, figsize=None, **kwargs):\n        \"\"\"Make a box plot of model data and observations.\n\n        Wraps pandas.DataFrame boxplot() method.\n\n        Parameters\n        ----------\n        ax : matplotlib.axes.Axes, optional\n            axes to plot on, by default None\n        title : str, optional\n            plot title, default: [observation name]\n        figsize : tuple, optional\n            figure size, by default None\n        **kwargs\n            other keyword arguments to df.boxplot()\n\n        Returns\n        -------\n        matplotlib axes\n\n        Examples\n        --------\n        >>> cmp.plot.box()\n        >>> cmp.plot.box(showmeans=True)\n        >>> cmp.plot.box(ax=ax, title=\"Box plot\")\n\n        See also\n        --------\n        pandas.DataFrame.boxplot\n        matplotlib.pyplot.boxplot\n        \"\"\"\n        cmp = self.comparer\n\n        _, ax = _get_fig_ax(ax, figsize)\n\n        cols = [\"Observation\"] + cmp.mod_names\n        df = cmp.data[cols].to_dataframe()[cols]\n        df.boxplot(ax=ax, **kwargs)\n        ax.set_ylabel(cmp._unit_text)\n        ax.set_title(title or cmp.name)\n\n        if self.is_directional:\n            _ytick_directional(ax)\n\n        return ax\n\n    def 
scatter(\n        self,\n        *,\n        model=None,\n        bins: int | float = 120,\n        quantiles: int | Sequence[float] | None = None,\n        fit_to_quantiles: bool = False,\n        show_points: bool | int | float | None = None,\n        show_hist: Optional[bool] = None,\n        show_density: Optional[bool] = None,\n        norm: Optional[colors.Normalize] = None,\n        backend: Literal[\"matplotlib\", \"plotly\"] = \"matplotlib\",\n        figsize: Tuple[float, float] = (8, 8),\n        xlim: Optional[Tuple[float, float]] = None,\n        ylim: Optional[Tuple[float, float]] = None,\n        reg_method: str | bool = \"ols\",\n        title: Optional[str] = None,\n        xlabel: Optional[str] = None,\n        ylabel: Optional[str] = None,\n        skill_table: Optional[Union[str, List[str], bool]] = None,\n        ax: Optional[matplotlib.axes.Axes] = None,\n        **kwargs,\n    ) -> matplotlib.axes.Axes | list[matplotlib.axes.Axes]:\n        \"\"\"Scatter plot showing compared data: observation vs modelled\n        Optionally, with density histogram.\n\n        Parameters\n        ----------\n        bins: (int, float, sequence), optional\n            bins for the 2D histogram on the background. 
By default 20 bins.\n            if int, represents the number of bins of 2D\n            if float, represents the bin size\n            if sequence (list of int or float), represents the bin edges\n        quantiles: (int, sequence), optional\n            number of quantiles for QQ-plot, by default None and will depend\n            on the scatter data length (10, 100 or 1000); if int, this is\n            the number of points; if sequence (list of floats), represents\n            the desired quantiles (from 0 to 1)\n        fit_to_quantiles: bool, optional\n            by default the regression line is fitted to all data, if True,\n            it is fitted to the quantiles which can be useful to represent\n            the extremes of the distribution, by default False\n        show_points : (bool, int, float), optional\n            Should the scatter points be displayed? None means: show all\n            points if fewer than 1e4, otherwise show 1e4 sample points,\n            by default None. float: fraction of points to show on plot\n            from 0 to 1. e.g. 0.5 shows 50% of the points. int: if 'n' (int)\n            given, then 'n' points will be displayed, randomly selected\n        show_hist : bool, optional\n            show the data density as a a 2d histogram, by default None\n        show_density: bool, optional\n            show the data density as a colormap of the scatter, by default\n            None. If both `show_density` and `show_hist` are None, then\n            `show_density` is used by default. For binning the data, the\n            kword `bins=Float` is used.\n        norm : matplotlib.colors norm\n            colormap normalization. 
If None, defaults to\n            matplotlib.colors.PowerNorm(vmin=1, gamma=0.5)\n        backend : str, optional\n            use \"plotly\" (interactive) or \"matplotlib\" backend,\n            by default \"matplotlib\"\n        figsize : tuple, optional\n            width and height of the figure, by default (8, 8)\n        xlim : tuple, optional\n            plot range for the observation (xmin, xmax), by default None\n        ylim : tuple, optional\n            plot range for the model (ymin, ymax), by default None\n        reg_method : str or bool, optional\n            method for determining the regression line\n            \"ols\" : ordinary least squares regression\n            \"odr\" : orthogonal distance regression,\n            False : no regression line\n            by default \"ols\"\n        title : str, optional\n            plot title, by default None\n        xlabel : str, optional\n            x-label text on plot, by default None\n        ylabel : str, optional\n            y-label text on plot, by default None\n        skill_table : str, List[str], bool, optional\n            list of modelskill.metrics or boolean, if True then by default\n            modelskill.options.metrics.list. 
This kword adds a box at the\n            right of the scatter plot, by default False\n        ax : matplotlib.axes.Axes, optional\n            axes to plot on, by default None\n        **kwargs\n            other keyword arguments to plt.scatter()\n\n        Examples\n        ------\n        >>> cmp.plot.scatter()\n        >>> cmp.plot.scatter(bins=0.2, backend='plotly')\n        >>> cmp.plot.scatter(show_points=False, title='no points')\n        >>> cmp.plot.scatter(xlabel='all observations', ylabel='my model')\n        >>> cmp.sel(model='HKZN_v2').plot.scatter(figsize=(10, 10))\n        \"\"\"\n\n        cmp = self.comparer\n        if model is None:\n            mod_names = cmp.mod_names\n        else:\n            warnings.warn(\n                \"The 'model' keyword is deprecated! Instead, filter comparer before plotting cmp.sel(model=...).plot.scatter()\",\n                FutureWarning,\n            )\n            model_list = [model] if isinstance(model, (str, int)) else model\n            mod_names = [cmp.mod_names[_get_idx(m, cmp.mod_names)] for m in model_list]\n\n        axes = []\n        for mod_name in mod_names:\n            ax_mod = self._scatter_one_model(\n                mod_name=mod_name,\n                bins=bins,\n                quantiles=quantiles,\n                fit_to_quantiles=fit_to_quantiles,\n                show_points=show_points,\n                show_hist=show_hist,\n                show_density=show_density,\n                norm=norm,\n                backend=backend,\n                figsize=figsize,\n                xlim=xlim,\n                ylim=ylim,\n                reg_method=reg_method,\n                title=title,\n                xlabel=xlabel,\n                ylabel=ylabel,\n                skill_table=skill_table,\n                ax=ax,\n                **kwargs,\n            )\n            axes.append(ax_mod)\n        return axes[0] if len(axes) == 1 else axes\n\n    def _scatter_one_model(\n        self,\n  
      *,\n        mod_name: str,\n        bins: int | float,\n        quantiles: int | Sequence[float] | None,\n        fit_to_quantiles: bool,\n        show_points: bool | int | float | None,\n        show_hist: Optional[bool],\n        show_density: Optional[bool],\n        norm: Optional[colors.Normalize],\n        backend: Literal[\"matplotlib\", \"plotly\"],\n        figsize: Tuple[float, float],\n        xlim: Optional[Tuple[float, float]],\n        ylim: Optional[Tuple[float, float]],\n        reg_method: str | bool,\n        title: Optional[str],\n        xlabel: Optional[str],\n        ylabel: Optional[str],\n        skill_table: Optional[Union[str, List[str], bool]],\n        **kwargs,\n    ):\n        \"\"\"Scatter plot for one model only\"\"\"\n\n        cmp = self.comparer\n        cmp_sel_mod = cmp.sel(model=mod_name)\n        assert mod_name in cmp.mod_names, f\"Model {mod_name} not found in comparer\"\n\n        if cmp_sel_mod.n_points == 0:\n            raise ValueError(\"No data found in selection\")\n\n        x = cmp_sel_mod.data.Observation.values\n        y = cmp_sel_mod.data[mod_name].values\n\n        assert x.ndim == y.ndim == 1, \"x and y must be 1D arrays\"\n        assert x.shape == y.shape, \"x and y must have the same shape\"\n\n        unit_text = cmp._unit_text\n        xlabel = xlabel or f\"Observation, {unit_text}\"\n        ylabel = ylabel or f\"Model, {unit_text}\"\n        title = title or f\"{mod_name} vs {cmp.name}\"\n\n        skill = None\n        skill_score_unit = None\n\n        if skill_table:\n            metrics = None if skill_table is True else skill_table\n            skill = cmp_sel_mod.skill(metrics=metrics)  # type: ignore\n            try:\n                skill_score_unit = unit_text.split(\"[\")[1].split(\"]\")[0]\n            except IndexError:\n                skill_score_unit = \"\"  # Dimensionless\n\n        if self.is_directional:\n            # hide quantiles and regression line\n            quantiles = 
0\n            reg_method = False\n\n        skill_scores = skill.iloc[0].to_dict() if skill is not None else None\n\n        ax = scatter(\n            x=x,\n            y=y,\n            bins=bins,\n            quantiles=quantiles,\n            fit_to_quantiles=fit_to_quantiles,\n            show_points=show_points,\n            show_hist=show_hist,\n            show_density=show_density,\n            norm=norm,\n            backend=backend,\n            figsize=figsize,\n            xlim=xlim,\n            ylim=ylim,\n            reg_method=reg_method,\n            title=title,\n            xlabel=xlabel,\n            ylabel=ylabel,\n            skill_scores=skill_scores,\n            skill_score_unit=skill_score_unit,\n            **kwargs,\n        )\n\n        if backend == \"matplotlib\" and self.is_directional:\n            _xtick_directional(ax, xlim)\n            _ytick_directional(ax, ylim)\n\n        return ax\n\n    def taylor(\n        self,\n        *,\n        normalize_std: bool = False,\n        figsize: Tuple[float, float] = (7, 7),\n        marker: str = \"o\",\n        marker_size: float = 6.0,\n        title: str = \"Taylor diagram\",\n    ):\n        \"\"\"Taylor diagram showing model std and correlation to observation\n        in a single-quadrant polar plot, with r=std and theta=arccos(cc).\n\n        Parameters\n        ----------\n        normalize_std : bool, optional\n            plot model std normalized with observation std, default False\n        figsize : tuple, optional\n            width and height of the figure (should be square), by default (7, 7)\n        marker : str, optional\n            marker type e.g. 
\"x\", \"*\", by default \"o\"\n        marker_size : float, optional\n            size of the marker, by default 6\n        title : str, optional\n            title of the plot, by default \"Taylor diagram\"\n\n        Returns\n        -------\n        matplotlib.figure.Figure\n\n        Examples\n        ------\n        >>> comparer.taylor()\n        >>> comparer.taylor(start=\"2017-10-28\", figsize=(5,5))\n\n        References\n        ----------\n        Copin, Y. (2018). https://gist.github.com/ycopin/3342888, Yannick Copin <yannick.copin@laposte.net>\n        \"\"\"\n        cmp = self.comparer\n\n        # TODO consider if this round-trip  via mtr is necessary to get the std:s\n        metrics: List[Callable] = [\n            mtr._std_obs,\n            mtr._std_mod,\n            mtr.cc,\n        ]\n\n        sk = cmp.skill(metrics=metrics)\n\n        if sk is None:  # TODO\n            return\n        df = sk.to_dataframe()\n        ref_std = 1.0 if normalize_std else df.iloc[0][\"_std_obs\"]\n\n        df = df[[\"_std_obs\", \"_std_mod\", \"cc\"]].copy()\n        df.columns = [\"obs_std\", \"std\", \"cc\"]\n\n        pts = [\n            TaylorPoint(\n                r.Index, r.obs_std, r.std, r.cc, marker=marker, marker_size=marker_size\n            )\n            for r in df.itertuples()\n        ]\n\n        return taylor_diagram(\n            obs_std=ref_std,\n            points=pts,\n            figsize=figsize,\n            obs_text=f\"Obs: {cmp.name}\",\n            normalize_std=normalize_std,\n            title=title,\n        )\n\n    def residual_hist(\n        self, bins=100, title=None, color=None, figsize=None, ax=None, **kwargs\n    ) -> matplotlib.axes.Axes | list[matplotlib.axes.Axes]:\n        \"\"\"plot histogram of residual values\n\n        Parameters\n        ----------\n        bins : int, optional\n            specification of bins, by default 100\n        title : str, optional\n            plot title, default: Residuals, [name]\n    
    color : str, optional\n            residual color, by default \"#8B8D8E\"\n        figsize : tuple, optional\n            figure size, by default None\n        ax : matplotlib.axes.Axes | list[matplotlib.axes.Axes], optional\n            axes to plot on, by default None\n        **kwargs\n            other keyword arguments to plt.hist()\n\n        Returns\n        -------\n        matplotlib.axes.Axes | list[matplotlib.axes.Axes]\n        \"\"\"\n        cmp = self.comparer\n\n        if cmp.n_models == 1:\n            return self._residual_hist_one_model(\n                bins=bins,\n                title=title,\n                color=color,\n                figsize=figsize,\n                ax=ax,\n                mod_name=cmp.mod_names[0],\n                **kwargs,\n            )\n\n        if ax is not None and len(ax) != len(cmp.mod_names):\n            raise ValueError(\"Number of axes must match number of models\")\n\n        axs = ax if ax is not None else [None] * len(cmp.mod_names)\n\n        for i, mod_name in enumerate(cmp.mod_names):\n            cmp_model = cmp.sel(model=mod_name)\n            ax_mod = cmp_model.plot.residual_hist(\n                bins=bins,\n                title=title,\n                color=color,\n                figsize=figsize,\n                ax=axs[i],\n                **kwargs,\n            )\n            axs[i] = ax_mod\n\n        return axs\n\n    def _residual_hist_one_model(\n        self,\n        bins=100,\n        title=None,\n        color=None,\n        figsize=None,\n        ax=None,\n        mod_name=None,\n        **kwargs,\n    ) -> matplotlib.axes.Axes:\n        \"\"\"Residual histogram for one model only\"\"\"\n        _, ax = _get_fig_ax(ax, figsize)\n\n        default_color = \"#8B8D8E\"\n        color = default_color if color is None else color\n        title = (\n            f\"Residuals, Observation: {self.comparer.name}, Model: {mod_name}\"\n            if title is None\n            else title\n   
     )\n        ax.hist(self.comparer._residual, bins=bins, color=color, **kwargs)\n        ax.set_title(title)\n        ax.set_xlabel(f\"Residuals of {self.comparer._unit_text}\")\n\n        if self.is_directional:\n            ticks = np.linspace(-180, 180, 9)\n            ax.set_xticks(ticks)\n            ax.set_xlim(-180, 180)\n\n        return ax\n
"},{"location":"api/comparer/#modelskill.comparison._comparer_plotter.ComparerPlotter.__call__","title":"__call__","text":"
__call__(*args, **kwargs)\n

Plot scatter plot of modelled vs observed data

Source code in modelskill/comparison/_comparer_plotter.py
def __call__(\n    self, *args, **kwargs\n) -> matplotlib.axes.Axes | list[matplotlib.axes.Axes]:\n    \"\"\"Plot scatter plot of modelled vs observed data\"\"\"\n    return self.scatter(*args, **kwargs)\n
"},{"location":"api/comparer/#modelskill.comparison._comparer_plotter.ComparerPlotter.box","title":"box","text":"
box(*, ax=None, title=None, figsize=None, **kwargs)\n

Make a box plot of model data and observations.

Wraps pandas.DataFrame boxplot() method.

Parameters:

Name Type Description Default ax Axes

axes to plot on, by default None

None title str

plot title, default: [observation name]

None figsize tuple

figure size, by default None

None **kwargs

other keyword arguments to df.boxplot()

{}

Returns:

Type Description matplotlib axes

Examples:

>>> cmp.plot.box()\n>>> cmp.plot.box(showmeans=True)\n>>> cmp.plot.box(ax=ax, title=\"Box plot\")\n
See also

pandas.DataFrame.boxplot matplotlib.pyplot.boxplot

Source code in modelskill/comparison/_comparer_plotter.py
def box(self, *, ax=None, title=None, figsize=None, **kwargs):\n    \"\"\"Make a box plot of model data and observations.\n\n    Wraps pandas.DataFrame boxplot() method.\n\n    Parameters\n    ----------\n    ax : matplotlib.axes.Axes, optional\n        axes to plot on, by default None\n    title : str, optional\n        plot title, default: [observation name]\n    figsize : tuple, optional\n        figure size, by default None\n    **kwargs\n        other keyword arguments to df.boxplot()\n\n    Returns\n    -------\n    matplotlib axes\n\n    Examples\n    --------\n    >>> cmp.plot.box()\n    >>> cmp.plot.box(showmeans=True)\n    >>> cmp.plot.box(ax=ax, title=\"Box plot\")\n\n    See also\n    --------\n    pandas.DataFrame.boxplot\n    matplotlib.pyplot.boxplot\n    \"\"\"\n    cmp = self.comparer\n\n    _, ax = _get_fig_ax(ax, figsize)\n\n    cols = [\"Observation\"] + cmp.mod_names\n    df = cmp.data[cols].to_dataframe()[cols]\n    df.boxplot(ax=ax, **kwargs)\n    ax.set_ylabel(cmp._unit_text)\n    ax.set_title(title or cmp.name)\n\n    if self.is_directional:\n        _ytick_directional(ax)\n\n    return ax\n
"},{"location":"api/comparer/#modelskill.comparison._comparer_plotter.ComparerPlotter.hist","title":"hist","text":"
hist(bins=100, *, model=None, title=None, ax=None, figsize=None, density=True, alpha=0.5, **kwargs)\n

Plot histogram of model data and observations.

Wraps pandas.DataFrame hist() method.

Parameters:

Name Type Description Default bins int

number of bins, by default 100

100 title str

plot title, default: [model name] vs [observation name]

None ax Axes

axes to plot on, by default None

None figsize tuple

figure size, by default None

None density bool

If True, draw and return a probability density

True alpha float

alpha transparency fraction, by default 0.5

0.5 **kwargs

other keyword arguments to df.plot.hist()

{}

Returns:

Type Description matplotlib axes See also

pandas.Series.plot.hist matplotlib.axes.Axes.hist

Source code in modelskill/comparison/_comparer_plotter.py
def hist(\n    self,\n    bins: int | Sequence = 100,\n    *,\n    model: str | int | None = None,\n    title: str | None = None,\n    ax=None,\n    figsize: Tuple[float, float] | None = None,\n    density: bool = True,\n    alpha: float = 0.5,\n    **kwargs,\n):\n    \"\"\"Plot histogram of model data and observations.\n\n    Wraps pandas.DataFrame hist() method.\n\n    Parameters\n    ----------\n    bins : int, optional\n        number of bins, by default 100\n    title : str, optional\n        plot title, default: [model name] vs [observation name]\n    ax : matplotlib.axes.Axes, optional\n        axes to plot on, by default None\n    figsize : tuple, optional\n        figure size, by default None\n    density: bool, optional\n        If True, draw and return a probability density\n    alpha : float, optional\n        alpha transparency fraction, by default 0.5\n    **kwargs\n        other keyword arguments to df.plot.hist()\n\n    Returns\n    -------\n    matplotlib axes\n\n    See also\n    --------\n    pandas.Series.plot.hist\n    matplotlib.axes.Axes.hist\n    \"\"\"\n    cmp = self.comparer\n\n    if model is None:\n        mod_names = cmp.mod_names\n    else:\n        warnings.warn(\n            \"The 'model' keyword is deprecated! Instead, filter comparer before plotting cmp.sel(model=...).plot.hist()\",\n            FutureWarning,\n        )\n        model_list = [model] if isinstance(model, (str, int)) else model\n        mod_names = [cmp.mod_names[_get_idx(m, cmp.mod_names)] for m in model_list]\n\n    axes = []\n    for mod_name in mod_names:\n        ax_mod = self._hist_one_model(\n            mod_name=mod_name,\n            bins=bins,\n            title=title,\n            ax=ax,\n            figsize=figsize,\n            density=density,\n            alpha=alpha,\n            **kwargs,\n        )\n        axes.append(ax_mod)\n\n    return axes[0] if len(axes) == 1 else axes\n
"},{"location":"api/comparer/#modelskill.comparison._comparer_plotter.ComparerPlotter.kde","title":"kde","text":"
kde(ax=None, title=None, figsize=None, **kwargs)\n

Plot kde (kernel density estimates of distributions) of model data and observations.

Wraps pandas.DataFrame kde() method.

Parameters:

Name Type Description Default ax Axes

axes to plot on, by default None

None title str

plot title, default: \"KDE plot for [observation name]\"

None figsize tuple

figure size, by default None

None **kwargs

other keyword arguments to df.plot.kde()

{}

Returns:

Type Description Axes

Examples:

>>> cmp.plot.kde()\n>>> cmp.plot.kde(bw_method=0.3)\n>>> cmp.plot.kde(ax=ax, bw_method='silverman')\n>>> cmp.plot.kde(xlim=[0,None], title=\"Density plot\");\n
See also

pandas.Series.plot.kde

Source code in modelskill/comparison/_comparer_plotter.py
def kde(self, ax=None, title=None, figsize=None, **kwargs) -> matplotlib.axes.Axes:\n    \"\"\"Plot kde (kernel density estimates of distributions) of model data and observations.\n\n    Wraps pandas.DataFrame kde() method.\n\n    Parameters\n    ----------\n    ax : matplotlib.axes.Axes, optional\n        axes to plot on, by default None\n    title : str, optional\n        plot title, default: \"KDE plot for [observation name]\"\n    figsize : tuple, optional\n        figure size, by default None\n    **kwargs\n        other keyword arguments to df.plot.kde()\n\n    Returns\n    -------\n    matplotlib.axes.Axes\n\n    Examples\n    --------\n    >>> cmp.plot.kde()\n    >>> cmp.plot.kde(bw_method=0.3)\n    >>> cmp.plot.kde(ax=ax, bw_method='silverman')\n    >>> cmp.plot.kde(xlim=[0,None], title=\"Density plot\");\n\n    See also\n    --------\n    pandas.Series.plot.kde\n    \"\"\"\n    cmp = self.comparer\n\n    _, ax = _get_fig_ax(ax, figsize)\n\n    cmp.data.Observation.to_series().plot.kde(\n        ax=ax, linestyle=\"dashed\", label=\"Observation\", **kwargs\n    )\n\n    for model in cmp.mod_names:\n        cmp.data[model].to_series().plot.kde(ax=ax, label=model, **kwargs)\n\n    ax.set_xlabel(cmp._unit_text)  # TODO\n\n    ax.legend()\n\n    # remove y-axis, ticks and label\n    ax.yaxis.set_visible(False)\n    ax.tick_params(axis=\"y\", which=\"both\", length=0)\n    ax.set_ylabel(\"\")\n    title = f\"KDE plot for {cmp.name}\" if title is None else title\n    ax.set_title(title)\n\n    # remove box around plot\n    ax.spines[\"top\"].set_visible(False)\n    ax.spines[\"right\"].set_visible(False)\n    ax.spines[\"left\"].set_visible(False)\n\n    if self.is_directional:\n        _xtick_directional(ax)\n\n    return ax\n
"},{"location":"api/comparer/#modelskill.comparison._comparer_plotter.ComparerPlotter.qq","title":"qq","text":"
qq(quantiles=None, *, title=None, ax=None, figsize=None, **kwargs)\n

Make quantile-quantile (q-q) plot of model data and observations.

Primarily used to compare multiple models.

Parameters:

Name Type Description Default quantiles int | Sequence[float] | None

number of quantiles for QQ-plot, by default None and will depend on the scatter data length (10, 100 or 1000) if int, this is the number of points if sequence (list of floats), represents the desired quantiles (from 0 to 1)

None title str

plot title, default: \"Q-Q plot for [observation name]\"

None ax Axes

axes to plot on, by default None

None figsize tuple

figure size, by default None

None **kwargs

other keyword arguments to plt.plot()

{}

Returns:

Type Description matplotlib axes

Examples:

>>> cmp.plot.qq()\n
Source code in modelskill/comparison/_comparer_plotter.py
def qq(\n    self,\n    quantiles: int | Sequence[float] | None = None,\n    *,\n    title=None,\n    ax=None,\n    figsize=None,\n    **kwargs,\n):\n    \"\"\"Make quantile-quantile (q-q) plot of model data and observations.\n\n    Primarily used to compare multiple models.\n\n    Parameters\n    ----------\n    quantiles: (int, sequence), optional\n        number of quantiles for QQ-plot, by default None and will depend on the scatter data length (10, 100 or 1000)\n        if int, this is the number of points\n        if sequence (list of floats), represents the desired quantiles (from 0 to 1)\n    title : str, optional\n        plot title, default: \"Q-Q plot for [observation name]\"\n    ax : matplotlib.axes.Axes, optional\n        axes to plot on, by default None\n    figsize : tuple, optional\n        figure size, by default None\n    **kwargs\n        other keyword arguments to plt.plot()\n\n    Returns\n    -------\n    matplotlib axes\n\n    Examples\n    --------\n    >>> cmp.plot.qq()\n\n    \"\"\"\n    cmp = self.comparer\n\n    _, ax = _get_fig_ax(ax, figsize)\n\n    x = cmp.data.Observation.values\n    xmin, xmax = x.min(), x.max()\n    ymin, ymax = np.inf, -np.inf\n\n    for mod_name in cmp.mod_names:\n        y = cmp.data[mod_name].values\n        ymin = min([y.min(), ymin])\n        ymax = max([y.max(), ymax])\n        xq, yq = quantiles_xy(x, y, quantiles)\n        ax.plot(\n            xq,\n            yq,\n            \".-\",\n            label=mod_name,\n            zorder=4,\n            **kwargs,\n        )\n\n    xymin = min([xmin, ymin])\n    xymax = max([xmax, ymax])\n\n    # 1:1 line\n    ax.plot(\n        [xymin, xymax],\n        [xymin, xymax],\n        label=options.plot.scatter.oneone_line.label,\n        c=options.plot.scatter.oneone_line.color,\n        zorder=3,\n    )\n\n    ax.axis(\"square\")\n    ax.set_xlim([xymin, xymax])\n    ax.set_ylim([xymin, xymax])\n    ax.minorticks_on()\n    ax.grid(which=\"both\", axis=\"both\", 
linewidth=\"0.2\", color=\"k\", alpha=0.6)\n\n    ax.legend()\n    ax.set_xlabel(\"Observation, \" + cmp._unit_text)\n    ax.set_ylabel(\"Model, \" + cmp._unit_text)\n    ax.set_title(title or f\"Q-Q plot for {cmp.name}\")\n\n    if self.is_directional:\n        _xtick_directional(ax)\n        _ytick_directional(ax)\n\n    return ax\n
"},{"location":"api/comparer/#modelskill.comparison._comparer_plotter.ComparerPlotter.residual_hist","title":"residual_hist","text":"
residual_hist(bins=100, title=None, color=None, figsize=None, ax=None, **kwargs)\n

plot histogram of residual values

Parameters:

Name Type Description Default bins int

specification of bins, by default 100

100 title str

plot title, default: Residuals, [name]

None color str

residual color, by default \"#8B8D8E\"

None figsize tuple

figure size, by default None

None ax Axes | list[Axes]

axes to plot on, by default None

None **kwargs

other keyword arguments to plt.hist()

{}

Returns:

Type Description Axes | list[Axes] Source code in modelskill/comparison/_comparer_plotter.py
def residual_hist(\n    self, bins=100, title=None, color=None, figsize=None, ax=None, **kwargs\n) -> matplotlib.axes.Axes | list[matplotlib.axes.Axes]:\n    \"\"\"plot histogram of residual values\n\n    Parameters\n    ----------\n    bins : int, optional\n        specification of bins, by default 100\n    title : str, optional\n        plot title, default: Residuals, [name]\n    color : str, optional\n        residual color, by default \"#8B8D8E\"\n    figsize : tuple, optional\n        figure size, by default None\n    ax : matplotlib.axes.Axes | list[matplotlib.axes.Axes], optional\n        axes to plot on, by default None\n    **kwargs\n        other keyword arguments to plt.hist()\n\n    Returns\n    -------\n    matplotlib.axes.Axes | list[matplotlib.axes.Axes]\n    \"\"\"\n    cmp = self.comparer\n\n    if cmp.n_models == 1:\n        return self._residual_hist_one_model(\n            bins=bins,\n            title=title,\n            color=color,\n            figsize=figsize,\n            ax=ax,\n            mod_name=cmp.mod_names[0],\n            **kwargs,\n        )\n\n    if ax is not None and len(ax) != len(cmp.mod_names):\n        raise ValueError(\"Number of axes must match number of models\")\n\n    axs = ax if ax is not None else [None] * len(cmp.mod_names)\n\n    for i, mod_name in enumerate(cmp.mod_names):\n        cmp_model = cmp.sel(model=mod_name)\n        ax_mod = cmp_model.plot.residual_hist(\n            bins=bins,\n            title=title,\n            color=color,\n            figsize=figsize,\n            ax=axs[i],\n            **kwargs,\n        )\n        axs[i] = ax_mod\n\n    return axs\n
"},{"location":"api/comparer/#modelskill.comparison._comparer_plotter.ComparerPlotter.scatter","title":"scatter","text":"
scatter(*, model=None, bins=120, quantiles=None, fit_to_quantiles=False, show_points=None, show_hist=None, show_density=None, norm=None, backend='matplotlib', figsize=(8, 8), xlim=None, ylim=None, reg_method='ols', title=None, xlabel=None, ylabel=None, skill_table=None, ax=None, **kwargs)\n

Scatter plot showing compared data: observation vs modelled Optionally, with density histogram.

Parameters:

Name Type Description Default bins int | float

bins for the 2D histogram on the background. By default 20 bins. if int, represents the number of bins of 2D if float, represents the bin size if sequence (list of int or float), represents the bin edges

120 quantiles int | Sequence[float] | None

number of quantiles for QQ-plot, by default None and will depend on the scatter data length (10, 100 or 1000); if int, this is the number of points; if sequence (list of floats), represents the desired quantiles (from 0 to 1)

None fit_to_quantiles bool

by default the regression line is fitted to all data, if True, it is fitted to the quantiles which can be useful to represent the extremes of the distribution, by default False

False show_points (bool, int, float)

Should the scatter points be displayed? None means: show all points if fewer than 1e4, otherwise show 1e4 sample points, by default None. float: fraction of points to show on plot from 0 to 1. e.g. 0.5 shows 50% of the points. int: if 'n' (int) given, then 'n' points will be displayed, randomly selected

None show_hist bool

show the data density as a a 2d histogram, by default None

None show_density Optional[bool]

show the data density as a colormap of the scatter, by default None. If both show_density and show_hist are None, then show_density is used by default. For binning the data, the kword bins=Float is used.

None norm matplotlib.colors norm

colormap normalization. If None, defaults to matplotlib.colors.PowerNorm(vmin=1, gamma=0.5)

None backend str

use \"plotly\" (interactive) or \"matplotlib\" backend, by default \"matplotlib\"

'matplotlib' figsize tuple

width and height of the figure, by default (8, 8)

(8, 8) xlim tuple

plot range for the observation (xmin, xmax), by default None

None ylim tuple

plot range for the model (ymin, ymax), by default None

None reg_method str or bool

method for determining the regression line \"ols\" : ordinary least squares regression \"odr\" : orthogonal distance regression, False : no regression line by default \"ols\"

'ols' title str

plot title, by default None

None xlabel str

x-label text on plot, by default None

None ylabel str

y-label text on plot, by default None

None skill_table (str, List[str], bool)

list of modelskill.metrics or boolean, if True then by default modelskill.options.metrics.list. This kword adds a box at the right of the scatter plot, by default False

None ax Axes

axes to plot on, by default None

None **kwargs

other keyword arguments to plt.scatter()

{}

Examples:

>>> cmp.plot.scatter()\n>>> cmp.plot.scatter(bins=0.2, backend='plotly')\n>>> cmp.plot.scatter(show_points=False, title='no points')\n>>> cmp.plot.scatter(xlabel='all observations', ylabel='my model')\n>>> cmp.sel(model='HKZN_v2').plot.scatter(figsize=(10, 10))\n
Source code in modelskill/comparison/_comparer_plotter.py
def scatter(\n    self,\n    *,\n    model=None,\n    bins: int | float = 120,\n    quantiles: int | Sequence[float] | None = None,\n    fit_to_quantiles: bool = False,\n    show_points: bool | int | float | None = None,\n    show_hist: Optional[bool] = None,\n    show_density: Optional[bool] = None,\n    norm: Optional[colors.Normalize] = None,\n    backend: Literal[\"matplotlib\", \"plotly\"] = \"matplotlib\",\n    figsize: Tuple[float, float] = (8, 8),\n    xlim: Optional[Tuple[float, float]] = None,\n    ylim: Optional[Tuple[float, float]] = None,\n    reg_method: str | bool = \"ols\",\n    title: Optional[str] = None,\n    xlabel: Optional[str] = None,\n    ylabel: Optional[str] = None,\n    skill_table: Optional[Union[str, List[str], bool]] = None,\n    ax: Optional[matplotlib.axes.Axes] = None,\n    **kwargs,\n) -> matplotlib.axes.Axes | list[matplotlib.axes.Axes]:\n    \"\"\"Scatter plot showing compared data: observation vs modelled\n    Optionally, with density histogram.\n\n    Parameters\n    ----------\n    bins: (int, float, sequence), optional\n        bins for the 2D histogram on the background. By default 20 bins.\n        if int, represents the number of bins of 2D\n        if float, represents the bin size\n        if sequence (list of int or float), represents the bin edges\n    quantiles: (int, sequence), optional\n        number of quantiles for QQ-plot, by default None and will depend\n        on the scatter data length (10, 100 or 1000); if int, this is\n        the number of points; if sequence (list of floats), represents\n        the desired quantiles (from 0 to 1)\n    fit_to_quantiles: bool, optional\n        by default the regression line is fitted to all data, if True,\n        it is fitted to the quantiles which can be useful to represent\n        the extremes of the distribution, by default False\n    show_points : (bool, int, float), optional\n        Should the scatter points be displayed? 
None means: show all\n        points if fewer than 1e4, otherwise show 1e4 sample points,\n        by default None. float: fraction of points to show on plot\n        from 0 to 1. e.g. 0.5 shows 50% of the points. int: if 'n' (int)\n        given, then 'n' points will be displayed, randomly selected\n    show_hist : bool, optional\n        show the data density as a a 2d histogram, by default None\n    show_density: bool, optional\n        show the data density as a colormap of the scatter, by default\n        None. If both `show_density` and `show_hist` are None, then\n        `show_density` is used by default. For binning the data, the\n        kword `bins=Float` is used.\n    norm : matplotlib.colors norm\n        colormap normalization. If None, defaults to\n        matplotlib.colors.PowerNorm(vmin=1, gamma=0.5)\n    backend : str, optional\n        use \"plotly\" (interactive) or \"matplotlib\" backend,\n        by default \"matplotlib\"\n    figsize : tuple, optional\n        width and height of the figure, by default (8, 8)\n    xlim : tuple, optional\n        plot range for the observation (xmin, xmax), by default None\n    ylim : tuple, optional\n        plot range for the model (ymin, ymax), by default None\n    reg_method : str or bool, optional\n        method for determining the regression line\n        \"ols\" : ordinary least squares regression\n        \"odr\" : orthogonal distance regression,\n        False : no regression line\n        by default \"ols\"\n    title : str, optional\n        plot title, by default None\n    xlabel : str, optional\n        x-label text on plot, by default None\n    ylabel : str, optional\n        y-label text on plot, by default None\n    skill_table : str, List[str], bool, optional\n        list of modelskill.metrics or boolean, if True then by default\n        modelskill.options.metrics.list. 
This kword adds a box at the\n        right of the scatter plot, by default False\n    ax : matplotlib.axes.Axes, optional\n        axes to plot on, by default None\n    **kwargs\n        other keyword arguments to plt.scatter()\n\n    Examples\n    ------\n    >>> cmp.plot.scatter()\n    >>> cmp.plot.scatter(bins=0.2, backend='plotly')\n    >>> cmp.plot.scatter(show_points=False, title='no points')\n    >>> cmp.plot.scatter(xlabel='all observations', ylabel='my model')\n    >>> cmp.sel(model='HKZN_v2').plot.scatter(figsize=(10, 10))\n    \"\"\"\n\n    cmp = self.comparer\n    if model is None:\n        mod_names = cmp.mod_names\n    else:\n        warnings.warn(\n            \"The 'model' keyword is deprecated! Instead, filter comparer before plotting cmp.sel(model=...).plot.scatter()\",\n            FutureWarning,\n        )\n        model_list = [model] if isinstance(model, (str, int)) else model\n        mod_names = [cmp.mod_names[_get_idx(m, cmp.mod_names)] for m in model_list]\n\n    axes = []\n    for mod_name in mod_names:\n        ax_mod = self._scatter_one_model(\n            mod_name=mod_name,\n            bins=bins,\n            quantiles=quantiles,\n            fit_to_quantiles=fit_to_quantiles,\n            show_points=show_points,\n            show_hist=show_hist,\n            show_density=show_density,\n            norm=norm,\n            backend=backend,\n            figsize=figsize,\n            xlim=xlim,\n            ylim=ylim,\n            reg_method=reg_method,\n            title=title,\n            xlabel=xlabel,\n            ylabel=ylabel,\n            skill_table=skill_table,\n            ax=ax,\n            **kwargs,\n        )\n        axes.append(ax_mod)\n    return axes[0] if len(axes) == 1 else axes\n
"},{"location":"api/comparer/#modelskill.comparison._comparer_plotter.ComparerPlotter.taylor","title":"taylor","text":"
taylor(*, normalize_std=False, figsize=(7, 7), marker='o', marker_size=6.0, title='Taylor diagram')\n

Taylor diagram showing model std and correlation to observation in a single-quadrant polar plot, with r=std and theta=arccos(cc).

Parameters:

Name Type Description Default normalize_std bool

plot model std normalized with observation std, default False

False figsize tuple

width and height of the figure (should be square), by default (7, 7)

(7, 7) marker str

marker type e.g. \"x\", \"*\", by default \"o\"

'o' marker_size float

size of the marker, by default 6

6.0 title str

title of the plot, by default \"Taylor diagram\"

'Taylor diagram'

Returns:

Type Description Figure

Examples:

>>> comparer.taylor()\n>>> comparer.taylor(start=\"2017-10-28\", figsize=(5,5))\n
References

Copin, Y. (2018). https://gist.github.com/ycopin/3342888, Yannick Copin yannick.copin@laposte.net

Source code in modelskill/comparison/_comparer_plotter.py
def taylor(\n    self,\n    *,\n    normalize_std: bool = False,\n    figsize: Tuple[float, float] = (7, 7),\n    marker: str = \"o\",\n    marker_size: float = 6.0,\n    title: str = \"Taylor diagram\",\n):\n    \"\"\"Taylor diagram showing model std and correlation to observation\n    in a single-quadrant polar plot, with r=std and theta=arccos(cc).\n\n    Parameters\n    ----------\n    normalize_std : bool, optional\n        plot model std normalized with observation std, default False\n    figsize : tuple, optional\n        width and height of the figure (should be square), by default (7, 7)\n    marker : str, optional\n        marker type e.g. \"x\", \"*\", by default \"o\"\n    marker_size : float, optional\n        size of the marker, by default 6\n    title : str, optional\n        title of the plot, by default \"Taylor diagram\"\n\n    Returns\n    -------\n    matplotlib.figure.Figure\n\n    Examples\n    ------\n    >>> comparer.taylor()\n    >>> comparer.taylor(start=\"2017-10-28\", figsize=(5,5))\n\n    References\n    ----------\n    Copin, Y. (2018). 
https://gist.github.com/ycopin/3342888, Yannick Copin <yannick.copin@laposte.net>\n    \"\"\"\n    cmp = self.comparer\n\n    # TODO consider if this round-trip  via mtr is necessary to get the std:s\n    metrics: List[Callable] = [\n        mtr._std_obs,\n        mtr._std_mod,\n        mtr.cc,\n    ]\n\n    sk = cmp.skill(metrics=metrics)\n\n    if sk is None:  # TODO\n        return\n    df = sk.to_dataframe()\n    ref_std = 1.0 if normalize_std else df.iloc[0][\"_std_obs\"]\n\n    df = df[[\"_std_obs\", \"_std_mod\", \"cc\"]].copy()\n    df.columns = [\"obs_std\", \"std\", \"cc\"]\n\n    pts = [\n        TaylorPoint(\n            r.Index, r.obs_std, r.std, r.cc, marker=marker, marker_size=marker_size\n        )\n        for r in df.itertuples()\n    ]\n\n    return taylor_diagram(\n        obs_std=ref_std,\n        points=pts,\n        figsize=figsize,\n        obs_text=f\"Obs: {cmp.name}\",\n        normalize_std=normalize_std,\n        title=title,\n    )\n
"},{"location":"api/comparer/#modelskill.comparison._comparer_plotter.ComparerPlotter.timeseries","title":"timeseries","text":"
timeseries(*, title=None, ylim=None, ax=None, figsize=None, backend='matplotlib', **kwargs)\n

Timeseries plot showing compared data: observation vs modelled

Parameters:

Name Type Description Default title str

plot title, by default None

None ylim (float, float)

plot range for the model (ymin, ymax), by default None

None ax Axes

axes to plot on, by default None

None figsize (float, float)

figure size, by default None

None backend str

use \"plotly\" (interactive) or \"matplotlib\" backend, by default \"matplotlib\"

'matplotlib' **kwargs

other keyword arguments to fig.update_layout (plotly backend)

{}

Returns:

Type Description Axes or Figure Source code in modelskill/comparison/_comparer_plotter.py
def timeseries(\n    self,\n    *,\n    title: str | None = None,\n    ylim: Tuple[float, float] | None = None,\n    ax=None,\n    figsize: Tuple[float, float] | None = None,\n    backend: str = \"matplotlib\",\n    **kwargs,\n):\n    \"\"\"Timeseries plot showing compared data: observation vs modelled\n\n    Parameters\n    ----------\n    title : str, optional\n        plot title, by default None\n    ylim : (float, float), optional\n        plot range for the model (ymin, ymax), by default None\n    ax : matplotlib.axes.Axes, optional\n        axes to plot on, by default None\n    figsize : (float, float), optional\n        figure size, by default None\n    backend : str, optional\n        use \"plotly\" (interactive) or \"matplotlib\" backend,\n        by default \"matplotlib\"\n    **kwargs\n        other keyword arguments to fig.update_layout (plotly backend)\n\n    Returns\n    -------\n    matplotlib.axes.Axes or plotly.graph_objects.Figure\n    \"\"\"\n    from ._comparison import MOD_COLORS\n\n    cmp = self.comparer\n\n    if title is None:\n        title = cmp.name\n\n    if backend == \"matplotlib\":\n        fig, ax = _get_fig_ax(ax, figsize)\n        for j in range(cmp.n_models):\n            key = cmp.mod_names[j]\n            mod = cmp.raw_mod_data[key]._values_as_series\n            mod.plot(ax=ax, color=MOD_COLORS[j])\n\n        ax.scatter(\n            cmp.time,\n            cmp.data[cmp._obs_name].values,\n            marker=\".\",\n            color=cmp.data[cmp._obs_name].attrs[\"color\"],\n        )\n        ax.set_ylabel(cmp._unit_text)\n        ax.legend([*cmp.mod_names, cmp._obs_name])\n        ax.set_ylim(ylim)\n        if self.is_directional:\n            _ytick_directional(ax, ylim)\n        ax.set_title(title)\n        return ax\n\n    elif backend == \"plotly\":  # pragma: no cover\n        import plotly.graph_objects as go  # type: ignore\n\n        mod_scatter_list = []\n        for j in range(cmp.n_models):\n            key = 
cmp.mod_names[j]\n            mod = cmp.raw_mod_data[key]._values_as_series\n            mod_scatter_list.append(\n                go.Scatter(\n                    x=mod.index,\n                    y=mod.values,\n                    name=key,\n                    line=dict(color=MOD_COLORS[j]),\n                )\n            )\n\n        fig = go.Figure(\n            [\n                *mod_scatter_list,\n                go.Scatter(\n                    x=cmp.time,\n                    y=cmp.data[cmp._obs_name].values,\n                    name=cmp._obs_name,\n                    mode=\"markers\",\n                    marker=dict(color=cmp.data[cmp._obs_name].attrs[\"color\"]),\n                ),\n            ]\n        )\n\n        fig.update_layout(title=title, yaxis_title=cmp._unit_text, **kwargs)\n        fig.update_yaxes(range=ylim)\n\n        return fig\n    else:\n        raise ValueError(f\"Plotting backend: {backend} not supported\")\n
"},{"location":"api/comparercollection/","title":"ComparerCollection","text":"

The ComparerCollection is one of the main objects of the modelskill package. It is a collection of Comparer objects and created either by the match() method, by passing a list of Comparers to the ComparerCollection constructor, or by reading a config file using the from_config() function.

Main functionality:

  • selecting/filtering data
    • __get_item__() - get a single Comparer, e.g., cc[0] or cc['obs1']
    • sel()
    • query()
  • skill assessment
    • skill()
    • mean_skill()
    • gridded_skill() (for track observations)
  • plotting
    • plot.scatter()
    • plot.kde()
    • plot.hist()
  • load/save/export data
    • load()
    • save()
"},{"location":"api/comparercollection/#modelskill.ComparerCollection","title":"modelskill.ComparerCollection","text":"

Bases: Mapping, Scoreable

Collection of comparers, constructed by calling the modelskill.match method or by initializing with a list of comparers.

NOTE: In case of multiple model results with different time coverage, only the overlapping time period will be used! (intersection)

Examples:

>>> import modelskill as ms\n>>> mr = ms.DfsuModelResult(\"Oresund2D.dfsu\", item=0)\n>>> o1 = ms.PointObservation(\"klagshamn.dfs0\", item=0, x=366844, y=6154291, name=\"Klagshamn\")\n>>> o2 = ms.PointObservation(\"drogden.dfs0\", item=0, x=355568.0, y=6156863.0)\n>>> cmp1 = ms.match(o1, mr)  # Comparer\n>>> cmp2 = ms.match(o2, mr)  # Comparer\n>>> ccA = ms.ComparerCollection([cmp1, cmp2])\n>>> ccB = ms.match(obs=[o1, o2], mod=mr)\n>>> sk = ccB.skill()\n>>> ccB[\"Klagshamn\"].plot.timeseries()\n
Source code in modelskill/comparison/_collection.py
class ComparerCollection(Mapping, Scoreable):\n    \"\"\"\n    Collection of comparers, constructed by calling the `modelskill.match`\n    method or by initializing with a list of comparers.\n\n    NOTE: In case of multiple model results with different time coverage,\n    only the _overlapping_ time period will be used! (intersection)\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> mr = ms.DfsuModelResult(\"Oresund2D.dfsu\", item=0)\n    >>> o1 = ms.PointObservation(\"klagshamn.dfs0\", item=0, x=366844, y=6154291, name=\"Klagshamn\")\n    >>> o2 = ms.PointObservation(\"drogden.dfs0\", item=0, x=355568.0, y=6156863.0)\n    >>> cmp1 = ms.match(o1, mr)  # Comparer\n    >>> cmp2 = ms.match(o2, mr)  # Comparer\n    >>> ccA = ms.ComparerCollection([cmp1, cmp2])\n    >>> ccB = ms.match(obs=[o1, o2], mod=mr)\n    >>> sk = ccB.skill()\n    >>> ccB[\"Klagshamn\"].plot.timeseries()\n    \"\"\"\n\n    plotter = ComparerCollectionPlotter\n\n    def __init__(self, comparers: Iterable[Comparer]) -> None:\n        self._comparers: Dict[str, Comparer] = {}\n\n        for cmp in comparers:\n            if cmp.name in self._comparers:\n                # comparer with this name already exists!\n                # maybe the user is trying to add a new model\n                # or a new time period\n                self._comparers[cmp.name] += cmp\n            else:\n                self._comparers[cmp.name] = cmp\n\n        self.plot = ComparerCollection.plotter(self)\n        \"\"\"Plot using the ComparerCollectionPlotter\n\n        Examples\n        --------\n        >>> cc.plot.scatter()\n        >>> cc.plot.kde()\n        >>> cc.plot.taylor()\n        >>> cc.plot.hist()\n        \"\"\"\n\n    @property\n    def _name(self) -> str:\n        return \"Observations\"\n\n    @property\n    def _unit_text(self) -> str:\n        # Picking the first one is arbitrary, but it should be the same for all\n        # we could check that they are all the same, but let's 
assume that they are\n        # for cmp in self:\n        #     if cmp._unit_text != text:\n        #         warnings.warn(f\"Unit text is inconsistent: {text} vs {cmp._unit_text}\")\n        return self[0]._unit_text\n\n    @property\n    def n_comparers(self) -> int:\n        warnings.warn(\n            \"cc.n_comparers is deprecated, use len(cc) instead\",\n            FutureWarning,\n        )\n        return len(self)\n\n    @property\n    def n_points(self) -> int:\n        \"\"\"number of compared points\"\"\"\n        return sum([c.n_points for c in self._comparers.values()])\n\n    @property\n    def start(self) -> pd.Timestamp:\n        warnings.warn(\n            \"start is deprecated, use start_time instead\",\n            FutureWarning,\n        )\n        return self.start_time\n\n    @property\n    def start_time(self) -> pd.Timestamp:\n        \"\"\"start timestamp of compared data\"\"\"\n        starts = [pd.Timestamp.max]\n        for cmp in self._comparers.values():\n            starts.append(cmp.time[0])\n        return min(starts)\n\n    @property\n    def end(self) -> pd.Timestamp:\n        warnings.warn(\n            \"end is deprecated, use end_time instead\",\n            FutureWarning,\n        )\n        return self.end_time\n\n    @property\n    def end_time(self) -> pd.Timestamp:\n        \"\"\"end timestamp of compared data\"\"\"\n        ends = [pd.Timestamp.min]\n        for cmp in self._comparers.values():\n            ends.append(cmp.time[-1])\n        return max(ends)\n\n    @property\n    def obs_names(self) -> List[str]:\n        \"\"\"List of observation names\"\"\"\n        return [c.name for c in self._comparers.values()]\n\n    @property\n    def n_observations(self) -> int:\n        \"\"\"Number of observations (same as len(cc))\"\"\"\n        return len(self)\n\n    @property\n    def mod_names(self) -> List[str]:\n        \"\"\"List of unique model names\"\"\"\n        all_names = [n for cmp in self for n in 
cmp.mod_names]\n        # preserve order (instead of using set)\n        return list(dict.fromkeys(all_names))\n\n    @property\n    def n_models(self) -> int:\n        \"\"\"Number of unique models\"\"\"\n        return len(self.mod_names)\n\n    @property\n    def aux_names(self) -> List[str]:\n        \"\"\"List of unique auxiliary names\"\"\"\n        all_names = [n for cmp in self for n in cmp.aux_names]\n        # preserve order (instead of using set)\n        return list(dict.fromkeys(all_names))\n\n    @property\n    def quantity_names(self) -> List[str]:\n        \"\"\"List of unique quantity names\"\"\"\n        all_names = [cmp.quantity.name for cmp in self]\n        # preserve order (instead of using set)\n        return list(dict.fromkeys(all_names))\n\n    @property\n    def n_quantities(self) -> int:\n        \"\"\"Number of unique quantities\"\"\"\n        return len(self.quantity_names)\n\n    def __repr__(self) -> str:\n        out = []\n        out.append(\"<ComparerCollection>\")\n        out.append(\"Comparers:\")\n        for index, (key, value) in enumerate(self._comparers.items()):\n            out.append(f\"{index}: {key} - {value.quantity}\")\n        return str.join(\"\\n\", out)\n\n    def rename(self, mapping: Dict[str, str]) -> \"ComparerCollection\":\n        \"\"\"Rename observation, model or auxiliary data variables\n\n        Parameters\n        ----------\n        mapping : dict\n            mapping of old names to new names\n\n        Returns\n        -------\n        ComparerCollection\n\n        Examples\n        --------\n        >>> cc = ms.match([o1, o2], [mr1, mr2])\n        >>> cc.mod_names\n        ['mr1', 'mr2']\n        >>> cc2 = cc.rename({'mr1': 'model1'})\n        >>> cc2.mod_names\n        ['model1', 'mr2']\n        \"\"\"\n        for k in mapping.keys():\n            allowed_keys = self.obs_names + self.mod_names + self.aux_names\n            if k not in allowed_keys:\n                raise KeyError(f\"Unknown 
key: {k}; must be one of {allowed_keys}\")\n\n        cmps = []\n        for cmp in self._comparers.values():\n            cmps.append(cmp.rename(mapping, errors=\"ignore\"))\n        return ComparerCollection(cmps)\n\n    @overload\n    def __getitem__(self, x: slice | Iterable[Hashable]) -> ComparerCollection: ...\n\n    @overload\n    def __getitem__(self, x: int | Hashable) -> Comparer: ...\n\n    def __getitem__(\n        self, x: int | Hashable | slice | Iterable[Hashable]\n    ) -> Comparer | ComparerCollection:\n        if isinstance(x, str):\n            return self._comparers[x]\n\n        if isinstance(x, slice):\n            idxs = list(range(*x.indices(len(self))))\n            return ComparerCollection([self[i] for i in idxs])\n\n        if isinstance(x, int):\n            name = _get_name(x, self.obs_names)\n            return self._comparers[name]\n\n        if isinstance(x, Iterable):\n            cmps = [self[i] for i in x]\n            return ComparerCollection(cmps)\n\n        raise TypeError(f\"Invalid type for __getitem__: {type(x)}\")\n\n    def __len__(self) -> int:\n        return len(self._comparers)\n\n    def __iter__(self) -> Iterator[Comparer]:\n        return iter(self._comparers.values())\n\n    def copy(self) -> \"ComparerCollection\":\n        return deepcopy(self)\n\n    def __add__(\n        self, other: Union[\"Comparer\", \"ComparerCollection\"]\n    ) -> \"ComparerCollection\":\n        if not isinstance(other, (Comparer, ComparerCollection)):\n            raise TypeError(f\"Cannot add {type(other)} to {type(self)}\")\n\n        if isinstance(other, Comparer):\n            return ComparerCollection([*self, other])\n        elif isinstance(other, ComparerCollection):\n            return ComparerCollection([*self, *other])\n\n    def sel(\n        self,\n        model: Optional[IdxOrNameTypes] = None,\n        observation: Optional[IdxOrNameTypes] = None,\n        quantity: Optional[IdxOrNameTypes] = None,\n        start: 
Optional[TimeTypes] = None,\n        end: Optional[TimeTypes] = None,\n        time: Optional[TimeTypes] = None,\n        area: Optional[List[float]] = None,\n        variable: Optional[IdxOrNameTypes] = None,  # obsolete\n        **kwargs: Any,\n    ) -> \"ComparerCollection\":\n        \"\"\"Select data based on model, time and/or area.\n\n        Parameters\n        ----------\n        model : str or int or list of str or list of int, optional\n            Model name or index. If None, all models are selected.\n        observation : str or int or list of str or list of int, optional\n            Observation name or index. If None, all observations are selected.\n        quantity : str or int or list of str or list of int, optional\n            Quantity name or index. If None, all quantities are selected.\n        start : str or datetime, optional\n            Start time. If None, all times are selected.\n        end : str or datetime, optional\n            End time. If None, all times are selected.\n        time : str or datetime, optional\n            Time. If None, all times are selected.\n        area : list of float, optional\n            bbox: [x0, y0, x1, y1] or Polygon. If None, all areas are selected.\n        **kwargs\n            Filtering by comparer attrs similar to xarray.Dataset.filter_by_attrs\n            e.g. `sel(gtype='track')` or `sel(obs_provider='CMEMS')` if at least\n            one comparer has an entry `obs_provider` with value `CMEMS` in its\n            attrs container. 
Multiple kwargs are combined with logical AND.\n\n        Returns\n        -------\n        ComparerCollection\n            New ComparerCollection with selected data.\n        \"\"\"\n        if variable is not None:\n            warnings.warn(\n                \"variable is deprecated, use quantity instead\",\n                FutureWarning,\n            )\n            quantity = variable\n        # TODO is this really necessary to do both in ComparerCollection and Comparer?\n        if model is not None:\n            if isinstance(model, (str, int)):\n                models = [model]\n            else:\n                models = list(model)\n            mod_names: List[str] = [_get_name(m, self.mod_names) for m in models]\n        if observation is None:\n            observation = self.obs_names\n        else:\n            observation = [observation] if np.isscalar(observation) else observation  # type: ignore\n            observation = [_get_name(o, self.obs_names) for o in observation]  # type: ignore\n\n        if (quantity is not None) and (self.n_quantities > 1):\n            quantity = [quantity] if np.isscalar(quantity) else quantity  # type: ignore\n            quantity = [_get_name(v, self.quantity_names) for v in quantity]  # type: ignore\n        else:\n            quantity = self.quantity_names\n\n        cmps = []\n        for cmp in self._comparers.values():\n            if cmp.name in observation and cmp.quantity.name in quantity:\n                thismodel = (\n                    [m for m in mod_names if m in cmp.mod_names] if model else None\n                )\n                if (thismodel is not None) and (len(thismodel) == 0):\n                    continue\n                cmpsel = cmp.sel(\n                    model=thismodel,\n                    start=start,\n                    end=end,\n                    time=time,\n                    area=area,\n                )\n                if cmpsel is not None:\n                    # TODO: 
check if cmpsel is empty\n                    if cmpsel.n_points > 0:\n                        cmps.append(cmpsel)\n        cc = ComparerCollection(cmps)\n\n        if kwargs:\n            cc = cc.filter_by_attrs(**kwargs)\n\n        return cc\n\n    def filter_by_attrs(self, **kwargs: Any) -> \"ComparerCollection\":\n        \"\"\"Filter by comparer attrs similar to xarray.Dataset.filter_by_attrs\n\n        Parameters\n        ----------\n        **kwargs\n            Filtering by comparer attrs similar to xarray.Dataset.filter_by_attrs\n            e.g. `sel(gtype='track')` or `sel(obs_provider='CMEMS')` if at least\n            one comparer has an entry `obs_provider` with value `CMEMS` in its\n            attrs container. Multiple kwargs are combined with logical AND.\n\n        Returns\n        -------\n        ComparerCollection\n            New ComparerCollection with selected data.\n\n        Examples\n        --------\n        >>> cc = ms.match([HKNA, EPL, alti], mr)\n        >>> cc.filter_by_attrs(gtype='track')\n        <ComparerCollection>\n        Comparer: alti\n        \"\"\"\n        cmps = []\n        for cmp in self._comparers.values():\n            for k, v in kwargs.items():\n                # TODO: should we also filter on cmp.data.Observation.attrs?\n                if cmp.data.attrs.get(k) != v:\n                    break\n            else:\n                cmps.append(cmp)\n        return ComparerCollection(cmps)\n\n    def query(self, query: str) -> \"ComparerCollection\":\n        \"\"\"Select data based on a query.\n\n        Parameters\n        ----------\n        query : str\n            Query string. 
See pandas.DataFrame.query() for details.\n\n        Returns\n        -------\n        ComparerCollection\n            New ComparerCollection with selected data.\n        \"\"\"\n        q_cmps = [cmp.query(query) for cmp in self._comparers.values()]\n        cmps_with_data = [cmp for cmp in q_cmps if cmp.n_points > 0]\n\n        return ComparerCollection(cmps_with_data)\n\n    def skill(\n        self,\n        by: str | Iterable[str] | None = None,\n        metrics: Iterable[str] | Iterable[Callable] | str | Callable | None = None,\n        observed: bool = False,\n        **kwargs: Any,\n    ) -> SkillTable:\n        \"\"\"Aggregated skill assessment of model(s)\n\n        Parameters\n        ----------\n        by : str or List[str], optional\n            group by, by default [\"model\", \"observation\"]\n\n            - by column name\n            - by temporal bin of the DateTimeIndex via the freq-argument\n            (using pandas pd.Grouper(freq)), e.g.: 'freq:M' = monthly; 'freq:D' daily\n            - by the dt accessor of the DateTimeIndex (e.g. 'dt.month') using the\n            syntax 'dt:month'. 
The dt-argument is different from the freq-argument\n            in that it gives month-of-year rather than month-of-data.\n            - by attributes, stored in the cc.data.attrs container,\n            e.g.: 'attrs:obs_provider' = group by observation provider or\n            'attrs:gtype' = group by geometry type (track or point)\n        metrics : list, optional\n            list of modelskill.metrics (or str), by default modelskill.options.metrics.list\n        observed: bool, optional\n            This only applies if any of the groupers are Categoricals.\n\n            - True: only show observed values for categorical groupers.\n            - False: show all values for categorical groupers.\n\n        Returns\n        -------\n        SkillTable\n            skill assessment as a SkillTable object\n\n        See also\n        --------\n        sel\n            a method for filtering/selecting data\n\n        Examples\n        --------\n        >>> import modelskill as ms\n        >>> cc = ms.match([HKNA,EPL,c2], mr)\n        >>> cc.skill().round(2)\n                       n  bias  rmse  urmse   mae    cc    si    r2\n        observation\n        HKNA         385 -0.20  0.35   0.29  0.25  0.97  0.09  0.99\n        EPL           66 -0.08  0.22   0.20  0.18  0.97  0.07  0.99\n        c2           113 -0.00  0.35   0.35  0.29  0.97  0.12  0.99\n\n        >>> cc.sel(observation='c2', start='2017-10-28').skill().round(2)\n                       n  bias  rmse  urmse   mae    cc    si    r2\n        observation\n        c2            41  0.33  0.41   0.25  0.36  0.96  0.06  0.99\n\n        >>> cc.skill(by='freq:D').round(2)\n                      n  bias  rmse  urmse   mae    cc    si    r2\n        2017-10-27  239 -0.15  0.25   0.21  0.20  0.72  0.10  0.98\n        2017-10-28  162 -0.07  0.19   0.18  0.16  0.96  0.06  1.00\n        2017-10-29  163 -0.21  0.52   0.47  0.42  0.79  0.11  0.99\n        \"\"\"\n\n        # TODO remove in v1.1 ----------\n        model, 
start, end, area = _get_deprecated_args(kwargs)  # type: ignore\n        observation, variable = _get_deprecated_obs_var_args(kwargs)  # type: ignore\n        assert kwargs == {}, f\"Unknown keyword arguments: {kwargs}\"\n\n        cc = self.sel(\n            model=model,\n            observation=observation,\n            quantity=variable,\n            start=start,\n            end=end,\n            area=area,\n        )\n        if cc.n_points == 0:\n            raise ValueError(\"Dataset is empty, no data to compare.\")\n\n        ## ---- end of deprecated code ----\n\n        pmetrics = _parse_metric(metrics)\n\n        agg_cols = _parse_groupby(by, n_mod=cc.n_models, n_qnt=cc.n_quantities)\n        agg_cols, attrs_keys = self._attrs_keys_in_by(agg_cols)\n\n        df = cc._to_long_dataframe(attrs_keys=attrs_keys, observed=observed)\n\n        res = _groupby_df(df, by=agg_cols, metrics=pmetrics)\n        mtr_cols = [m.__name__ for m in pmetrics]  # type: ignore\n        res = res.dropna(subset=mtr_cols, how=\"all\")  # TODO: ok to remove empty?\n        res = self._append_xy_to_res(res, cc)\n        res = cc._add_as_col_if_not_in_index(df, skilldf=res)  # type: ignore\n        return SkillTable(res)\n\n    def _to_long_dataframe(\n        self, attrs_keys: Iterable[str] | None = None, observed: bool = False\n    ) -> pd.DataFrame:\n        \"\"\"Return a copy of the data as a long-format pandas DataFrame (for groupby operations)\"\"\"\n        frames = []\n        for cmp in self:\n            frame = cmp._to_long_dataframe(attrs_keys=attrs_keys)\n            if self.n_quantities > 1:\n                frame[\"quantity\"] = cmp.quantity.name\n            frames.append(frame)\n        res = pd.concat(frames)\n\n        cat_cols = res.select_dtypes(include=[\"object\"]).columns\n        res[cat_cols] = res[cat_cols].astype(\"category\")\n\n        if observed:\n            res = res.loc[~(res == False).any(axis=1)]  # noqa\n        return res\n\n    
@staticmethod\n    def _attrs_keys_in_by(by: List[str | pd.Grouper]) -> Tuple[List[str], List[str]]:\n        attrs_keys: List[str] = []\n        agg_cols: List[str] = []\n        for b in by:\n            if isinstance(b, str) and b.startswith(\"attrs:\"):\n                key = b.split(\":\")[1]\n                attrs_keys.append(key)\n                agg_cols.append(key)\n            else:\n                agg_cols.append(b)\n        return agg_cols, attrs_keys\n\n    @staticmethod\n    def _append_xy_to_res(res: pd.DataFrame, cc: ComparerCollection) -> pd.DataFrame:\n        \"\"\"skill() helper: Append x and y to res if possible\"\"\"\n        res[\"x\"] = np.nan\n        res[\"y\"] = np.nan\n\n        # for MultiIndex in res find \"observation\" level and\n        # insert x, y if gtype=point for that observation\n        if \"observation\" in res.index.names:\n            idx_names = res.index.names\n            res = res.reset_index()\n            for cmp in cc:\n                if cmp.gtype == \"point\":\n                    res.loc[res.observation == cmp.name, \"x\"] = cmp.x\n                    res.loc[res.observation == cmp.name, \"y\"] = cmp.y\n            res = res.set_index(idx_names)\n        return res\n\n    def _add_as_col_if_not_in_index(\n        self,\n        df: pd.DataFrame,\n        skilldf: pd.DataFrame,\n        fields: List[str] = [\"model\", \"observation\", \"quantity\"],\n    ) -> pd.DataFrame:\n        \"\"\"skill() helper: Add a field to skilldf if unique in df\"\"\"\n        for field in reversed(fields):\n            if (field == \"model\") and (self.n_models <= 1):\n                continue\n            if (field == \"quantity\") and (self.n_quantities <= 1):\n                continue\n            if field not in skilldf.index.names:\n                unames = df[field].unique()\n                if len(unames) == 1:\n                    skilldf.insert(loc=0, column=field, value=unames[0])\n        return skilldf\n\n    def 
gridded_skill(\n        self,\n        bins: int = 5,\n        binsize: float | None = None,\n        by: str | Iterable[str] | None = None,\n        metrics: Iterable[str] | Iterable[Callable] | str | Callable | None = None,\n        n_min: Optional[int] = None,\n        **kwargs: Any,\n    ) -> SkillGrid:\n        \"\"\"Skill assessment of model(s) on a regular spatial grid.\n\n        Parameters\n        ----------\n        bins: int, list of scalars, or IntervalIndex, or tuple of, optional\n            criteria to bin x and y by, argument bins to pd.cut(), default 5\n            define different bins for x and y a tuple\n            e.g.: bins = 5, bins = (5,[2,3,5])\n        binsize : float, optional\n            bin size for x and y dimension, overwrites bins\n            creates bins with reference to round(mean(x)), round(mean(y))\n        by : str, List[str], optional\n            group by, by default [\"model\", \"observation\"]\n\n            - by column name\n            - by temporal bin of the DateTimeIndex via the freq-argument\n            (using pandas pd.Grouper(freq)), e.g.: 'freq:M' = monthly; 'freq:D' daily\n            - by the dt accessor of the DateTimeIndex (e.g. 'dt.month') using the\n            syntax 'dt:month'. 
The dt-argument is different from the freq-argument\n            in that it gives month-of-year rather than month-of-data.\n        metrics : list, optional\n            list of modelskill.metrics, by default modelskill.options.metrics.list\n        n_min : int, optional\n            minimum number of observations in a grid cell;\n            cells with fewer observations get a score of `np.nan`\n\n        Returns\n        -------\n        SkillGrid\n            skill assessment as a SkillGrid object\n\n        See also\n        --------\n        skill\n            a method for aggregated skill assessment\n\n        Examples\n        --------\n        >>> import modelskill as ms\n        >>> cc = ms.match([HKNA,EPL,c2], mr)  # with satellite track measurements\n        >>> gs = cc.gridded_skill(metrics='bias')\n        >>> gs.data\n        <xarray.Dataset>\n        Dimensions:      (x: 5, y: 5)\n        Coordinates:\n            observation   'alti'\n        * x            (x) float64 -0.436 1.543 3.517 5.492 7.466\n        * y            (y) float64 50.6 51.66 52.7 53.75 54.8\n        Data variables:\n            n            (x, y) int32 3 0 0 14 37 17 50 36 72 ... 0 0 15 20 0 0 0 28 76\n            bias         (x, y) float64 -0.02626 nan nan ... 
nan 0.06785 -0.1143\n\n        >>> gs = cc.gridded_skill(binsize=0.5)\n        >>> gs.data.coords\n        Coordinates:\n            observation   'alti'\n        * x            (x) float64 -1.5 -0.5 0.5 1.5 2.5 3.5 4.5 5.5 6.5 7.5\n        * y            (y) float64 51.5 52.5 53.5 54.5 55.5 56.5\n        \"\"\"\n\n        model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore\n        observation, variable = _get_deprecated_obs_var_args(kwargs)  # type: ignore\n        assert kwargs == {}, f\"Unknown keyword arguments: {kwargs}\"\n\n        cmp = self.sel(\n            model=model,\n            observation=observation,\n            quantity=variable,\n            start=start,\n            end=end,\n            area=area,\n        )\n\n        if cmp.n_points == 0:\n            raise ValueError(\"Dataset is empty, no data to compare.\")\n\n        ## ---- end of deprecated code ----\n\n        metrics = _parse_metric(metrics)\n\n        df = cmp._to_long_dataframe()\n        df = _add_spatial_grid_to_df(df=df, bins=bins, binsize=binsize)\n\n        agg_cols = _parse_groupby(by, n_mod=cmp.n_models, n_qnt=cmp.n_quantities)\n        if \"x\" not in agg_cols:\n            agg_cols.insert(0, \"x\")\n        if \"y\" not in agg_cols:\n            agg_cols.insert(0, \"y\")\n\n        df = df.drop(columns=[\"x\", \"y\"]).rename(columns=dict(xBin=\"x\", yBin=\"y\"))\n        res = _groupby_df(df, by=agg_cols, metrics=metrics, n_min=n_min)\n        ds = res.to_xarray().squeeze()\n\n        # change categorial index to coordinates\n        for dim in (\"x\", \"y\"):\n            ds[dim] = ds[dim].astype(float)\n        return SkillGrid(ds)\n\n    def mean_skill(\n        self,\n        *,\n        weights: Optional[Union[str, List[float], Dict[str, float]]] = None,\n        metrics: Optional[list] = None,\n        **kwargs: Any,\n    ) -> SkillTable:\n        \"\"\"Weighted mean of skills\n\n        First, the skill is calculated per observation,\n        the 
weighted mean of the skills is then found.\n\n        Warning: This method is NOT the mean skill of\n        all observational points! (mean_skill_points)\n\n        Parameters\n        ----------\n        weights : str or List(float) or Dict(str, float), optional\n            weighting of observations, by default None\n\n            - None: use observations weight attribute (if assigned, else \"equal\")\n            - \"equal\": giving all observations equal weight,\n            - \"points\": giving all points equal weight,\n            - list of weights e.g. [0.3, 0.3, 0.4] per observation,\n            - dictionary of observations with special weigths, others will be set to 1.0\n        metrics : list, optional\n            list of modelskill.metrics, by default modelskill.options.metrics.list\n\n        Returns\n        -------\n        SkillTable\n            mean skill assessment as a SkillTable object\n\n        See also\n        --------\n        skill\n            skill assessment per observation\n        mean_skill_points\n            skill assessment pooling all observation points together\n\n        Examples\n        --------\n        >>> import modelskill as ms\n        >>> cc = ms.match([HKNA,EPL,c2], mod=HKZN_local)\n        >>> cc.mean_skill().round(2)\n                      n  bias  rmse  urmse   mae    cc    si    r2\n        HKZN_local  564 -0.09  0.31   0.28  0.24  0.97  0.09  0.99\n        >>> sk = cc.mean_skill(weights=\"equal\")\n        >>> sk = cc.mean_skill(weights=\"points\")\n        >>> sk = cc.mean_skill(weights={\"EPL\": 2.0}) # more weight on EPL, others=1.0\n        \"\"\"\n\n        # TODO remove in v1.1\n        model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore\n        observation, variable = _get_deprecated_obs_var_args(kwargs)  # type: ignore\n        assert kwargs == {}, f\"Unknown keyword arguments: {kwargs}\"\n\n        # filter data\n        cc = self.sel(\n            model=model,  # deprecated\n       
     observation=observation,  # deprecated\n            quantity=variable,  # deprecated\n            start=start,  # deprecated\n            end=end,  # deprecated\n            area=area,  # deprecated\n        )\n        if cc.n_points == 0:\n            raise ValueError(\"Dataset is empty, no data to compare.\")\n\n        ## ---- end of deprecated code ----\n\n        df = cc._to_long_dataframe()  # TODO: remove\n        mod_names = cc.mod_names\n        # obs_names = cmp.obs_names  # df.observation.unique()\n        qnt_names = cc.quantity_names\n\n        # skill assessment\n        pmetrics = _parse_metric(metrics)\n        sk = cc.skill(metrics=pmetrics)\n        if sk is None:\n            return None\n        skilldf = sk.to_dataframe()\n\n        # weights\n        weights = cc._parse_weights(weights, sk.obs_names)\n        skilldf[\"weights\"] = (\n            skilldf.n if weights is None else np.tile(weights, len(mod_names))  # type: ignore\n        )\n\n        def weighted_mean(x: Any) -> Any:\n            return np.average(x, weights=skilldf.loc[x.index, \"weights\"])\n\n        # group by\n        by = cc._mean_skill_by(skilldf, mod_names, qnt_names)  # type: ignore\n        agg = {\"n\": \"sum\"}\n        for metric in pmetrics:  # type: ignore\n            agg[metric.__name__] = weighted_mean  # type: ignore\n        res = skilldf.groupby(by, observed=False).agg(agg)\n\n        # TODO is this correct?\n        res.index.name = \"model\"\n\n        # output\n        res = cc._add_as_col_if_not_in_index(df, res, fields=[\"model\", \"quantity\"])  # type: ignore\n        return SkillTable(res.astype({\"n\": int}))\n\n    # def mean_skill_points(\n    #     self,\n    #     *,\n    #     metrics: Optional[list] = None,\n    #     **kwargs,\n    # ) -> Optional[SkillTable]:  # TODO raise error if no data?\n    #     \"\"\"Mean skill of all observational points\n\n    #     All data points are pooled (disregarding which observation they belong to),\n  
  #     the skill is then found (for each model).\n\n    #     .. note::\n    #         No weighting can be applied with this method,\n    #         use mean_skill() if you need to apply weighting\n\n    #     .. warning::\n    #         This method is NOT the mean of skills (mean_skill)\n\n    #     Parameters\n    #     ----------\n    #     metrics : list, optional\n    #         list of modelskill.metrics, by default modelskill.options.metrics.list\n\n    #     Returns\n    #     -------\n    #     SkillTable\n    #         mean skill assessment as a skill object\n\n    #     See also\n    #     --------\n    #     skill\n    #         skill assessment per observation\n    #     mean_skill\n    #         weighted mean of skills (not the same as this method)\n\n    #     Examples\n    #     --------\n    #     >>> import modelskill as ms\n    #     >>> cc = ms.match(obs, mod)\n    #     >>> cc.mean_skill_points()\n    #     \"\"\"\n\n    #     # TODO remove in v1.1\n    #     model, start, end, area = _get_deprecated_args(kwargs)\n    #     observation, variable = _get_deprecated_obs_var_args(kwargs)\n    #     assert kwargs == {}, f\"Unknown keyword arguments: {kwargs}\"\n\n    #     # filter data\n    #     cmp = self.sel(\n    #         model=model,\n    #         observation=observation,\n    #         variable=variable,\n    #         start=start,\n    #         end=end,\n    #         area=area,\n    #     )\n    #     if cmp.n_points == 0:\n    #         warnings.warn(\"No data!\")\n    #         return None\n\n    #     dfall = cmp.to_dataframe()\n    #     dfall[\"observation\"] = \"all\"\n\n    #     # TODO: no longer possible to do this way\n    #     # return self.skill(df=dfall, metrics=metrics)\n    #     return cmp.skill(metrics=metrics)  # NOT CORRECT - SEE ABOVE\n\n    def _mean_skill_by(self, skilldf, mod_names, qnt_names):  # type: ignore\n        by = []\n        if len(mod_names) > 1:\n            by.append(\"model\")\n        if 
len(qnt_names) > 1:\n            by.append(\"quantity\")\n        if len(by) == 0:\n            if (self.n_quantities > 1) and (\"quantity\" in skilldf):\n                by.append(\"quantity\")\n            elif \"model\" in skilldf:\n                by.append(\"model\")\n            else:\n                by = [mod_names[0]] * len(skilldf)\n        return by\n\n    def _parse_weights(self, weights: Any, observations: Any) -> Any:\n        if observations is None:\n            observations = self.obs_names\n        else:\n            observations = [observations] if np.isscalar(observations) else observations\n            observations = [_get_name(o, self.obs_names) for o in observations]\n        n_obs = len(observations)\n\n        if weights is None:\n            # get weights from observation objects\n            # default is equal weight to all\n            weights = [self._comparers[o].weight for o in observations]\n        else:\n            if isinstance(weights, int):\n                weights = np.ones(n_obs)  # equal weight to all\n            elif isinstance(weights, dict):\n                w_dict = weights\n                weights = [w_dict.get(name, 1.0) for name in observations]\n\n            elif isinstance(weights, str):\n                if weights.lower() == \"equal\":\n                    weights = np.ones(n_obs)  # equal weight to all\n                elif \"point\" in weights.lower():\n                    weights = None  # no weight => use n_points\n                else:\n                    raise ValueError(\n                        \"unknown weights argument (None, 'equal', 'points', or list of floats)\"\n                    )\n            elif not np.isscalar(weights):\n                if n_obs == 1:\n                    if len(weights) > 1:\n                        warnings.warn(\n                            \"Cannot apply multiple weights to one observation\"\n                        )\n                    weights = [1.0]\n                
if not len(weights) == n_obs:\n                    raise ValueError(\n                        f\"weights must have same length as observations: {observations}\"\n                    )\n        if weights is not None:\n            assert len(weights) == n_obs\n        return weights\n\n    def score(\n        self,\n        metric: str | Callable = mtr.rmse,\n        **kwargs: Any,\n    ) -> Dict[str, float]:\n        \"\"\"Weighted mean score of model(s) over all observations\n\n        Wrapping mean_skill() with a single metric.\n\n        NOTE: will take simple mean over different quantities!\n\n        Parameters\n        ----------\n        weights : str or List(float) or Dict(str, float), optional\n            weighting of observations, by default None\n\n            - None: use observations weight attribute (if assigned, else \"equal\")\n            - \"equal\": giving all observations equal weight,\n            - \"points\": giving all points equal weight,\n            - list of weights e.g. 
[0.3, 0.3, 0.4] per observation,\n            - dictionary of observations with special weigths, others will be set to 1.0\n        metric : list, optional\n            a single metric from modelskill.metrics, by default rmse\n\n        Returns\n        -------\n        Dict[str, float]\n            mean of skills score as a single number (for each model)\n\n        See also\n        --------\n        skill\n            skill assessment per observation\n        mean_skill\n            weighted mean of skills assessment\n        mean_skill_points\n            skill assessment pooling all observation points together\n\n        Examples\n        --------\n        >>> import modelskill as ms\n        >>> cc = ms.match([o1, o2], mod)\n        >>> cc.score()\n        {'mod': 0.30681206}\n        >>> cc.score(weights=[0.1,0.1,0.8])\n        {'mod': 0.3383011631797379}\n\n        >>> cc.score(weights='points', metric=\"mape\")\n        {'mod': 8.414442957854142}\n        \"\"\"\n\n        weights = kwargs.pop(\"weights\", None)\n\n        metric = _parse_metric(metric)[0]\n\n        if weights is None:\n            weights = {c.name: c.weight for c in self._comparers.values()}\n\n        if not (callable(metric) or isinstance(metric, str)):\n            raise ValueError(\"metric must be a string or a function\")\n\n        model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore\n        observation, variable = _get_deprecated_obs_var_args(kwargs)  # type: ignore\n        assert kwargs == {}, f\"Unknown keyword arguments: {kwargs}\"\n\n        if model is None:\n            models = self.mod_names\n        else:\n            # TODO: these two lines looks familiar, extract to function\n            models = [model] if np.isscalar(model) else model  # type: ignore\n            models = [_get_name(m, self.mod_names) for m in models]  # type: ignore\n\n        cmp = self.sel(\n            model=models,  # deprecated\n            observation=observation,  # 
deprecated\n            quantity=variable,  # deprecated\n            start=start,  # deprecated\n            end=end,  # deprecated\n            area=area,  # deprecated\n        )\n\n        if cmp.n_points == 0:\n            raise ValueError(\"Dataset is empty, no data to compare.\")\n\n        ## ---- end of deprecated code ----\n\n        sk = cmp.mean_skill(weights=weights, metrics=[metric])\n        df = sk.to_dataframe()\n\n        metric_name = metric if isinstance(metric, str) else metric.__name__\n        ser = df[metric_name]\n        score = {str(col): float(value) for col, value in ser.items()}\n\n        return score\n\n    def save(self, filename: Union[str, Path]) -> None:\n        \"\"\"Save the ComparerCollection to a zip file.\n\n        Each comparer is stored as a netcdf file in the zip file.\n\n        Parameters\n        ----------\n        filename : str or Path\n            Filename of the zip file.\n\n        Examples\n        --------\n        >>> cc = ms.match(obs, mod)\n        >>> cc.save(\"my_comparer_collection.msk\")\n        \"\"\"\n\n        files = []\n        no = 0\n        for name, cmp in self._comparers.items():\n            cmp_fn = f\"{no}_{name}.nc\"\n            cmp.save(cmp_fn)\n            files.append(cmp_fn)\n            no += 1\n\n        with zipfile.ZipFile(filename, \"w\") as zip:\n            for f in files:\n                zip.write(f)\n                os.remove(f)\n\n    @staticmethod\n    def load(filename: Union[str, Path]) -> \"ComparerCollection\":\n        \"\"\"Load a ComparerCollection from a zip file.\n\n        Parameters\n        ----------\n        filename : str or Path\n            Filename of the zip file.\n\n        Returns\n        -------\n        ComparerCollection\n            The loaded ComparerCollection.\n\n        Examples\n        --------\n        >>> cc = ms.match(obs, mod)\n        >>> cc.save(\"my_comparer_collection.msk\")\n        >>> cc2 = 
ms.ComparerCollection.load(\"my_comparer_collection.msk\")\n        \"\"\"\n\n        folder = tempfile.TemporaryDirectory().name\n\n        with zipfile.ZipFile(filename, \"r\") as zip:\n            for f in zip.namelist():\n                if f.endswith(\".nc\"):\n                    zip.extract(f, path=folder)\n\n        comparers = [\n            ComparerCollection._load_comparer(folder, f)\n            for f in sorted(os.listdir(folder))\n        ]\n        return ComparerCollection(comparers)\n\n    @staticmethod\n    def _load_comparer(folder: str, f: str) -> Comparer:\n        f = os.path.join(folder, f)\n        cmp = Comparer.load(f)\n        os.remove(f)\n        return cmp\n\n    # =============== Deprecated methods ===============\n\n    def spatial_skill(\n        self,\n        bins=5,\n        binsize=None,\n        by=None,\n        metrics=None,\n        n_min=None,\n        **kwargs,\n    ):\n        warnings.warn(\n            \"spatial_skill is deprecated, use gridded_skill instead\", FutureWarning\n        )\n        return self.gridded_skill(\n            bins=bins,\n            binsize=binsize,\n            by=by,\n            metrics=metrics,\n            n_min=n_min,\n            **kwargs,\n        )\n\n    def scatter(\n        self,\n        *,\n        bins=120,\n        quantiles=None,\n        fit_to_quantiles=False,\n        show_points=None,\n        show_hist=None,\n        show_density=None,\n        backend=\"matplotlib\",\n        figsize=(8, 8),\n        xlim=None,\n        ylim=None,\n        reg_method=\"ols\",\n        title=None,\n        xlabel=None,\n        ylabel=None,\n        skill_table=None,\n        **kwargs,\n    ):\n        warnings.warn(\"scatter is deprecated, use plot.scatter instead\", FutureWarning)\n\n        # TODO remove in v1.1\n        model, start, end, area = _get_deprecated_args(kwargs)\n        observation, variable = _get_deprecated_obs_var_args(kwargs)\n\n        # select model\n        mod_idx = 
_get_idx(model, self.mod_names)\n        mod_name = self.mod_names[mod_idx]\n\n        # select variable\n        qnt_idx = _get_idx(variable, self.quantity_names)\n        qnt_name = self.quantity_names[qnt_idx]\n\n        # filter data\n        cmp = self.sel(\n            model=mod_name,\n            observation=observation,\n            quantity=qnt_name,\n            start=start,\n            end=end,\n            area=area,\n        )\n\n        return cmp.plot.scatter(\n            bins=bins,\n            quantiles=quantiles,\n            fit_to_quantiles=fit_to_quantiles,\n            show_points=show_points,\n            show_hist=show_hist,\n            show_density=show_density,\n            backend=backend,\n            figsize=figsize,\n            xlim=xlim,\n            ylim=ylim,\n            reg_method=reg_method,\n            title=title,\n            xlabel=xlabel,\n            ylabel=ylabel,\n            skill_table=skill_table,\n            **kwargs,\n        )\n\n    def taylor(\n        self,\n        normalize_std=False,\n        aggregate_observations=True,\n        figsize=(7, 7),\n        marker=\"o\",\n        marker_size=6.0,\n        title=\"Taylor diagram\",\n        **kwargs,\n    ):\n        warnings.warn(\"taylor is deprecated, use plot.taylor instead\", FutureWarning)\n\n        model, start, end, area = _get_deprecated_args(kwargs)\n        observation, variable = _get_deprecated_obs_var_args(kwargs)\n        assert kwargs == {}, f\"Unknown keyword arguments: {kwargs}\"\n\n        cmp = self.sel(\n            model=model,\n            observation=observation,\n            quantity=variable,\n            start=start,\n            end=end,\n            area=area,\n        )\n\n        if cmp.n_points == 0:\n            warnings.warn(\"No data!\")\n            return\n\n        if (not aggregate_observations) and (not normalize_std):\n            raise ValueError(\n                \"aggregate_observations=False is only possible if 
normalize_std=True!\"\n            )\n\n        metrics = [mtr._std_obs, mtr._std_mod, mtr.cc]\n        skill_func = cmp.mean_skill if aggregate_observations else cmp.skill\n        sk = skill_func(metrics=metrics)\n\n        df = sk.to_dataframe()\n        ref_std = 1.0 if normalize_std else df.iloc[0][\"_std_obs\"]\n\n        if isinstance(df.index, pd.MultiIndex):\n            df.index = df.index.map(\"_\".join)\n\n        df = df[[\"_std_obs\", \"_std_mod\", \"cc\"]].copy()\n        df.columns = [\"obs_std\", \"std\", \"cc\"]\n        pts = [\n            TaylorPoint(\n                r.Index, r.obs_std, r.std, r.cc, marker=marker, marker_size=marker_size\n            )\n            for r in df.itertuples()\n        ]\n\n        taylor_diagram(\n            obs_std=ref_std,\n            points=pts,\n            figsize=figsize,\n            normalize_std=normalize_std,\n            title=title,\n        )\n\n    def kde(self, ax=None, **kwargs):\n        warnings.warn(\"kde is deprecated, use plot.kde instead\", FutureWarning)\n\n        return self.plot.kde(ax=ax, **kwargs)\n\n    def hist(\n        self,\n        model=None,\n        bins=100,\n        title=None,\n        density=True,\n        alpha=0.5,\n        **kwargs,\n    ):\n        warnings.warn(\"hist is deprecated, use plot.hist instead\", FutureWarning)\n\n        return self.plot.hist(\n            model=model, bins=bins, title=title, density=density, alpha=alpha, **kwargs\n        )\n
"},{"location":"api/comparercollection/#modelskill.ComparerCollection.aux_names","title":"aux_names property","text":"
aux_names\n

List of unique auxiliary names

"},{"location":"api/comparercollection/#modelskill.ComparerCollection.end_time","title":"end_time property","text":"
end_time\n

end timestamp of compared data

"},{"location":"api/comparercollection/#modelskill.ComparerCollection.mod_names","title":"mod_names property","text":"
mod_names\n

List of unique model names

"},{"location":"api/comparercollection/#modelskill.ComparerCollection.n_models","title":"n_models property","text":"
n_models\n

Number of unique models

"},{"location":"api/comparercollection/#modelskill.ComparerCollection.n_observations","title":"n_observations property","text":"
n_observations\n

Number of observations (same as len(cc))

"},{"location":"api/comparercollection/#modelskill.ComparerCollection.n_points","title":"n_points property","text":"
n_points\n

number of compared points

"},{"location":"api/comparercollection/#modelskill.ComparerCollection.n_quantities","title":"n_quantities property","text":"
n_quantities\n

Number of unique quantities

"},{"location":"api/comparercollection/#modelskill.ComparerCollection.obs_names","title":"obs_names property","text":"
obs_names\n

List of observation names

"},{"location":"api/comparercollection/#modelskill.ComparerCollection.plot","title":"plot instance-attribute","text":"
plot = plotter(self)\n

Plot using the ComparerCollectionPlotter

Examples:

>>> cc.plot.scatter()\n>>> cc.plot.kde()\n>>> cc.plot.taylor()\n>>> cc.plot.hist()\n
"},{"location":"api/comparercollection/#modelskill.ComparerCollection.quantity_names","title":"quantity_names property","text":"
quantity_names\n

List of unique quantity names

"},{"location":"api/comparercollection/#modelskill.ComparerCollection.start_time","title":"start_time property","text":"
start_time\n

start timestamp of compared data

"},{"location":"api/comparercollection/#modelskill.ComparerCollection.filter_by_attrs","title":"filter_by_attrs","text":"
filter_by_attrs(**kwargs)\n

Filter by comparer attrs similar to xarray.Dataset.filter_by_attrs

Parameters:

Name Type Description Default **kwargs Any

Filtering by comparer attrs similar to xarray.Dataset.filter_by_attrs e.g. sel(gtype='track') or sel(obs_provider='CMEMS') if at least one comparer has an entry obs_provider with value CMEMS in its attrs container. Multiple kwargs are combined with logical AND.

{}

Returns:

Type Description ComparerCollection

New ComparerCollection with selected data.

Examples:

>>> cc = ms.match([HKNA, EPL, alti], mr)\n>>> cc.filter_by_attrs(gtype='track')\n<ComparerCollection>\nComparer: alti\n
Source code in modelskill/comparison/_collection.py
def filter_by_attrs(self, **kwargs: Any) -> \"ComparerCollection\":\n    \"\"\"Filter by comparer attrs similar to xarray.Dataset.filter_by_attrs\n\n    Parameters\n    ----------\n    **kwargs\n        Filtering by comparer attrs similar to xarray.Dataset.filter_by_attrs\n        e.g. `sel(gtype='track')` or `sel(obs_provider='CMEMS')` if at least\n        one comparer has an entry `obs_provider` with value `CMEMS` in its\n        attrs container. Multiple kwargs are combined with logical AND.\n\n    Returns\n    -------\n    ComparerCollection\n        New ComparerCollection with selected data.\n\n    Examples\n    --------\n    >>> cc = ms.match([HKNA, EPL, alti], mr)\n    >>> cc.filter_by_attrs(gtype='track')\n    <ComparerCollection>\n    Comparer: alti\n    \"\"\"\n    cmps = []\n    for cmp in self._comparers.values():\n        for k, v in kwargs.items():\n            # TODO: should we also filter on cmp.data.Observation.attrs?\n            if cmp.data.attrs.get(k) != v:\n                break\n        else:\n            cmps.append(cmp)\n    return ComparerCollection(cmps)\n
"},{"location":"api/comparercollection/#modelskill.ComparerCollection.gridded_skill","title":"gridded_skill","text":"
gridded_skill(bins=5, binsize=None, by=None, metrics=None, n_min=None, **kwargs)\n

Skill assessment of model(s) on a regular spatial grid.

Parameters:

Name Type Description Default bins int

criteria to bin x and y by, argument bins to pd.cut(), default 5 define different bins for x and y a tuple e.g.: bins = 5, bins = (5,[2,3,5])

5 binsize float

bin size for x and y dimension, overwrites bins creates bins with reference to round(mean(x)), round(mean(y))

None by (str, List[str])

group by, by default [\"model\", \"observation\"]

  • by column name
  • by temporal bin of the DateTimeIndex via the freq-argument (using pandas pd.Grouper(freq)), e.g.: 'freq:M' = monthly; 'freq:D' daily
  • by the dt accessor of the DateTimeIndex (e.g. 'dt.month') using the syntax 'dt:month'. The dt-argument is different from the freq-argument in that it gives month-of-year rather than month-of-data.
None metrics list

list of modelskill.metrics, by default modelskill.options.metrics.list

None n_min int

minimum number of observations in a grid cell; cells with fewer observations get a score of np.nan

None

Returns:

Type Description SkillGrid

skill assessment as a SkillGrid object

See also

skill a method for aggregated skill assessment

Examples:

>>> import modelskill as ms\n>>> cc = ms.match([HKNA,EPL,c2], mr)  # with satellite track measurements\n>>> gs = cc.gridded_skill(metrics='bias')\n>>> gs.data\n<xarray.Dataset>\nDimensions:      (x: 5, y: 5)\nCoordinates:\n    observation   'alti'\n* x            (x) float64 -0.436 1.543 3.517 5.492 7.466\n* y            (y) float64 50.6 51.66 52.7 53.75 54.8\nData variables:\n    n            (x, y) int32 3 0 0 14 37 17 50 36 72 ... 0 0 15 20 0 0 0 28 76\n    bias         (x, y) float64 -0.02626 nan nan ... nan 0.06785 -0.1143\n
>>> gs = cc.gridded_skill(binsize=0.5)\n>>> gs.data.coords\nCoordinates:\n    observation   'alti'\n* x            (x) float64 -1.5 -0.5 0.5 1.5 2.5 3.5 4.5 5.5 6.5 7.5\n* y            (y) float64 51.5 52.5 53.5 54.5 55.5 56.5\n
Source code in modelskill/comparison/_collection.py
def gridded_skill(\n    self,\n    bins: int = 5,\n    binsize: float | None = None,\n    by: str | Iterable[str] | None = None,\n    metrics: Iterable[str] | Iterable[Callable] | str | Callable | None = None,\n    n_min: Optional[int] = None,\n    **kwargs: Any,\n) -> SkillGrid:\n    \"\"\"Skill assessment of model(s) on a regular spatial grid.\n\n    Parameters\n    ----------\n    bins: int, list of scalars, or IntervalIndex, or tuple of, optional\n        criteria to bin x and y by, argument bins to pd.cut(), default 5\n        define different bins for x and y a tuple\n        e.g.: bins = 5, bins = (5,[2,3,5])\n    binsize : float, optional\n        bin size for x and y dimension, overwrites bins\n        creates bins with reference to round(mean(x)), round(mean(y))\n    by : str, List[str], optional\n        group by, by default [\"model\", \"observation\"]\n\n        - by column name\n        - by temporal bin of the DateTimeIndex via the freq-argument\n        (using pandas pd.Grouper(freq)), e.g.: 'freq:M' = monthly; 'freq:D' daily\n        - by the dt accessor of the DateTimeIndex (e.g. 'dt.month') using the\n        syntax 'dt:month'. 
The dt-argument is different from the freq-argument\n        in that it gives month-of-year rather than month-of-data.\n    metrics : list, optional\n        list of modelskill.metrics, by default modelskill.options.metrics.list\n    n_min : int, optional\n        minimum number of observations in a grid cell;\n        cells with fewer observations get a score of `np.nan`\n\n    Returns\n    -------\n    SkillGrid\n        skill assessment as a SkillGrid object\n\n    See also\n    --------\n    skill\n        a method for aggregated skill assessment\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> cc = ms.match([HKNA,EPL,c2], mr)  # with satellite track measurements\n    >>> gs = cc.gridded_skill(metrics='bias')\n    >>> gs.data\n    <xarray.Dataset>\n    Dimensions:      (x: 5, y: 5)\n    Coordinates:\n        observation   'alti'\n    * x            (x) float64 -0.436 1.543 3.517 5.492 7.466\n    * y            (y) float64 50.6 51.66 52.7 53.75 54.8\n    Data variables:\n        n            (x, y) int32 3 0 0 14 37 17 50 36 72 ... 0 0 15 20 0 0 0 28 76\n        bias         (x, y) float64 -0.02626 nan nan ... 
nan 0.06785 -0.1143\n\n    >>> gs = cc.gridded_skill(binsize=0.5)\n    >>> gs.data.coords\n    Coordinates:\n        observation   'alti'\n    * x            (x) float64 -1.5 -0.5 0.5 1.5 2.5 3.5 4.5 5.5 6.5 7.5\n    * y            (y) float64 51.5 52.5 53.5 54.5 55.5 56.5\n    \"\"\"\n\n    model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore\n    observation, variable = _get_deprecated_obs_var_args(kwargs)  # type: ignore\n    assert kwargs == {}, f\"Unknown keyword arguments: {kwargs}\"\n\n    cmp = self.sel(\n        model=model,\n        observation=observation,\n        quantity=variable,\n        start=start,\n        end=end,\n        area=area,\n    )\n\n    if cmp.n_points == 0:\n        raise ValueError(\"Dataset is empty, no data to compare.\")\n\n    ## ---- end of deprecated code ----\n\n    metrics = _parse_metric(metrics)\n\n    df = cmp._to_long_dataframe()\n    df = _add_spatial_grid_to_df(df=df, bins=bins, binsize=binsize)\n\n    agg_cols = _parse_groupby(by, n_mod=cmp.n_models, n_qnt=cmp.n_quantities)\n    if \"x\" not in agg_cols:\n        agg_cols.insert(0, \"x\")\n    if \"y\" not in agg_cols:\n        agg_cols.insert(0, \"y\")\n\n    df = df.drop(columns=[\"x\", \"y\"]).rename(columns=dict(xBin=\"x\", yBin=\"y\"))\n    res = _groupby_df(df, by=agg_cols, metrics=metrics, n_min=n_min)\n    ds = res.to_xarray().squeeze()\n\n    # change categorial index to coordinates\n    for dim in (\"x\", \"y\"):\n        ds[dim] = ds[dim].astype(float)\n    return SkillGrid(ds)\n
"},{"location":"api/comparercollection/#modelskill.ComparerCollection.load","title":"load staticmethod","text":"
load(filename)\n

Load a ComparerCollection from a zip file.

Parameters:

Name Type Description Default filename str or Path

Filename of the zip file.

required

Returns:

Type Description ComparerCollection

The loaded ComparerCollection.

Examples:

>>> cc = ms.match(obs, mod)\n>>> cc.save(\"my_comparer_collection.msk\")\n>>> cc2 = ms.ComparerCollection.load(\"my_comparer_collection.msk\")\n
Source code in modelskill/comparison/_collection.py
@staticmethod\ndef load(filename: Union[str, Path]) -> \"ComparerCollection\":\n    \"\"\"Load a ComparerCollection from a zip file.\n\n    Parameters\n    ----------\n    filename : str or Path\n        Filename of the zip file.\n\n    Returns\n    -------\n    ComparerCollection\n        The loaded ComparerCollection.\n\n    Examples\n    --------\n    >>> cc = ms.match(obs, mod)\n    >>> cc.save(\"my_comparer_collection.msk\")\n    >>> cc2 = ms.ComparerCollection.load(\"my_comparer_collection.msk\")\n    \"\"\"\n\n    folder = tempfile.TemporaryDirectory().name\n\n    with zipfile.ZipFile(filename, \"r\") as zip:\n        for f in zip.namelist():\n            if f.endswith(\".nc\"):\n                zip.extract(f, path=folder)\n\n    comparers = [\n        ComparerCollection._load_comparer(folder, f)\n        for f in sorted(os.listdir(folder))\n    ]\n    return ComparerCollection(comparers)\n
"},{"location":"api/comparercollection/#modelskill.ComparerCollection.mean_skill","title":"mean_skill","text":"
mean_skill(*, weights=None, metrics=None, **kwargs)\n

Weighted mean of skills

First, the skill is calculated per observation, the weighted mean of the skills is then found.

Warning: This method is NOT the mean skill of all observational points! (mean_skill_points)

Parameters:

Name Type Description Default weights str or List(float) or Dict(str, float)

weighting of observations, by default None

  • None: use observations weight attribute (if assigned, else \"equal\")
  • \"equal\": giving all observations equal weight,
  • \"points\": giving all points equal weight,
  • list of weights e.g. [0.3, 0.3, 0.4] per observation,
  • dictionary of observations with special weights, others will be set to 1.0
None metrics list

list of modelskill.metrics, by default modelskill.options.metrics.list

None

Returns:

Type Description SkillTable

mean skill assessment as a SkillTable object

See also

skill skill assessment per observation mean_skill_points skill assessment pooling all observation points together

Examples:

>>> import modelskill as ms\n>>> cc = ms.match([HKNA,EPL,c2], mod=HKZN_local)\n>>> cc.mean_skill().round(2)\n              n  bias  rmse  urmse   mae    cc    si    r2\nHKZN_local  564 -0.09  0.31   0.28  0.24  0.97  0.09  0.99\n>>> sk = cc.mean_skill(weights=\"equal\")\n>>> sk = cc.mean_skill(weights=\"points\")\n>>> sk = cc.mean_skill(weights={\"EPL\": 2.0}) # more weight on EPL, others=1.0\n
Source code in modelskill/comparison/_collection.py
def mean_skill(\n    self,\n    *,\n    weights: Optional[Union[str, List[float], Dict[str, float]]] = None,\n    metrics: Optional[list] = None,\n    **kwargs: Any,\n) -> SkillTable:\n    \"\"\"Weighted mean of skills\n\n    First, the skill is calculated per observation,\n    the weighted mean of the skills is then found.\n\n    Warning: This method is NOT the mean skill of\n    all observational points! (mean_skill_points)\n\n    Parameters\n    ----------\n    weights : str or List(float) or Dict(str, float), optional\n        weighting of observations, by default None\n\n        - None: use observations weight attribute (if assigned, else \"equal\")\n        - \"equal\": giving all observations equal weight,\n        - \"points\": giving all points equal weight,\n        - list of weights e.g. [0.3, 0.3, 0.4] per observation,\n        - dictionary of observations with special weigths, others will be set to 1.0\n    metrics : list, optional\n        list of modelskill.metrics, by default modelskill.options.metrics.list\n\n    Returns\n    -------\n    SkillTable\n        mean skill assessment as a SkillTable object\n\n    See also\n    --------\n    skill\n        skill assessment per observation\n    mean_skill_points\n        skill assessment pooling all observation points together\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> cc = ms.match([HKNA,EPL,c2], mod=HKZN_local)\n    >>> cc.mean_skill().round(2)\n                  n  bias  rmse  urmse   mae    cc    si    r2\n    HKZN_local  564 -0.09  0.31   0.28  0.24  0.97  0.09  0.99\n    >>> sk = cc.mean_skill(weights=\"equal\")\n    >>> sk = cc.mean_skill(weights=\"points\")\n    >>> sk = cc.mean_skill(weights={\"EPL\": 2.0}) # more weight on EPL, others=1.0\n    \"\"\"\n\n    # TODO remove in v1.1\n    model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore\n    observation, variable = _get_deprecated_obs_var_args(kwargs)  # type: ignore\n    assert kwargs == {}, 
f\"Unknown keyword arguments: {kwargs}\"\n\n    # filter data\n    cc = self.sel(\n        model=model,  # deprecated\n        observation=observation,  # deprecated\n        quantity=variable,  # deprecated\n        start=start,  # deprecated\n        end=end,  # deprecated\n        area=area,  # deprecated\n    )\n    if cc.n_points == 0:\n        raise ValueError(\"Dataset is empty, no data to compare.\")\n\n    ## ---- end of deprecated code ----\n\n    df = cc._to_long_dataframe()  # TODO: remove\n    mod_names = cc.mod_names\n    # obs_names = cmp.obs_names  # df.observation.unique()\n    qnt_names = cc.quantity_names\n\n    # skill assessment\n    pmetrics = _parse_metric(metrics)\n    sk = cc.skill(metrics=pmetrics)\n    if sk is None:\n        return None\n    skilldf = sk.to_dataframe()\n\n    # weights\n    weights = cc._parse_weights(weights, sk.obs_names)\n    skilldf[\"weights\"] = (\n        skilldf.n if weights is None else np.tile(weights, len(mod_names))  # type: ignore\n    )\n\n    def weighted_mean(x: Any) -> Any:\n        return np.average(x, weights=skilldf.loc[x.index, \"weights\"])\n\n    # group by\n    by = cc._mean_skill_by(skilldf, mod_names, qnt_names)  # type: ignore\n    agg = {\"n\": \"sum\"}\n    for metric in pmetrics:  # type: ignore\n        agg[metric.__name__] = weighted_mean  # type: ignore\n    res = skilldf.groupby(by, observed=False).agg(agg)\n\n    # TODO is this correct?\n    res.index.name = \"model\"\n\n    # output\n    res = cc._add_as_col_if_not_in_index(df, res, fields=[\"model\", \"quantity\"])  # type: ignore\n    return SkillTable(res.astype({\"n\": int}))\n
"},{"location":"api/comparercollection/#modelskill.ComparerCollection.query","title":"query","text":"
query(query)\n

Select data based on a query.

Parameters:

Name Type Description Default query str

Query string. See pandas.DataFrame.query() for details.

required

Returns:

Type Description ComparerCollection

New ComparerCollection with selected data.

Source code in modelskill/comparison/_collection.py
def query(self, query: str) -> \"ComparerCollection\":\n    \"\"\"Select data based on a query.\n\n    Parameters\n    ----------\n    query : str\n        Query string. See pandas.DataFrame.query() for details.\n\n    Returns\n    -------\n    ComparerCollection\n        New ComparerCollection with selected data.\n    \"\"\"\n    q_cmps = [cmp.query(query) for cmp in self._comparers.values()]\n    cmps_with_data = [cmp for cmp in q_cmps if cmp.n_points > 0]\n\n    return ComparerCollection(cmps_with_data)\n
"},{"location":"api/comparercollection/#modelskill.ComparerCollection.rename","title":"rename","text":"
rename(mapping)\n

Rename observation, model or auxiliary data variables

Parameters:

Name Type Description Default mapping dict

mapping of old names to new names

required

Returns:

Type Description ComparerCollection

Examples:

>>> cc = ms.match([o1, o2], [mr1, mr2])\n>>> cc.mod_names\n['mr1', 'mr2']\n>>> cc2 = cc.rename({'mr1': 'model1'})\n>>> cc2.mod_names\n['model1', 'mr2']\n
Source code in modelskill/comparison/_collection.py
def rename(self, mapping: Dict[str, str]) -> \"ComparerCollection\":\n    \"\"\"Rename observation, model or auxiliary data variables\n\n    Parameters\n    ----------\n    mapping : dict\n        mapping of old names to new names\n\n    Returns\n    -------\n    ComparerCollection\n\n    Examples\n    --------\n    >>> cc = ms.match([o1, o2], [mr1, mr2])\n    >>> cc.mod_names\n    ['mr1', 'mr2']\n    >>> cc2 = cc.rename({'mr1': 'model1'})\n    >>> cc2.mod_names\n    ['model1', 'mr2']\n    \"\"\"\n    for k in mapping.keys():\n        allowed_keys = self.obs_names + self.mod_names + self.aux_names\n        if k not in allowed_keys:\n            raise KeyError(f\"Unknown key: {k}; must be one of {allowed_keys}\")\n\n    cmps = []\n    for cmp in self._comparers.values():\n        cmps.append(cmp.rename(mapping, errors=\"ignore\"))\n    return ComparerCollection(cmps)\n
"},{"location":"api/comparercollection/#modelskill.ComparerCollection.save","title":"save","text":"
save(filename)\n

Save the ComparerCollection to a zip file.

Each comparer is stored as a netcdf file in the zip file.

Parameters:

Name Type Description Default filename str or Path

Filename of the zip file.

required

Examples:

>>> cc = ms.match(obs, mod)\n>>> cc.save(\"my_comparer_collection.msk\")\n
Source code in modelskill/comparison/_collection.py
def save(self, filename: Union[str, Path]) -> None:\n    \"\"\"Save the ComparerCollection to a zip file.\n\n    Each comparer is stored as a netcdf file in the zip file.\n\n    Parameters\n    ----------\n    filename : str or Path\n        Filename of the zip file.\n\n    Examples\n    --------\n    >>> cc = ms.match(obs, mod)\n    >>> cc.save(\"my_comparer_collection.msk\")\n    \"\"\"\n\n    files = []\n    no = 0\n    for name, cmp in self._comparers.items():\n        cmp_fn = f\"{no}_{name}.nc\"\n        cmp.save(cmp_fn)\n        files.append(cmp_fn)\n        no += 1\n\n    with zipfile.ZipFile(filename, \"w\") as zip:\n        for f in files:\n            zip.write(f)\n            os.remove(f)\n
"},{"location":"api/comparercollection/#modelskill.ComparerCollection.score","title":"score","text":"
score(metric=mtr.rmse, **kwargs)\n

Weighted mean score of model(s) over all observations

Wrapping mean_skill() with a single metric.

NOTE: will take simple mean over different quantities!

Parameters:

Name Type Description Default weights str or List(float) or Dict(str, float)

weighting of observations, by default None

  • None: use observations weight attribute (if assigned, else \"equal\")
  • \"equal\": giving all observations equal weight,
  • \"points\": giving all points equal weight,
  • list of weights e.g. [0.3, 0.3, 0.4] per observation,
  • dictionary of observations with special weights, others will be set to 1.0
required metric list

a single metric from modelskill.metrics, by default rmse

rmse

Returns:

Type Description Dict[str, float]

mean of skills score as a single number (for each model)

See also

skill skill assessment per observation mean_skill weighted mean of skills assessment mean_skill_points skill assessment pooling all observation points together

Examples:

>>> import modelskill as ms\n>>> cc = ms.match([o1, o2], mod)\n>>> cc.score()\n{'mod': 0.30681206}\n>>> cc.score(weights=[0.1,0.1,0.8])\n{'mod': 0.3383011631797379}\n
>>> cc.score(weights='points', metric=\"mape\")\n{'mod': 8.414442957854142}\n
Source code in modelskill/comparison/_collection.py
def score(\n    self,\n    metric: str | Callable = mtr.rmse,\n    **kwargs: Any,\n) -> Dict[str, float]:\n    \"\"\"Weighted mean score of model(s) over all observations\n\n    Wrapping mean_skill() with a single metric.\n\n    NOTE: will take simple mean over different quantities!\n\n    Parameters\n    ----------\n    weights : str or List(float) or Dict(str, float), optional\n        weighting of observations, by default None\n\n        - None: use observations weight attribute (if assigned, else \"equal\")\n        - \"equal\": giving all observations equal weight,\n        - \"points\": giving all points equal weight,\n        - list of weights e.g. [0.3, 0.3, 0.4] per observation,\n        - dictionary of observations with special weigths, others will be set to 1.0\n    metric : list, optional\n        a single metric from modelskill.metrics, by default rmse\n\n    Returns\n    -------\n    Dict[str, float]\n        mean of skills score as a single number (for each model)\n\n    See also\n    --------\n    skill\n        skill assessment per observation\n    mean_skill\n        weighted mean of skills assessment\n    mean_skill_points\n        skill assessment pooling all observation points together\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> cc = ms.match([o1, o2], mod)\n    >>> cc.score()\n    {'mod': 0.30681206}\n    >>> cc.score(weights=[0.1,0.1,0.8])\n    {'mod': 0.3383011631797379}\n\n    >>> cc.score(weights='points', metric=\"mape\")\n    {'mod': 8.414442957854142}\n    \"\"\"\n\n    weights = kwargs.pop(\"weights\", None)\n\n    metric = _parse_metric(metric)[0]\n\n    if weights is None:\n        weights = {c.name: c.weight for c in self._comparers.values()}\n\n    if not (callable(metric) or isinstance(metric, str)):\n        raise ValueError(\"metric must be a string or a function\")\n\n    model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore\n    observation, variable = 
_get_deprecated_obs_var_args(kwargs)  # type: ignore\n    assert kwargs == {}, f\"Unknown keyword arguments: {kwargs}\"\n\n    if model is None:\n        models = self.mod_names\n    else:\n        # TODO: these two lines looks familiar, extract to function\n        models = [model] if np.isscalar(model) else model  # type: ignore\n        models = [_get_name(m, self.mod_names) for m in models]  # type: ignore\n\n    cmp = self.sel(\n        model=models,  # deprecated\n        observation=observation,  # deprecated\n        quantity=variable,  # deprecated\n        start=start,  # deprecated\n        end=end,  # deprecated\n        area=area,  # deprecated\n    )\n\n    if cmp.n_points == 0:\n        raise ValueError(\"Dataset is empty, no data to compare.\")\n\n    ## ---- end of deprecated code ----\n\n    sk = cmp.mean_skill(weights=weights, metrics=[metric])\n    df = sk.to_dataframe()\n\n    metric_name = metric if isinstance(metric, str) else metric.__name__\n    ser = df[metric_name]\n    score = {str(col): float(value) for col, value in ser.items()}\n\n    return score\n
"},{"location":"api/comparercollection/#modelskill.ComparerCollection.sel","title":"sel","text":"
sel(model=None, observation=None, quantity=None, start=None, end=None, time=None, area=None, variable=None, **kwargs)\n

Select data based on model, time and/or area.

Parameters:

Name Type Description Default model str or int or list of str or list of int

Model name or index. If None, all models are selected.

None observation str or int or list of str or list of int

Observation name or index. If None, all observations are selected.

None quantity str or int or list of str or list of int

Quantity name or index. If None, all quantities are selected.

None start str or datetime

Start time. If None, all times are selected.

None end str or datetime

End time. If None, all times are selected.

None time str or datetime

Time. If None, all times are selected.

None area list of float

bbox: [x0, y0, x1, y1] or Polygon. If None, all areas are selected.

None **kwargs Any

Filtering by comparer attrs similar to xarray.Dataset.filter_by_attrs e.g. sel(gtype='track') or sel(obs_provider='CMEMS') if at least one comparer has an entry obs_provider with value CMEMS in its attrs container. Multiple kwargs are combined with logical AND.

{}

Returns:

Type Description ComparerCollection

New ComparerCollection with selected data.

Source code in modelskill/comparison/_collection.py
def sel(\n    self,\n    model: Optional[IdxOrNameTypes] = None,\n    observation: Optional[IdxOrNameTypes] = None,\n    quantity: Optional[IdxOrNameTypes] = None,\n    start: Optional[TimeTypes] = None,\n    end: Optional[TimeTypes] = None,\n    time: Optional[TimeTypes] = None,\n    area: Optional[List[float]] = None,\n    variable: Optional[IdxOrNameTypes] = None,  # obsolete\n    **kwargs: Any,\n) -> \"ComparerCollection\":\n    \"\"\"Select data based on model, time and/or area.\n\n    Parameters\n    ----------\n    model : str or int or list of str or list of int, optional\n        Model name or index. If None, all models are selected.\n    observation : str or int or list of str or list of int, optional\n        Observation name or index. If None, all observations are selected.\n    quantity : str or int or list of str or list of int, optional\n        Quantity name or index. If None, all quantities are selected.\n    start : str or datetime, optional\n        Start time. If None, all times are selected.\n    end : str or datetime, optional\n        End time. If None, all times are selected.\n    time : str or datetime, optional\n        Time. If None, all times are selected.\n    area : list of float, optional\n        bbox: [x0, y0, x1, y1] or Polygon. If None, all areas are selected.\n    **kwargs\n        Filtering by comparer attrs similar to xarray.Dataset.filter_by_attrs\n        e.g. `sel(gtype='track')` or `sel(obs_provider='CMEMS')` if at least\n        one comparer has an entry `obs_provider` with value `CMEMS` in its\n        attrs container. 
Multiple kwargs are combined with logical AND.\n\n    Returns\n    -------\n    ComparerCollection\n        New ComparerCollection with selected data.\n    \"\"\"\n    if variable is not None:\n        warnings.warn(\n            \"variable is deprecated, use quantity instead\",\n            FutureWarning,\n        )\n        quantity = variable\n    # TODO is this really necessary to do both in ComparerCollection and Comparer?\n    if model is not None:\n        if isinstance(model, (str, int)):\n            models = [model]\n        else:\n            models = list(model)\n        mod_names: List[str] = [_get_name(m, self.mod_names) for m in models]\n    if observation is None:\n        observation = self.obs_names\n    else:\n        observation = [observation] if np.isscalar(observation) else observation  # type: ignore\n        observation = [_get_name(o, self.obs_names) for o in observation]  # type: ignore\n\n    if (quantity is not None) and (self.n_quantities > 1):\n        quantity = [quantity] if np.isscalar(quantity) else quantity  # type: ignore\n        quantity = [_get_name(v, self.quantity_names) for v in quantity]  # type: ignore\n    else:\n        quantity = self.quantity_names\n\n    cmps = []\n    for cmp in self._comparers.values():\n        if cmp.name in observation and cmp.quantity.name in quantity:\n            thismodel = (\n                [m for m in mod_names if m in cmp.mod_names] if model else None\n            )\n            if (thismodel is not None) and (len(thismodel) == 0):\n                continue\n            cmpsel = cmp.sel(\n                model=thismodel,\n                start=start,\n                end=end,\n                time=time,\n                area=area,\n            )\n            if cmpsel is not None:\n                # TODO: check if cmpsel is empty\n                if cmpsel.n_points > 0:\n                    cmps.append(cmpsel)\n    cc = ComparerCollection(cmps)\n\n    if kwargs:\n        cc = 
cc.filter_by_attrs(**kwargs)\n\n    return cc\n
"},{"location":"api/comparercollection/#modelskill.ComparerCollection.skill","title":"skill","text":"
skill(by=None, metrics=None, observed=False, **kwargs)\n

Aggregated skill assessment of model(s)

Parameters:

Name Type Description Default by str or List[str]

group by, by default [\"model\", \"observation\"]

  • by column name
  • by temporal bin of the DateTimeIndex via the freq-argument (using pandas pd.Grouper(freq)), e.g.: 'freq:M' = monthly; 'freq:D' daily
  • by the dt accessor of the DateTimeIndex (e.g. 'dt.month') using the syntax 'dt:month'. The dt-argument is different from the freq-argument in that it gives month-of-year rather than month-of-data.
  • by attributes, stored in the cc.data.attrs container, e.g.: 'attrs:obs_provider' = group by observation provider or 'attrs:gtype' = group by geometry type (track or point)
None metrics list

list of modelskill.metrics (or str), by default modelskill.options.metrics.list

None observed bool

This only applies if any of the groupers are Categoricals.

  • True: only show observed values for categorical groupers.
  • False: show all values for categorical groupers.
False

Returns:

Type Description SkillTable

skill assessment as a SkillTable object

See also

sel a method for filtering/selecting data

Examples:

>>> import modelskill as ms\n>>> cc = ms.match([HKNA,EPL,c2], mr)\n>>> cc.skill().round(2)\n               n  bias  rmse  urmse   mae    cc    si    r2\nobservation\nHKNA         385 -0.20  0.35   0.29  0.25  0.97  0.09  0.99\nEPL           66 -0.08  0.22   0.20  0.18  0.97  0.07  0.99\nc2           113 -0.00  0.35   0.35  0.29  0.97  0.12  0.99\n
>>> cc.sel(observation='c2', start='2017-10-28').skill().round(2)\n               n  bias  rmse  urmse   mae    cc    si    r2\nobservation\nc2            41  0.33  0.41   0.25  0.36  0.96  0.06  0.99\n
>>> cc.skill(by='freq:D').round(2)\n              n  bias  rmse  urmse   mae    cc    si    r2\n2017-10-27  239 -0.15  0.25   0.21  0.20  0.72  0.10  0.98\n2017-10-28  162 -0.07  0.19   0.18  0.16  0.96  0.06  1.00\n2017-10-29  163 -0.21  0.52   0.47  0.42  0.79  0.11  0.99\n
Source code in modelskill/comparison/_collection.py
def skill(\n    self,\n    by: str | Iterable[str] | None = None,\n    metrics: Iterable[str] | Iterable[Callable] | str | Callable | None = None,\n    observed: bool = False,\n    **kwargs: Any,\n) -> SkillTable:\n    \"\"\"Aggregated skill assessment of model(s)\n\n    Parameters\n    ----------\n    by : str or List[str], optional\n        group by, by default [\"model\", \"observation\"]\n\n        - by column name\n        - by temporal bin of the DateTimeIndex via the freq-argument\n        (using pandas pd.Grouper(freq)), e.g.: 'freq:M' = monthly; 'freq:D' daily\n        - by the dt accessor of the DateTimeIndex (e.g. 'dt.month') using the\n        syntax 'dt:month'. The dt-argument is different from the freq-argument\n        in that it gives month-of-year rather than month-of-data.\n        - by attributes, stored in the cc.data.attrs container,\n        e.g.: 'attrs:obs_provider' = group by observation provider or\n        'attrs:gtype' = group by geometry type (track or point)\n    metrics : list, optional\n        list of modelskill.metrics (or str), by default modelskill.options.metrics.list\n    observed: bool, optional\n        This only applies if any of the groupers are Categoricals.\n\n        - True: only show observed values for categorical groupers.\n        - False: show all values for categorical groupers.\n\n    Returns\n    -------\n    SkillTable\n        skill assessment as a SkillTable object\n\n    See also\n    --------\n    sel\n        a method for filtering/selecting data\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> cc = ms.match([HKNA,EPL,c2], mr)\n    >>> cc.skill().round(2)\n                   n  bias  rmse  urmse   mae    cc    si    r2\n    observation\n    HKNA         385 -0.20  0.35   0.29  0.25  0.97  0.09  0.99\n    EPL           66 -0.08  0.22   0.20  0.18  0.97  0.07  0.99\n    c2           113 -0.00  0.35   0.35  0.29  0.97  0.12  0.99\n\n    >>> cc.sel(observation='c2', 
start='2017-10-28').skill().round(2)\n                   n  bias  rmse  urmse   mae    cc    si    r2\n    observation\n    c2            41  0.33  0.41   0.25  0.36  0.96  0.06  0.99\n\n    >>> cc.skill(by='freq:D').round(2)\n                  n  bias  rmse  urmse   mae    cc    si    r2\n    2017-10-27  239 -0.15  0.25   0.21  0.20  0.72  0.10  0.98\n    2017-10-28  162 -0.07  0.19   0.18  0.16  0.96  0.06  1.00\n    2017-10-29  163 -0.21  0.52   0.47  0.42  0.79  0.11  0.99\n    \"\"\"\n\n    # TODO remove in v1.1 ----------\n    model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore\n    observation, variable = _get_deprecated_obs_var_args(kwargs)  # type: ignore\n    assert kwargs == {}, f\"Unknown keyword arguments: {kwargs}\"\n\n    cc = self.sel(\n        model=model,\n        observation=observation,\n        quantity=variable,\n        start=start,\n        end=end,\n        area=area,\n    )\n    if cc.n_points == 0:\n        raise ValueError(\"Dataset is empty, no data to compare.\")\n\n    ## ---- end of deprecated code ----\n\n    pmetrics = _parse_metric(metrics)\n\n    agg_cols = _parse_groupby(by, n_mod=cc.n_models, n_qnt=cc.n_quantities)\n    agg_cols, attrs_keys = self._attrs_keys_in_by(agg_cols)\n\n    df = cc._to_long_dataframe(attrs_keys=attrs_keys, observed=observed)\n\n    res = _groupby_df(df, by=agg_cols, metrics=pmetrics)\n    mtr_cols = [m.__name__ for m in pmetrics]  # type: ignore\n    res = res.dropna(subset=mtr_cols, how=\"all\")  # TODO: ok to remove empty?\n    res = self._append_xy_to_res(res, cc)\n    res = cc._add_as_col_if_not_in_index(df, skilldf=res)  # type: ignore\n    return SkillTable(res)\n
"},{"location":"api/comparercollection/#modelskill.comparison._collection_plotter.ComparerCollectionPlotter","title":"modelskill.comparison._collection_plotter.ComparerCollectionPlotter","text":"

Plotter for ComparerCollection

Examples:

>>> cc.plot.scatter()\n>>> cc.plot.hist()\n>>> cc.plot.kde()\n>>> cc.plot.taylor()\n>>> cc.plot.box()\n
Source code in modelskill/comparison/_collection_plotter.py
class ComparerCollectionPlotter:\n    \"\"\"Plotter for ComparerCollection\n\n    Examples\n    --------\n    >>> cc.plot.scatter()\n    >>> cc.plot.hist()\n    >>> cc.plot.kde()\n    >>> cc.plot.taylor()\n    >>> cc.plot.box()\n    \"\"\"\n\n    def __init__(self, cc: ComparerCollection) -> None:\n        self.cc = cc\n        self.is_directional = False\n\n    def __call__(self, *args: Any, **kwds: Any) -> Axes | list[Axes]:\n        return self.scatter(*args, **kwds)\n\n    def scatter(\n        self,\n        *,\n        model=None,\n        bins: int | float = 120,\n        quantiles: int | Sequence[float] | None = None,\n        fit_to_quantiles: bool = False,\n        show_points: bool | int | float | None = None,\n        show_hist: Optional[bool] = None,\n        show_density: Optional[bool] = None,\n        norm: Optional[colors.Normalize] = None,\n        backend: Literal[\"matplotlib\", \"plotly\"] = \"matplotlib\",\n        figsize: Tuple[float, float] = (8, 8),\n        xlim: Optional[Tuple[float, float]] = None,\n        ylim: Optional[Tuple[float, float]] = None,\n        reg_method: str | bool = \"ols\",\n        title: Optional[str] = None,\n        xlabel: Optional[str] = None,\n        ylabel: Optional[str] = None,\n        skill_table: Optional[Union[str, List[str], bool]] = None,\n        ax: Optional[Axes] = None,\n        **kwargs,\n    ) -> Axes | list[Axes]:\n        \"\"\"Scatter plot showing compared data: observation vs modelled\n        Optionally, with density histogram.\n\n        Parameters\n        ----------\n        bins: (int, float, sequence), optional\n            bins for the 2D histogram on the background. 
By default 20 bins.\n            if int, represents the number of bins of 2D\n            if float, represents the bin size\n            if sequence (list of int or float), represents the bin edges\n        quantiles: (int, sequence), optional\n            number of quantiles for QQ-plot, by default None and will depend\n            on the scatter data length (10, 100 or 1000); if int, this is\n            the number of points; if sequence (list of floats), represents\n            the desired quantiles (from 0 to 1)\n        fit_to_quantiles: bool, optional, by default False\n            by default the regression line is fitted to all data, if True,\n            it is fitted to the quantiles which can be useful to represent\n            the extremes of the distribution, by default False\n        show_points : (bool, int, float), optional\n            Should the scatter points be displayed? None means: show all\n            points if fewer than 1e4, otherwise show 1e4 sample points,\n            by default None. float: fraction of points to show on plot\n            from 0 to 1. e.g. 0.5 shows 50% of the points. int: if 'n' (int)\n            given, then 'n' points will be displayed, randomly selected\n        show_hist : bool, optional\n            show the data density as a a 2d histogram, by default None\n        show_density: bool, optional\n            show the data density as a colormap of the scatter, by default\n            None. If both `show_density` and `show_hist` are None, then\n            `show_density` is used by default. For binning the data, the\n            kword `bins=Float` is used.\n        norm : matplotlib.colors norm\n            colormap normalization. 
If None, defaults to\n            matplotlib.colors.PowerNorm(vmin=1, gamma=0.5)\n        backend : str, optional\n            use \"plotly\" (interactive) or \"matplotlib\" backend,\n            by default \"matplotlib\"\n        figsize : tuple, optional\n            width and height of the figure, by default (8, 8)\n        xlim : tuple, optional\n            plot range for the observation (xmin, xmax), by default None\n        ylim : tuple, optional\n            plot range for the model (ymin, ymax), by default None\n        reg_method : str or bool, optional\n            method for determining the regression line\n            \"ols\" : ordinary least squares regression\n            \"odr\" : orthogonal distance regression,\n            False : no regression line,\n            by default \"ols\"\n        title : str, optional\n            plot title, by default None\n        xlabel : str, optional\n            x-label text on plot, by default None\n        ylabel : str, optional\n            y-label text on plot, by default None\n        skill_table : str, List[str], bool, optional\n            list of modelskill.metrics or boolean, if True then by default modelskill.options.metrics.list.\n            This kword adds a box at the right of the scatter plot,\n            by default False\n        ax : matplotlib axes, optional\n            axes to plot on, by default None\n        **kwargs\n            other keyword arguments to matplotlib.pyplot.scatter()\n\n        Examples\n        ------\n        >>> cc.plot.scatter()\n        >>> cc.plot.scatter(bins=0.2, backend='plotly')\n        >>> cc.plot.scatter(show_points=False, title='no points')\n        >>> cc.plot.scatter(xlabel='all observations', ylabel='my model')\n        >>> cc.sel(model='HKZN_v2').plot.scatter(figsize=(10, 10))\n        >>> cc.sel(observations=['c2','HKNA']).plot.scatter()\n        \"\"\"\n\n        cc = self.cc\n        if model is None:\n            mod_names = cc.mod_names\n        
else:\n            warnings.warn(\n                \"The 'model' keyword is deprecated! Instead, filter comparer before plotting cmp.sel(model=...).plot.scatter()\",\n                FutureWarning,\n            )\n\n            model_list = [model] if isinstance(model, (str, int)) else model\n            mod_names = [\n                self.cc.mod_names[_get_idx(m, self.cc.mod_names)] for m in model_list\n            ]\n\n        axes = []\n        for mod_name in mod_names:\n            ax_mod = self._scatter_one_model(\n                mod_name=mod_name,\n                bins=bins,\n                quantiles=quantiles,\n                fit_to_quantiles=fit_to_quantiles,\n                show_points=show_points,\n                show_hist=show_hist,\n                show_density=show_density,\n                norm=norm,\n                backend=backend,\n                figsize=figsize,\n                xlim=xlim,\n                ylim=ylim,\n                reg_method=reg_method,\n                title=title,\n                xlabel=xlabel,\n                ylabel=ylabel,\n                skill_table=skill_table,\n                ax=ax,\n                **kwargs,\n            )\n            axes.append(ax_mod)\n        return axes[0] if len(axes) == 1 else axes\n\n    def _scatter_one_model(\n        self,\n        *,\n        mod_name: str,\n        bins: int | float,\n        quantiles: int | Sequence[float] | None,\n        fit_to_quantiles: bool,\n        show_points: bool | int | float | None,\n        show_hist: Optional[bool],\n        show_density: Optional[bool],\n        backend: Literal[\"matplotlib\", \"plotly\"],\n        figsize: Tuple[float, float],\n        xlim: Optional[Tuple[float, float]],\n        ylim: Optional[Tuple[float, float]],\n        reg_method: str | bool,\n        title: Optional[str],\n        xlabel: Optional[str],\n        ylabel: Optional[str],\n        skill_table: Optional[Union[str, List[str], bool]],\n        ax,\n        
**kwargs,\n    ):\n        assert (\n            mod_name in self.cc.mod_names\n        ), f\"Model {mod_name} not found in collection {self.cc.mod_names}\"\n\n        cc_sel_mod = self.cc.sel(model=mod_name)\n\n        if cc_sel_mod.n_points == 0:\n            raise ValueError(\"No data found in selection\")\n\n        df = cc_sel_mod._to_long_dataframe()\n        x = df.obs_val.values\n        y = df.mod_val.values\n\n        # TODO why the first?\n        unit_text = self.cc[0]._unit_text\n\n        xlabel = xlabel or f\"Observation, {unit_text}\"\n        ylabel = ylabel or f\"Model, {unit_text}\"\n        title = title or f\"{mod_name} vs {cc_sel_mod._name}\"\n\n        skill = None\n        skill_score_unit = None\n        if skill_table:\n            metrics = None if skill_table is True else skill_table\n\n            # TODO why is this here?\n            if isinstance(self, ComparerCollectionPlotter) and len(cc_sel_mod) == 1:\n                skill = cc_sel_mod.skill(metrics=metrics)  # type: ignore\n            else:\n                skill = cc_sel_mod.mean_skill(metrics=metrics)  # type: ignore\n            # TODO improve this\n            try:\n                skill_score_unit = unit_text.split(\"[\")[1].split(\"]\")[0]\n            except IndexError:\n                skill_score_unit = \"\"  # Dimensionless\n\n        if self.is_directional:\n            # hide quantiles and regression line\n            quantiles = 0\n            reg_method = False\n\n        skill_scores = skill.iloc[0].to_dict() if skill is not None else None\n\n        ax = scatter(\n            x=x,\n            y=y,\n            bins=bins,\n            quantiles=quantiles,\n            fit_to_quantiles=fit_to_quantiles,\n            show_points=show_points,\n            show_hist=show_hist,\n            show_density=show_density,\n            backend=backend,\n            figsize=figsize,\n            xlim=xlim,\n            ylim=ylim,\n            reg_method=reg_method,\n         
   title=title,\n            xlabel=xlabel,\n            ylabel=ylabel,\n            skill_scores=skill_scores,\n            skill_score_unit=skill_score_unit,\n            ax=ax,\n            **kwargs,\n        )\n\n        if backend == \"matplotlib\" and self.is_directional:\n            _xtick_directional(ax, xlim)\n            _ytick_directional(ax, ylim)\n\n        return ax\n\n    def kde(self, *, ax=None, figsize=None, title=None, **kwargs) -> Axes:\n        \"\"\"Plot kernel density estimate of observation and model data.\n\n        Parameters\n        ----------\n        ax : Axes, optional\n            matplotlib axes, by default None\n        figsize : tuple, optional\n            width and height of the figure, by default None\n        title : str, optional\n            plot title, by default None\n        **kwargs\n            passed to pandas.DataFrame.plot.kde()\n\n        Returns\n        -------\n        Axes\n            matplotlib axes\n\n        Examples\n        --------\n        >>> cc.plot.kde()\n        >>> cc.plot.kde(bw_method=0.5)\n        >>> cc.plot.kde(bw_method='silverman')\n\n        \"\"\"\n        _, ax = _get_fig_ax(ax, figsize)\n\n        df = self.cc._to_long_dataframe()\n        ax = df.obs_val.plot.kde(\n            ax=ax, linestyle=\"dashed\", label=\"Observation\", **kwargs\n        )\n\n        for model in self.cc.mod_names:\n            df_model = df[df.model == model]\n            df_model.mod_val.plot.kde(ax=ax, label=model, **kwargs)\n\n        ax.set_xlabel(f\"{self.cc._unit_text}\")\n\n        title = (\n            _default_univarate_title(\"Density plot\", self.cc)\n            if title is None\n            else title\n        )\n        ax.set_title(title)\n        ax.legend()\n\n        # remove y-axis, ticks and label\n        ax.yaxis.set_visible(False)\n        ax.tick_params(axis=\"y\", which=\"both\", length=0)\n        ax.set_ylabel(\"\")\n\n        # remove box around plot\n        
ax.spines[\"top\"].set_visible(False)\n        ax.spines[\"right\"].set_visible(False)\n        ax.spines[\"left\"].set_visible(False)\n\n        if self.is_directional:\n            _xtick_directional(ax)\n\n        return ax\n\n    def hist(\n        self,\n        bins: int | Sequence = 100,\n        *,\n        model: str | int | None = None,\n        title: Optional[str] = None,\n        density: bool = True,\n        alpha: float = 0.5,\n        ax=None,\n        figsize: Optional[Tuple[float, float]] = None,\n        **kwargs,\n    ):\n        \"\"\"Plot histogram of specific model and all observations.\n\n        Wraps pandas.DataFrame hist() method.\n\n        Parameters\n        ----------\n        bins : int, optional\n            number of bins, by default 100\n        title : str, optional\n            plot title, default: observation name\n        density: bool, optional\n            If True, draw and return a probability density, by default True\n        alpha : float, optional\n            alpha transparency fraction, by default 0.5\n        ax : matplotlib axes, optional\n            axes to plot on, by default None\n        figsize : tuple, optional\n            width and height of the figure, by default None\n        **kwargs\n            other keyword arguments to df.hist()\n\n        Returns\n        -------\n        matplotlib axes\n\n        Examples\n        --------\n        >>> cc.plot.hist()\n        >>> cc.plot.hist(bins=100)\n\n        See also\n        --------\n        pandas.Series.hist\n        matplotlib.axes.Axes.hist\n        \"\"\"\n        if model is None:\n            mod_names = self.cc.mod_names\n        else:\n            warnings.warn(\n                \"The 'model' keyword is deprecated! 
Instead, filter comparer before plotting cmp.sel(model=...).plot.hist()\",\n                FutureWarning,\n            )\n            model_list = [model] if isinstance(model, (str, int)) else model\n            mod_names = [\n                self.cc.mod_names[_get_idx(m, self.cc.mod_names)] for m in model_list\n            ]\n\n        axes = []\n        for mod_name in mod_names:\n            ax_mod = self._hist_one_model(\n                mod_name=mod_name,\n                bins=bins,\n                title=title,\n                density=density,\n                alpha=alpha,\n                ax=ax,\n                figsize=figsize,\n                **kwargs,\n            )\n            axes.append(ax_mod)\n        return axes[0] if len(axes) == 1 else axes\n\n    def _hist_one_model(\n        self,\n        *,\n        mod_name: str,\n        bins: int | Sequence,\n        title: Optional[str],\n        density: bool,\n        alpha: float,\n        ax,\n        figsize: Optional[Tuple[float, float]],\n        **kwargs,\n    ):\n        from ._comparison import MOD_COLORS\n\n        _, ax = _get_fig_ax(ax, figsize)\n\n        assert (\n            mod_name in self.cc.mod_names\n        ), f\"Model {mod_name} not found in collection\"\n        mod_idx = _get_idx(mod_name, self.cc.mod_names)\n\n        title = (\n            _default_univarate_title(\"Histogram\", self.cc) if title is None else title\n        )\n\n        cmp = self.cc\n        df = cmp._to_long_dataframe()\n        kwargs[\"alpha\"] = alpha\n        kwargs[\"density\"] = density\n        df.mod_val.hist(bins=bins, color=MOD_COLORS[mod_idx], ax=ax, **kwargs)\n        df.obs_val.hist(\n            bins=bins,\n            color=self.cc[0].data[\"Observation\"].attrs[\"color\"],\n            ax=ax,\n            **kwargs,\n        )\n\n        ax.legend([mod_name, \"observations\"])\n        ax.set_title(title)\n        ax.set_xlabel(f\"{self.cc[df.observation.iloc[0]]._unit_text}\")\n\n        if 
density:\n            ax.set_ylabel(\"density\")\n        else:\n            ax.set_ylabel(\"count\")\n\n        if self.is_directional:\n            _xtick_directional(ax)\n\n        return ax\n\n    def taylor(\n        self,\n        *,\n        normalize_std: bool = False,\n        aggregate_observations: bool = True,\n        figsize: Tuple[float, float] = (7, 7),\n        marker: str = \"o\",\n        marker_size: float = 6.0,\n        title: str = \"Taylor diagram\",\n    ):\n        \"\"\"Taylor diagram showing model std and correlation to observation\n        in a single-quadrant polar plot, with r=std and theta=arccos(cc).\n\n        Parameters\n        ----------\n        normalize_std : bool, optional\n            plot model std normalized with observation std, default False\n        aggregate_observations : bool, optional\n            should multiple observations be aggregated before plotting\n            (or shown individually), default True\n        figsize : tuple, optional\n            width and height of the figure (should be square), by default (7, 7)\n        marker : str, optional\n            marker type e.g. \"x\", \"*\", by default \"o\"\n        marker_size : float, optional\n            size of the marker, by default 6\n        title : str, optional\n            title of the plot, by default \"Taylor diagram\"\n\n        Returns\n        -------\n        matplotlib.figure.Figure\n\n        Examples\n        ------\n        >>> cc.plot.taylor()\n        >>> cc.plot.taylor(observation=\"c2\")\n        >>> cc.plot.taylor(start=\"2017-10-28\", figsize=(5,5))\n\n        References\n        ----------\n        Copin, Y. (2018). 
https://gist.github.com/ycopin/3342888, Yannick Copin <yannick.copin@laposte.net>\n        \"\"\"\n\n        if (not aggregate_observations) and (not normalize_std):\n            raise ValueError(\n                \"aggregate_observations=False is only possible if normalize_std=True!\"\n            )\n\n        metrics = [mtr._std_obs, mtr._std_mod, mtr.cc]\n        skill_func = self.cc.mean_skill if aggregate_observations else self.cc.skill\n        sk = skill_func(\n            metrics=metrics,  # type: ignore\n        )\n        if sk is None:\n            return\n\n        df = sk.to_dataframe()\n        ref_std = 1.0 if normalize_std else df.iloc[0][\"_std_obs\"]\n\n        if isinstance(df.index, pd.MultiIndex):\n            df.index = df.index.map(\"_\".join)\n\n        df = df[[\"_std_obs\", \"_std_mod\", \"cc\"]].copy()\n        df.columns = [\"obs_std\", \"std\", \"cc\"]\n        pts = [\n            TaylorPoint(\n                r.Index, r.obs_std, r.std, r.cc, marker=marker, marker_size=marker_size\n            )\n            for r in df.itertuples()\n        ]\n\n        return taylor_diagram(\n            obs_std=ref_std,\n            points=pts,\n            figsize=figsize,\n            normalize_std=normalize_std,\n            title=title,\n        )\n\n    def box(self, *, ax=None, figsize=None, title=None, **kwargs) -> Axes:\n        \"\"\"Plot box plot of observations and model data.\n\n        Parameters\n        ----------\n        ax : Axes, optional\n            matplotlib axes, by default None\n        figsize : tuple, optional\n            width and height of the figure, by default None\n        title : str, optional\n            plot title, by default None\n        **kwargs\n            passed to pandas.DataFrame.plot.box()\n\n        Returns\n        -------\n        Axes\n            matplotlib axes\n\n        Examples\n        --------\n        >>> cc.plot.box()\n        >>> cc.plot.box(showmeans=True)\n        >>> cc.plot.box(ax=ax, 
title=\"Box plot\")\n        \"\"\"\n        _, ax = _get_fig_ax(ax, figsize)\n\n        df = self.cc._to_long_dataframe()\n\n        unique_obs_cols = [\"time\", \"x\", \"y\", \"observation\"]\n        df = df.set_index(unique_obs_cols)\n        unique_obs_values = df[~df.duplicated()].obs_val.values\n\n        data = {\"Observation\": unique_obs_values}\n        for model in df.model.unique():\n            df_model = df[df.model == model]\n            data[model] = df_model.mod_val.values\n\n        data = {k: pd.Series(v) for k, v in data.items()}\n        df = pd.DataFrame(data)\n\n        if \"grid\" not in kwargs:\n            kwargs[\"grid\"] = True\n\n        ax = df.plot.box(ax=ax, **kwargs)\n\n        ax.set_ylabel(f\"{self.cc._unit_text}\")\n\n        title = (\n            _default_univarate_title(\"Box plot\", self.cc) if title is None else title\n        )\n        ax.set_title(title)\n\n        if self.is_directional:\n            _ytick_directional(ax)\n\n        return ax\n
"},{"location":"api/comparercollection/#modelskill.comparison._collection_plotter.ComparerCollectionPlotter.box","title":"box","text":"
box(*, ax=None, figsize=None, title=None, **kwargs)\n

Plot box plot of observations and model data.

Parameters:

Name Type Description Default ax Axes

matplotlib axes, by default None

None figsize tuple

width and height of the figure, by default None

None title str

plot title, by default None

None **kwargs

passed to pandas.DataFrame.plot.box()

{}

Returns:

Type Description Axes

matplotlib axes

Examples:

>>> cc.plot.box()\n>>> cc.plot.box(showmeans=True)\n>>> cc.plot.box(ax=ax, title=\"Box plot\")\n
Source code in modelskill/comparison/_collection_plotter.py
def box(self, *, ax=None, figsize=None, title=None, **kwargs) -> Axes:\n    \"\"\"Plot box plot of observations and model data.\n\n    Parameters\n    ----------\n    ax : Axes, optional\n        matplotlib axes, by default None\n    figsize : tuple, optional\n        width and height of the figure, by default None\n    title : str, optional\n        plot title, by default None\n    **kwargs\n        passed to pandas.DataFrame.plot.box()\n\n    Returns\n    -------\n    Axes\n        matplotlib axes\n\n    Examples\n    --------\n    >>> cc.plot.box()\n    >>> cc.plot.box(showmeans=True)\n    >>> cc.plot.box(ax=ax, title=\"Box plot\")\n    \"\"\"\n    _, ax = _get_fig_ax(ax, figsize)\n\n    df = self.cc._to_long_dataframe()\n\n    unique_obs_cols = [\"time\", \"x\", \"y\", \"observation\"]\n    df = df.set_index(unique_obs_cols)\n    unique_obs_values = df[~df.duplicated()].obs_val.values\n\n    data = {\"Observation\": unique_obs_values}\n    for model in df.model.unique():\n        df_model = df[df.model == model]\n        data[model] = df_model.mod_val.values\n\n    data = {k: pd.Series(v) for k, v in data.items()}\n    df = pd.DataFrame(data)\n\n    if \"grid\" not in kwargs:\n        kwargs[\"grid\"] = True\n\n    ax = df.plot.box(ax=ax, **kwargs)\n\n    ax.set_ylabel(f\"{self.cc._unit_text}\")\n\n    title = (\n        _default_univarate_title(\"Box plot\", self.cc) if title is None else title\n    )\n    ax.set_title(title)\n\n    if self.is_directional:\n        _ytick_directional(ax)\n\n    return ax\n
"},{"location":"api/comparercollection/#modelskill.comparison._collection_plotter.ComparerCollectionPlotter.hist","title":"hist","text":"
hist(bins=100, *, model=None, title=None, density=True, alpha=0.5, ax=None, figsize=None, **kwargs)\n

Plot histogram of specific model and all observations.

Wraps pandas.DataFrame hist() method.

Parameters:

Name Type Description Default bins int

number of bins, by default 100

100 title str

plot title, default: observation name

None density bool

If True, draw and return a probability density, by default True

True alpha float

alpha transparency fraction, by default 0.5

0.5 ax matplotlib axes

axes to plot on, by default None

None figsize tuple

width and height of the figure, by default None

None **kwargs

other keyword arguments to df.hist()

{}

Returns:

Type Description matplotlib axes

Examples:

>>> cc.plot.hist()\n>>> cc.plot.hist(bins=100)\n
See also

pandas.Series.hist matplotlib.axes.Axes.hist

Source code in modelskill/comparison/_collection_plotter.py
def hist(\n    self,\n    bins: int | Sequence = 100,\n    *,\n    model: str | int | None = None,\n    title: Optional[str] = None,\n    density: bool = True,\n    alpha: float = 0.5,\n    ax=None,\n    figsize: Optional[Tuple[float, float]] = None,\n    **kwargs,\n):\n    \"\"\"Plot histogram of specific model and all observations.\n\n    Wraps pandas.DataFrame hist() method.\n\n    Parameters\n    ----------\n    bins : int, optional\n        number of bins, by default 100\n    title : str, optional\n        plot title, default: observation name\n    density: bool, optional\n        If True, draw and return a probability density, by default True\n    alpha : float, optional\n        alpha transparency fraction, by default 0.5\n    ax : matplotlib axes, optional\n        axes to plot on, by default None\n    figsize : tuple, optional\n        width and height of the figure, by default None\n    **kwargs\n        other keyword arguments to df.hist()\n\n    Returns\n    -------\n    matplotlib axes\n\n    Examples\n    --------\n    >>> cc.plot.hist()\n    >>> cc.plot.hist(bins=100)\n\n    See also\n    --------\n    pandas.Series.hist\n    matplotlib.axes.Axes.hist\n    \"\"\"\n    if model is None:\n        mod_names = self.cc.mod_names\n    else:\n        warnings.warn(\n            \"The 'model' keyword is deprecated! 
Instead, filter comparer before plotting cmp.sel(model=...).plot.hist()\",\n            FutureWarning,\n        )\n        model_list = [model] if isinstance(model, (str, int)) else model\n        mod_names = [\n            self.cc.mod_names[_get_idx(m, self.cc.mod_names)] for m in model_list\n        ]\n\n    axes = []\n    for mod_name in mod_names:\n        ax_mod = self._hist_one_model(\n            mod_name=mod_name,\n            bins=bins,\n            title=title,\n            density=density,\n            alpha=alpha,\n            ax=ax,\n            figsize=figsize,\n            **kwargs,\n        )\n        axes.append(ax_mod)\n    return axes[0] if len(axes) == 1 else axes\n
"},{"location":"api/comparercollection/#modelskill.comparison._collection_plotter.ComparerCollectionPlotter.kde","title":"kde","text":"
kde(*, ax=None, figsize=None, title=None, **kwargs)\n

Plot kernel density estimate of observation and model data.

Parameters:

Name Type Description Default ax Axes

matplotlib axes, by default None

None figsize tuple

width and height of the figure, by default None

None title str

plot title, by default None

None **kwargs

passed to pandas.DataFrame.plot.kde()

{}

Returns:

Type Description Axes

matplotlib axes

Examples:

>>> cc.plot.kde()\n>>> cc.plot.kde(bw_method=0.5)\n>>> cc.plot.kde(bw_method='silverman')\n
Source code in modelskill/comparison/_collection_plotter.py
def kde(self, *, ax=None, figsize=None, title=None, **kwargs) -> Axes:\n    \"\"\"Plot kernel density estimate of observation and model data.\n\n    Parameters\n    ----------\n    ax : Axes, optional\n        matplotlib axes, by default None\n    figsize : tuple, optional\n        width and height of the figure, by default None\n    title : str, optional\n        plot title, by default None\n    **kwargs\n        passed to pandas.DataFrame.plot.kde()\n\n    Returns\n    -------\n    Axes\n        matplotlib axes\n\n    Examples\n    --------\n    >>> cc.plot.kde()\n    >>> cc.plot.kde(bw_method=0.5)\n    >>> cc.plot.kde(bw_method='silverman')\n\n    \"\"\"\n    _, ax = _get_fig_ax(ax, figsize)\n\n    df = self.cc._to_long_dataframe()\n    ax = df.obs_val.plot.kde(\n        ax=ax, linestyle=\"dashed\", label=\"Observation\", **kwargs\n    )\n\n    for model in self.cc.mod_names:\n        df_model = df[df.model == model]\n        df_model.mod_val.plot.kde(ax=ax, label=model, **kwargs)\n\n    ax.set_xlabel(f\"{self.cc._unit_text}\")\n\n    title = (\n        _default_univarate_title(\"Density plot\", self.cc)\n        if title is None\n        else title\n    )\n    ax.set_title(title)\n    ax.legend()\n\n    # remove y-axis, ticks and label\n    ax.yaxis.set_visible(False)\n    ax.tick_params(axis=\"y\", which=\"both\", length=0)\n    ax.set_ylabel(\"\")\n\n    # remove box around plot\n    ax.spines[\"top\"].set_visible(False)\n    ax.spines[\"right\"].set_visible(False)\n    ax.spines[\"left\"].set_visible(False)\n\n    if self.is_directional:\n        _xtick_directional(ax)\n\n    return ax\n
"},{"location":"api/comparercollection/#modelskill.comparison._collection_plotter.ComparerCollectionPlotter.scatter","title":"scatter","text":"
scatter(*, model=None, bins=120, quantiles=None, fit_to_quantiles=False, show_points=None, show_hist=None, show_density=None, norm=None, backend='matplotlib', figsize=(8, 8), xlim=None, ylim=None, reg_method='ols', title=None, xlabel=None, ylabel=None, skill_table=None, ax=None, **kwargs)\n

Scatter plot showing compared data: observation vs modelled Optionally, with density histogram.

Parameters:

Name Type Description Default bins int | float

bins for the 2D histogram on the background. By default 20 bins. if int, represents the number of bins of 2D if float, represents the bin size if sequence (list of int or float), represents the bin edges

120 quantiles int | Sequence[float] | None

number of quantiles for QQ-plot, by default None and will depend on the scatter data length (10, 100 or 1000); if int, this is the number of points; if sequence (list of floats), represents the desired quantiles (from 0 to 1)

None fit_to_quantiles bool

by default the regression line is fitted to all data, if True, it is fitted to the quantiles which can be useful to represent the extremes of the distribution, by default False

False show_points (bool, int, float)

Should the scatter points be displayed? None means: show all points if fewer than 1e4, otherwise show 1e4 sample points, by default None. float: fraction of points to show on plot from 0 to 1. e.g. 0.5 shows 50% of the points. int: if 'n' (int) given, then 'n' points will be displayed, randomly selected

None show_hist bool

show the data density as a a 2d histogram, by default None

None show_density Optional[bool]

show the data density as a colormap of the scatter, by default None. If both show_density and show_hist are None, then show_density is used by default. For binning the data, the kword bins=Float is used.

None norm matplotlib.colors norm

colormap normalization. If None, defaults to matplotlib.colors.PowerNorm(vmin=1, gamma=0.5)

None backend str

use \"plotly\" (interactive) or \"matplotlib\" backend, by default \"matplotlib\"

'matplotlib' figsize tuple

width and height of the figure, by default (8, 8)

(8, 8) xlim tuple

plot range for the observation (xmin, xmax), by default None

None ylim tuple

plot range for the model (ymin, ymax), by default None

None reg_method str or bool

method for determining the regression line \"ols\" : ordinary least squares regression \"odr\" : orthogonal distance regression, False : no regression line, by default \"ols\"

'ols' title str

plot title, by default None

None xlabel str

x-label text on plot, by default None

None ylabel str

y-label text on plot, by default None

None skill_table (str, List[str], bool)

list of modelskill.metrics or boolean, if True then by default modelskill.options.metrics.list. This kword adds a box at the right of the scatter plot, by default False

None ax matplotlib axes

axes to plot on, by default None

None **kwargs

other keyword arguments to matplotlib.pyplot.scatter()

{}

Examples:

>>> cc.plot.scatter()\n>>> cc.plot.scatter(bins=0.2, backend='plotly')\n>>> cc.plot.scatter(show_points=False, title='no points')\n>>> cc.plot.scatter(xlabel='all observations', ylabel='my model')\n>>> cc.sel(model='HKZN_v2').plot.scatter(figsize=(10, 10))\n>>> cc.sel(observations=['c2','HKNA']).plot.scatter()\n
Source code in modelskill/comparison/_collection_plotter.py
def scatter(\n    self,\n    *,\n    model=None,\n    bins: int | float = 120,\n    quantiles: int | Sequence[float] | None = None,\n    fit_to_quantiles: bool = False,\n    show_points: bool | int | float | None = None,\n    show_hist: Optional[bool] = None,\n    show_density: Optional[bool] = None,\n    norm: Optional[colors.Normalize] = None,\n    backend: Literal[\"matplotlib\", \"plotly\"] = \"matplotlib\",\n    figsize: Tuple[float, float] = (8, 8),\n    xlim: Optional[Tuple[float, float]] = None,\n    ylim: Optional[Tuple[float, float]] = None,\n    reg_method: str | bool = \"ols\",\n    title: Optional[str] = None,\n    xlabel: Optional[str] = None,\n    ylabel: Optional[str] = None,\n    skill_table: Optional[Union[str, List[str], bool]] = None,\n    ax: Optional[Axes] = None,\n    **kwargs,\n) -> Axes | list[Axes]:\n    \"\"\"Scatter plot showing compared data: observation vs modelled\n    Optionally, with density histogram.\n\n    Parameters\n    ----------\n    bins: (int, float, sequence), optional\n        bins for the 2D histogram on the background. By default 20 bins.\n        if int, represents the number of bins of 2D\n        if float, represents the bin size\n        if sequence (list of int or float), represents the bin edges\n    quantiles: (int, sequence), optional\n        number of quantiles for QQ-plot, by default None and will depend\n        on the scatter data length (10, 100 or 1000); if int, this is\n        the number of points; if sequence (list of floats), represents\n        the desired quantiles (from 0 to 1)\n    fit_to_quantiles: bool, optional, by default False\n        by default the regression line is fitted to all data, if True,\n        it is fitted to the quantiles which can be useful to represent\n        the extremes of the distribution, by default False\n    show_points : (bool, int, float), optional\n        Should the scatter points be displayed? 
None means: show all\n        points if fewer than 1e4, otherwise show 1e4 sample points,\n        by default None. float: fraction of points to show on plot\n        from 0 to 1. e.g. 0.5 shows 50% of the points. int: if 'n' (int)\n        given, then 'n' points will be displayed, randomly selected\n    show_hist : bool, optional\n        show the data density as a a 2d histogram, by default None\n    show_density: bool, optional\n        show the data density as a colormap of the scatter, by default\n        None. If both `show_density` and `show_hist` are None, then\n        `show_density` is used by default. For binning the data, the\n        kword `bins=Float` is used.\n    norm : matplotlib.colors norm\n        colormap normalization. If None, defaults to\n        matplotlib.colors.PowerNorm(vmin=1, gamma=0.5)\n    backend : str, optional\n        use \"plotly\" (interactive) or \"matplotlib\" backend,\n        by default \"matplotlib\"\n    figsize : tuple, optional\n        width and height of the figure, by default (8, 8)\n    xlim : tuple, optional\n        plot range for the observation (xmin, xmax), by default None\n    ylim : tuple, optional\n        plot range for the model (ymin, ymax), by default None\n    reg_method : str or bool, optional\n        method for determining the regression line\n        \"ols\" : ordinary least squares regression\n        \"odr\" : orthogonal distance regression,\n        False : no regression line,\n        by default \"ols\"\n    title : str, optional\n        plot title, by default None\n    xlabel : str, optional\n        x-label text on plot, by default None\n    ylabel : str, optional\n        y-label text on plot, by default None\n    skill_table : str, List[str], bool, optional\n        list of modelskill.metrics or boolean, if True then by default modelskill.options.metrics.list.\n        This kword adds a box at the right of the scatter plot,\n        by default False\n    ax : matplotlib axes, optional\n     
   axes to plot on, by default None\n    **kwargs\n        other keyword arguments to matplotlib.pyplot.scatter()\n\n    Examples\n    ------\n    >>> cc.plot.scatter()\n    >>> cc.plot.scatter(bins=0.2, backend='plotly')\n    >>> cc.plot.scatter(show_points=False, title='no points')\n    >>> cc.plot.scatter(xlabel='all observations', ylabel='my model')\n    >>> cc.sel(model='HKZN_v2').plot.scatter(figsize=(10, 10))\n    >>> cc.sel(observations=['c2','HKNA']).plot.scatter()\n    \"\"\"\n\n    cc = self.cc\n    if model is None:\n        mod_names = cc.mod_names\n    else:\n        warnings.warn(\n            \"The 'model' keyword is deprecated! Instead, filter comparer before plotting cmp.sel(model=...).plot.scatter()\",\n            FutureWarning,\n        )\n\n        model_list = [model] if isinstance(model, (str, int)) else model\n        mod_names = [\n            self.cc.mod_names[_get_idx(m, self.cc.mod_names)] for m in model_list\n        ]\n\n    axes = []\n    for mod_name in mod_names:\n        ax_mod = self._scatter_one_model(\n            mod_name=mod_name,\n            bins=bins,\n            quantiles=quantiles,\n            fit_to_quantiles=fit_to_quantiles,\n            show_points=show_points,\n            show_hist=show_hist,\n            show_density=show_density,\n            norm=norm,\n            backend=backend,\n            figsize=figsize,\n            xlim=xlim,\n            ylim=ylim,\n            reg_method=reg_method,\n            title=title,\n            xlabel=xlabel,\n            ylabel=ylabel,\n            skill_table=skill_table,\n            ax=ax,\n            **kwargs,\n        )\n        axes.append(ax_mod)\n    return axes[0] if len(axes) == 1 else axes\n
"},{"location":"api/comparercollection/#modelskill.comparison._collection_plotter.ComparerCollectionPlotter.taylor","title":"taylor","text":"
taylor(*, normalize_std=False, aggregate_observations=True, figsize=(7, 7), marker='o', marker_size=6.0, title='Taylor diagram')\n

Taylor diagram showing model std and correlation to observation in a single-quadrant polar plot, with r=std and theta=arccos(cc).

Parameters:

Name Type Description Default normalize_std bool

plot model std normalized with observation std, default False

False aggregate_observations bool

should multiple observations be aggregated before plotting (or shown individually), default True

True figsize tuple

width and height of the figure (should be square), by default (7, 7)

(7, 7) marker str

marker type e.g. \"x\", \"*\", by default \"o\"

'o' marker_size float

size of the marker, by default 6

6.0 title str

title of the plot, by default \"Taylor diagram\"

'Taylor diagram'

Returns:

Type Description Figure

Examples:

>>> cc.plot.taylor()\n>>> cc.plot.taylor(observation=\"c2\")\n>>> cc.plot.taylor(start=\"2017-10-28\", figsize=(5,5))\n
References

Copin, Y. (2018). https://gist.github.com/ycopin/3342888, Yannick Copin yannick.copin@laposte.net

Source code in modelskill/comparison/_collection_plotter.py
def taylor(\n    self,\n    *,\n    normalize_std: bool = False,\n    aggregate_observations: bool = True,\n    figsize: Tuple[float, float] = (7, 7),\n    marker: str = \"o\",\n    marker_size: float = 6.0,\n    title: str = \"Taylor diagram\",\n):\n    \"\"\"Taylor diagram showing model std and correlation to observation\n    in a single-quadrant polar plot, with r=std and theta=arccos(cc).\n\n    Parameters\n    ----------\n    normalize_std : bool, optional\n        plot model std normalized with observation std, default False\n    aggregate_observations : bool, optional\n        should multiple observations be aggregated before plotting\n        (or shown individually), default True\n    figsize : tuple, optional\n        width and height of the figure (should be square), by default (7, 7)\n    marker : str, optional\n        marker type e.g. \"x\", \"*\", by default \"o\"\n    marker_size : float, optional\n        size of the marker, by default 6\n    title : str, optional\n        title of the plot, by default \"Taylor diagram\"\n\n    Returns\n    -------\n    matplotlib.figure.Figure\n\n    Examples\n    ------\n    >>> cc.plot.taylor()\n    >>> cc.plot.taylor(observation=\"c2\")\n    >>> cc.plot.taylor(start=\"2017-10-28\", figsize=(5,5))\n\n    References\n    ----------\n    Copin, Y. (2018). 
https://gist.github.com/ycopin/3342888, Yannick Copin <yannick.copin@laposte.net>\n    \"\"\"\n\n    if (not aggregate_observations) and (not normalize_std):\n        raise ValueError(\n            \"aggregate_observations=False is only possible if normalize_std=True!\"\n        )\n\n    metrics = [mtr._std_obs, mtr._std_mod, mtr.cc]\n    skill_func = self.cc.mean_skill if aggregate_observations else self.cc.skill\n    sk = skill_func(\n        metrics=metrics,  # type: ignore\n    )\n    if sk is None:\n        return\n\n    df = sk.to_dataframe()\n    ref_std = 1.0 if normalize_std else df.iloc[0][\"_std_obs\"]\n\n    if isinstance(df.index, pd.MultiIndex):\n        df.index = df.index.map(\"_\".join)\n\n    df = df[[\"_std_obs\", \"_std_mod\", \"cc\"]].copy()\n    df.columns = [\"obs_std\", \"std\", \"cc\"]\n    pts = [\n        TaylorPoint(\n            r.Index, r.obs_std, r.std, r.cc, marker=marker, marker_size=marker_size\n        )\n        for r in df.itertuples()\n    ]\n\n    return taylor_diagram(\n        obs_std=ref_std,\n        points=pts,\n        figsize=figsize,\n        normalize_std=normalize_std,\n        title=title,\n    )\n
"},{"location":"api/gridded_skill/","title":"Gridded Skill","text":""},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGrid","title":"modelskill.skill_grid.SkillGrid","text":"

Bases: SkillGridMixin

Gridded skill object for analysis and visualization of spatially gridded skill data. The object wraps the xr.DataSet class which can be accessed from the attribute data.

The object contains one or more \"arrays\" of skill metrics, each corresponding to a single metric (e.g. bias, rmse, r2). The arrays are indexed by the metric name, e.g. ss[\"bias\"] or ss.bias.

Examples:

>>> gs = cc.gridded_skill()\n>>> gs.metrics\n['n', 'bias', 'rmse', 'urmse', 'mae', 'cc', 'si', 'r2']\n
>>> gs.mod_names\n['SW_1', 'SW_2']\n
>>> gs.sel(model='SW_1').rmse.plot()\n
Source code in modelskill/skill_grid.py
class SkillGrid(SkillGridMixin):\n    \"\"\"\n    Gridded skill object for analysis and visualization of spatially\n    gridded skill data. The object wraps the xr.DataSet class\n    which can be accessed from the attribute data.\n\n    The object contains one or more \"arrays\" of skill metrics, each\n    corresponding to a single metric (e.g. bias, rmse, r2). The arrays\n    are indexed by the metric name, e.g. `ss[\"bias\"]` or `ss.bias`.\n\n    Examples\n    --------\n    >>> gs = cc.gridded_skill()\n    >>> gs.metrics\n    ['n', 'bias', 'rmse', 'urmse', 'mae', 'cc', 'si', 'r2']\n\n    >>> gs.mod_names\n    ['SW_1', 'SW_2']\n\n    >>> gs.sel(model='SW_1').rmse.plot()\n    \"\"\"\n\n    def __init__(self, data: xr.Dataset) -> None:\n        # TODO: add type and unit info; add domain to plot outline on map\n        self.data = data\n        self._set_attrs()\n\n    @property\n    def metrics(self) -> list[str]:\n        \"\"\"List of metrics (=data vars)\"\"\"\n        return list(self.data.data_vars)\n\n    def __repr__(self) -> str:\n        out = [\n            \"<SkillGrid>\",\n            f\"Dimensions: (x: {len(self.x)}, y: {len(self.y)})\",\n        ]\n        return \"\\n\".join(out)\n\n    @overload\n    def __getitem__(self, key: Hashable) -> SkillGridArray: ...\n\n    @overload\n    def __getitem__(self, key: Iterable[Hashable]) -> SkillGrid: ...\n\n    def __getitem__(\n        self, key: Hashable | Iterable[Hashable]\n    ) -> SkillGridArray | SkillGrid:\n        result = self.data[key]\n        if isinstance(result, xr.DataArray):\n            return SkillGridArray(result)\n        elif isinstance(result, xr.Dataset):\n            return SkillGrid(result)\n        else:\n            return result\n\n    def __getattr__(self, item: str, *args, **kwargs) -> Any:\n        if item in self.data.data_vars:\n            return self[item]  # Redirects to __getitem__\n        else:\n            # return getattr(self.data, item, *args, **kwargs)\n            
raise AttributeError(\n                f\"\"\"\n                    SkillGrid has no attribute {item}; Maybe you are\n                    looking for the corresponding xr.Dataset attribute?\n                    Access SkillGrid's Dataset with '.data'.\n                \"\"\"\n            )\n\n    def _set_attrs(self) -> None:\n        # TODO: use type and unit to give better long name\n        # self.ds[\"bias\"].attrs = dict(long_name=\"Bias of Hm0\", units=\"m\")\n\n        self.data[\"n\"].attrs = dict(long_name=\"Number of observations\", units=\"-\")\n        if self._has_geographical_coords():\n            self.data[\"x\"].attrs = dict(long_name=\"Longitude\", units=\"degrees east\")\n            self.data[\"y\"].attrs = dict(long_name=\"Latitude\", units=\"degrees north\")\n        else:\n            self.data[\"x\"].attrs = dict(long_name=\"Easting\", units=\"meter\")\n            self.data[\"y\"].attrs = dict(long_name=\"Northing\", units=\"meter\")\n\n    def _has_geographical_coords(self) -> bool:\n        is_geo = True\n        if (self.x.min() < -180.0) or (self.x.max() > 360.0):\n            is_geo = False\n        if (self.y.min() < -90.0) or (self.y.max() > 90.0):\n            is_geo = False\n        return is_geo\n\n    def sel(self, model: str) -> SkillGrid:\n        \"\"\"Select a model from the SkillGrid\n\n        Parameters\n        ----------\n        model : str\n            Name of model to select\n\n        Returns\n        -------\n        SkillGrid\n            SkillGrid with only the selected model\n        \"\"\"\n        sel_data = self.data.sel(model=model)\n        assert isinstance(sel_data, xr.Dataset)\n        return SkillGrid(sel_data)\n\n    def plot(self, metric: str, model: str | None = None, **kwargs: Any) -> Axes:\n        warnings.warn(\n            \"plot() is deprecated and will be removed in a future version. 
\",\n            FutureWarning,\n        )\n        if metric not in self.metrics:\n            raise ValueError(f\"metric {metric} not found in {self.metrics}\")\n        return self[metric].plot(model=model, **kwargs)\n\n    def to_dataframe(self) -> pd.DataFrame:\n        \"\"\"Convert gridded skill data to pandas DataFrame\n\n        Returns\n        -------\n        pd.DataFrame\n            data as a pandas DataFrame\n        \"\"\"\n        return self.data.to_dataframe()\n
"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGrid.coords","title":"coords property","text":"
coords\n

Coordinates (same as xr.DataSet.coords)

"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGrid.metrics","title":"metrics property","text":"
metrics\n

List of metrics (=data vars)

"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGrid.mod_names","title":"mod_names property","text":"
mod_names\n

List of model names

"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGrid.obs_names","title":"obs_names property","text":"
obs_names\n

List of observation names

"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGrid.x","title":"x property","text":"
x\n

x-coordinate values

"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGrid.y","title":"y property","text":"
y\n

y-coordinate values

"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGrid.sel","title":"sel","text":"
sel(model)\n

Select a model from the SkillGrid

Parameters:

Name Type Description Default model str

Name of model to select

required

Returns:

Type Description SkillGrid

SkillGrid with only the selected model

Source code in modelskill/skill_grid.py
def sel(self, model: str) -> SkillGrid:\n    \"\"\"Select a model from the SkillGrid\n\n    Parameters\n    ----------\n    model : str\n        Name of model to select\n\n    Returns\n    -------\n    SkillGrid\n        SkillGrid with only the selected model\n    \"\"\"\n    sel_data = self.data.sel(model=model)\n    assert isinstance(sel_data, xr.Dataset)\n    return SkillGrid(sel_data)\n
"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGrid.to_dataframe","title":"to_dataframe","text":"
to_dataframe()\n

Convert gridded skill data to pandas DataFrame

Returns:

Type Description DataFrame

data as a pandas DataFrame

Source code in modelskill/skill_grid.py
def to_dataframe(self) -> pd.DataFrame:\n    \"\"\"Convert gridded skill data to pandas DataFrame\n\n    Returns\n    -------\n    pd.DataFrame\n        data as a pandas DataFrame\n    \"\"\"\n    return self.data.to_dataframe()\n
"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGridArray","title":"modelskill.skill_grid.SkillGridArray","text":"

Bases: SkillGridMixin

A SkillGridArray is a single metric-SkillGrid, corresponding to a \"column\" in a SkillGrid

Typically created by indexing a SkillGrid object, e.g. ss[\"bias\"].

Examples:

>>> gs = cc.gridded_skill()\n>>> gs[\"bias\"].plot()\n
Source code in modelskill/skill_grid.py
class SkillGridArray(SkillGridMixin):\n    \"\"\"A SkillGridArray is a single metric-SkillGrid, corresponding to a \"column\" in a SkillGrid\n\n    Typically created by indexing a SkillGrid object, e.g. `ss[\"bias\"]`.\n\n    Examples\n    --------\n    >>> gs = cc.gridded_skill()\n    >>> gs[\"bias\"].plot()\n    \"\"\"\n\n    def __init__(self, data: xr.DataArray) -> None:\n        assert isinstance(data, xr.DataArray)\n        self.data = data\n\n    def __repr__(self) -> str:\n        out = [\n            \"<SkillGridArray>\",\n            f\"Dimensions: (x: {len(self.x)}, y: {len(self.y)})\",\n        ]\n        return \"\\n\".join(out)\n\n    def plot(self, model: str | None = None, **kwargs: Any) -> Axes:\n        \"\"\"wrapper for xArray DataArray plot function\n\n        Parameters\n        ----------\n        model : str, optional\n            Name of model to plot, by default all models\n        **kwargs\n            keyword arguments passed to xr.DataArray plot()\n            e.g. figsize\n\n        Examples\n        --------\n        >>> gs = cc.gridded_skill()\n        >>> gs[\"bias\"].plot()\n        >>> gs.rmse.plot(model='SW_1')\n        >>> gs.r2.plot(cmap='YlOrRd', figsize=(10,10))\n        \"\"\"\n        if model is None:\n            da = self.data\n        else:\n            warnings.warn(\n                \"model argument is deprecated, use sel(model=...)\",\n                FutureWarning,\n            )\n            if model not in self.mod_names:\n                raise ValueError(f\"model {model} not in model list ({self.mod_names})\")\n            da = self.data.sel({\"model\": model})\n\n        extra_dims = [d for d in da.coords.dims if d not in [\"x\", \"y\"]]\n        if len(extra_dims) == 2:\n            ax = da.plot(col=extra_dims[0], row=extra_dims[1], **kwargs)\n        elif len(extra_dims) == 1:\n            ax = da.plot(col=extra_dims[0], **kwargs)\n        else:\n            ax = da.plot(**kwargs)\n        return ax\n
"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGridArray.coords","title":"coords property","text":"
coords\n

Coordinates (same as xr.DataSet.coords)

"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGridArray.mod_names","title":"mod_names property","text":"
mod_names\n

List of model names

"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGridArray.obs_names","title":"obs_names property","text":"
obs_names\n

List of observation names

"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGridArray.x","title":"x property","text":"
x\n

x-coordinate values

"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGridArray.y","title":"y property","text":"
y\n

y-coordinate values

"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGridArray.plot","title":"plot","text":"
plot(model=None, **kwargs)\n

wrapper for xArray DataArray plot function

Parameters:

Name Type Description Default model str

Name of model to plot, by default all models

None **kwargs Any

keyword arguments passed to xr.DataArray plot() e.g. figsize

{}

Examples:

>>> gs = cc.gridded_skill()\n>>> gs[\"bias\"].plot()\n>>> gs.rmse.plot(model='SW_1')\n>>> gs.r2.plot(cmap='YlOrRd', figsize=(10,10))\n
Source code in modelskill/skill_grid.py
def plot(self, model: str | None = None, **kwargs: Any) -> Axes:\n    \"\"\"wrapper for xArray DataArray plot function\n\n    Parameters\n    ----------\n    model : str, optional\n        Name of model to plot, by default all models\n    **kwargs\n        keyword arguments passed to xr.DataArray plot()\n        e.g. figsize\n\n    Examples\n    --------\n    >>> gs = cc.gridded_skill()\n    >>> gs[\"bias\"].plot()\n    >>> gs.rmse.plot(model='SW_1')\n    >>> gs.r2.plot(cmap='YlOrRd', figsize=(10,10))\n    \"\"\"\n    if model is None:\n        da = self.data\n    else:\n        warnings.warn(\n            \"model argument is deprecated, use sel(model=...)\",\n            FutureWarning,\n        )\n        if model not in self.mod_names:\n            raise ValueError(f\"model {model} not in model list ({self.mod_names})\")\n        da = self.data.sel({\"model\": model})\n\n    extra_dims = [d for d in da.coords.dims if d not in [\"x\", \"y\"]]\n    if len(extra_dims) == 2:\n        ax = da.plot(col=extra_dims[0], row=extra_dims[1], **kwargs)\n    elif len(extra_dims) == 1:\n        ax = da.plot(col=extra_dims[0], **kwargs)\n    else:\n        ax = da.plot(**kwargs)\n    return ax\n
"},{"location":"api/matching/","title":"Matching","text":"

A Comparer/ComparerCollection can be created in one of the following ways:

  • match() - match observations and model results
  • from_matched() - create a Comparer from matched data
  • from_config() - create a ComparerCollection from a config file
"},{"location":"api/matching/#modelskill.match","title":"modelskill.match","text":"
match(obs, mod, *, obs_item=None, mod_item=None, gtype=None, max_model_gap=None, spatial_method=None)\n

Match observation and model result data in space and time

NOTE: In case of multiple model results with different time coverage, only the overlapping time period will be used! (intersection)

NOTE: In case of multiple observations, multiple models can only be matched if they are all of SpatialField type, e.g. DfsuModelResult or GridModelResult.

Parameters:

Name Type Description Default obs (str, Path, DataFrame, Observation, Sequence[Observation])

Observation(s) to be compared

required mod (str, Path, DataFrame, ModelResult, Sequence[ModelResult])

Model result(s) to be compared

required obs_item int or str

observation item if obs is a file/dataframe, by default None

None mod_item (int, str)

model item if mod is a file/dataframe, by default None

None gtype (str, optional)

Geometry type of the model result (if mod is a file/dataframe). If not specified, it will be guessed.

None max_model_gap (float, optional)

Maximum time gap (s) in the model result (e.g. for event-based model results), by default None

None spatial_method str

For Dfsu- and GridModelResult, spatial interpolation/selection method.

  • For DfsuModelResult, one of: 'contained' (=isel), 'nearest', 'inverse_distance' (with 5 nearest points), by default \"inverse_distance\".
  • For GridModelResult, passed to xarray.interp() as method argument, by default 'linear'.
None

Returns:

Type Description Comparer

In case of a single observation

ComparerCollection

In case of multiple observations

See Also

from_matched Create a Comparer from observation and model results that are already matched

Source code in modelskill/matching.py
def match(\n    obs,\n    mod,\n    *,\n    obs_item=None,\n    mod_item=None,\n    gtype=None,\n    max_model_gap=None,\n    spatial_method: Optional[str] = None,\n):\n    \"\"\"Match observation and model result data in space and time\n\n    NOTE: In case of multiple model results with different time coverage,\n    only the _overlapping_ time period will be used! (intersection)\n\n    NOTE: In case of multiple observations, multiple models can _only_\n    be matched if they are _all_ of SpatialField type, e.g. DfsuModelResult\n    or GridModelResult.\n\n    Parameters\n    ----------\n    obs : (str, Path, pd.DataFrame, Observation, Sequence[Observation])\n        Observation(s) to be compared\n    mod : (str, Path, pd.DataFrame, ModelResult, Sequence[ModelResult])\n        Model result(s) to be compared\n    obs_item : int or str, optional\n        observation item if obs is a file/dataframe, by default None\n    mod_item : (int, str), optional\n        model item if mod is a file/dataframe, by default None\n    gtype : (str, optional)\n        Geometry type of the model result (if mod is a file/dataframe).\n        If not specified, it will be guessed.\n    max_model_gap : (float, optional)\n        Maximum time gap (s) in the model result (e.g. 
for event-based\n        model results), by default None\n    spatial_method : str, optional\n        For Dfsu- and GridModelResult, spatial interpolation/selection method.\n\n        - For DfsuModelResult, one of: 'contained' (=isel), 'nearest',\n        'inverse_distance' (with 5 nearest points), by default \"inverse_distance\".\n        - For GridModelResult, passed to xarray.interp() as method argument,\n        by default 'linear'.\n\n    Returns\n    -------\n    Comparer\n        In case of a single observation\n    ComparerCollection\n        In case of multiple observations\n\n    See Also\n    --------\n    [from_matched][modelskill.from_matched]\n        Create a Comparer from observation and model results that are already matched\n    \"\"\"\n    if isinstance(obs, get_args(ObsInputType)):\n        return _single_obs_compare(\n            obs,\n            mod,\n            obs_item=obs_item,\n            mod_item=mod_item,\n            gtype=gtype,\n            max_model_gap=max_model_gap,\n            spatial_method=spatial_method,\n        )\n\n    if isinstance(obs, Collection):\n        assert all(isinstance(o, get_args(ObsInputType)) for o in obs)\n    else:\n        raise TypeError(\n            f\"Obs is not the correct type: it is {type(obs)}. Check the order of the arguments (obs, mod).\"\n        )\n\n    if len(obs) > 1 and isinstance(mod, Collection) and len(mod) > 1:\n        if not all(isinstance(m, (DfsuModelResult, GridModelResult)) for m in mod):\n            raise ValueError(\n                \"\"\"\n                In case of multiple observations, multiple models can _only_ \n                be matched if they are _all_ of SpatialField type, e.g. DfsuModelResult \n                or GridModelResult. 
\n\n                If you want match multiple point observations with multiple point model results, \n                please match one observation at a time and then create a collection of these \n                using modelskill.ComparerCollection(cmp_list) afterwards. The same applies to track data.\n                \"\"\"\n            )\n\n    clist = [\n        _single_obs_compare(\n            o,\n            mod,\n            obs_item=obs_item,\n            mod_item=mod_item,\n            gtype=gtype,\n            max_model_gap=max_model_gap,\n            spatial_method=spatial_method,\n        )\n        for o in obs\n    ]\n\n    return ComparerCollection(clist)\n
"},{"location":"api/matching/#modelskill.from_matched","title":"modelskill.from_matched","text":"
from_matched(data, *, obs_item=0, mod_items=None, aux_items=None, quantity=None, name=None, weight=1.0, x=None, y=None, z=None, x_item=None, y_item=None)\n

Create a Comparer from observation and model results that are already matched (aligned)

Parameters:

Name Type Description Default data [DataFrame, str, Path, Dfs0, Dataset]

DataFrame (or object that can be converted to a DataFrame e.g. dfs0) with columns obs_item, mod_items, aux_items

required obs_item [str, int]

Name or index of observation item, by default first item

0 mod_items Iterable[str, int]

Names or indicies of model items, if None all remaining columns are model items, by default None

None aux_items Iterable[str, int]

Names or indicies of auxiliary items, by default None

None quantity Quantity

Quantity of the observation and model results, by default Quantity(name=\"Undefined\", unit=\"Undefined\")

None name str

Name of the comparer, by default None (will be set to obs_item)

None x float

x-coordinate of observation, by default None

None y float

y-coordinate of observation, by default None

None z float

z-coordinate of observation, by default None

None x_item str | int | None

Name of x item, only relevant for track data

None y_item str | int | None

Name of y item, only relevant for track data

None

Examples:

>>> import pandas as pd\n>>> import modelskill as ms\n>>> df = pd.DataFrame({'stn_a': [1,2,3], 'local': [1.1,2.1,3.1]}, index=pd.date_range('2010-01-01', periods=3))\n>>> cmp = ms.from_matched(df, obs_item='stn_a') # remaining columns are model results\n>>> cmp\n<Comparer>\nQuantity: Undefined [Undefined]\nObservation: stn_a, n_points=3\n Model: local, rmse=0.100\n>>> df = pd.DataFrame({'stn_a': [1,2,3], 'local': [1.1,2.1,3.1], 'global': [1.2,2.2,3.2], 'nonsense':[1,2,3]}, index=pd.date_range('2010-01-01', periods=3))\n>>> cmp = ms.from_matched(df, obs_item='stn_a', mod_items=['local', 'global'])\n>>> cmp\n<Comparer>\nQuantity: Undefined [Undefined]\nObservation: stn_a, n_points=3\n    Model: local, rmse=0.100\n    Model: global, rmse=0.200\n
Source code in modelskill/matching.py
def from_matched(\n    data: Union[str, Path, pd.DataFrame, mikeio.Dfs0, mikeio.Dataset],\n    *,\n    obs_item: str | int | None = 0,\n    mod_items: Optional[Iterable[str | int]] = None,\n    aux_items: Optional[Iterable[str | int]] = None,\n    quantity: Optional[Quantity] = None,\n    name: Optional[str] = None,\n    weight: float = 1.0,\n    x: Optional[float] = None,\n    y: Optional[float] = None,\n    z: Optional[float] = None,\n    x_item: str | int | None = None,\n    y_item: str | int | None = None,\n) -> Comparer:\n    \"\"\"Create a Comparer from observation and model results that are already matched (aligned)\n    Parameters\n    ----------\n    data : [pd.DataFrame, str, Path, mikeio.Dfs0, mikeio.Dataset]\n        DataFrame (or object that can be converted to a DataFrame e.g. dfs0)\n        with columns obs_item, mod_items, aux_items\n    obs_item : [str, int], optional\n        Name or index of observation item, by default first item\n    mod_items : Iterable[str, int], optional\n        Names or indicies of model items, if None all remaining columns are model items, by default None\n    aux_items : Iterable[str, int], optional\n        Names or indicies of auxiliary items, by default None\n    quantity : Quantity, optional\n        Quantity of the observation and model results, by default Quantity(name=\"Undefined\", unit=\"Undefined\")\n    name : str, optional\n        Name of the comparer, by default None (will be set to obs_item)\n    x : float, optional\n        x-coordinate of observation, by default None\n    y : float, optional\n        y-coordinate of observation, by default None\n    z : float, optional\n        z-coordinate of observation, by default None\n    x_item: [str, int], optional,\n        Name of x item, only relevant for track data\n    y_item: [str, int], optional\n        Name of y item, only relevant for track data\n\n    Examples\n    --------\n    >>> import pandas as pd\n    >>> import modelskill as ms\n    >>> df = 
pd.DataFrame({'stn_a': [1,2,3], 'local': [1.1,2.1,3.1]}, index=pd.date_range('2010-01-01', periods=3))\n    >>> cmp = ms.from_matched(df, obs_item='stn_a') # remaining columns are model results\n    >>> cmp\n    <Comparer>\n    Quantity: Undefined [Undefined]\n    Observation: stn_a, n_points=3\n     Model: local, rmse=0.100\n    >>> df = pd.DataFrame({'stn_a': [1,2,3], 'local': [1.1,2.1,3.1], 'global': [1.2,2.2,3.2], 'nonsense':[1,2,3]}, index=pd.date_range('2010-01-01', periods=3))\n    >>> cmp = ms.from_matched(df, obs_item='stn_a', mod_items=['local', 'global'])\n    >>> cmp\n    <Comparer>\n    Quantity: Undefined [Undefined]\n    Observation: stn_a, n_points=3\n        Model: local, rmse=0.100\n        Model: global, rmse=0.200\n    \"\"\"\n    # pre-process if dfs0, or mikeio.Dataset\n    if isinstance(data, (str, Path)):\n        if Path(data).suffix != \".dfs0\":\n            raise ValueError(f\"File must be a dfs0 file, not {Path(data).suffix}\")\n        data = mikeio.read(data)  # now mikeio.Dataset\n    elif isinstance(data, mikeio.Dfs0):\n        data = data.read()  # now mikeio.Dataset\n    if isinstance(data, mikeio.Dataset):\n        assert len(data.shape) == 1, \"Only 0-dimensional data are supported\"\n        if quantity is None:\n            quantity = Quantity.from_mikeio_iteminfo(data[obs_item].item)\n        data = data.to_dataframe()\n\n    cmp = Comparer.from_matched_data(\n        data,\n        obs_item=obs_item,\n        mod_items=mod_items,\n        aux_items=aux_items,\n        name=name,\n        weight=weight,\n        x=x,\n        y=y,\n        z=z,\n        x_item=x_item,\n        y_item=y_item,\n        quantity=quantity,\n    )\n\n    return cmp\n
"},{"location":"api/matching/#modelskill.from_config","title":"modelskill.from_config","text":"
from_config(conf, *, relative_path=True)\n

Load ComparerCollection from a config file (or dict)

Parameters:

Name Type Description Default conf Union[str, Path, dict]

path to config file or dict with configuration

required relative_path

True: file paths are relative to configuration file, False: file paths are absolute (relative to the current directory), by default True

True

Returns:

Type Description ComparerCollection

A ComparerCollection object from the given configuration

Examples:

>>> import modelskill as ms\n>>> cc = ms.from_config('Oresund.yml')\n
Source code in modelskill/configuration.py
def from_config(\n    conf: Union[dict, str, Path], *, relative_path=True\n) -> ComparerCollection:\n    \"\"\"Load ComparerCollection from a config file (or dict)\n\n    Parameters\n    ----------\n    conf : Union[str, Path, dict]\n        path to config file or dict with configuration\n    relative_path: bool, optional\n        True: file paths are relative to configuration file,\n        False: file paths are absolute (relative to the current directory),\n        by default True\n\n    Returns\n    -------\n    ComparerCollection\n        A ComparerCollection object from the given configuration\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> cc = ms.from_config('Oresund.yml')\n    \"\"\"\n    if isinstance(conf, (str, Path)):\n        p = Path(conf)\n        ext = p.suffix\n        dirname = Path(str(p.parents[0]))\n        if (ext == \".yml\") or (ext == \".yaml\") or (ext == \".conf\"):\n            conf = _yaml_to_dict(p)\n        elif \"xls\" in ext:\n            conf = _excel_to_dict(p)\n        else:\n            raise ValueError(\"Filename extension not supported! Use .yml or .xlsx\")\n    else:\n        dirname = Path(\".\")\n\n    assert isinstance(conf, dict)\n    modelresults = []\n    for name, mr_dict in conf[\"modelresults\"].items():\n        if not mr_dict.get(\"include\", True):\n            continue\n        fp = Path(mr_dict[\"filename\"])\n        if relative_path:\n            fp = dirname / fp\n\n        item = mr_dict.get(\"item\")\n        mr = model_result(fp, name=name, item=item)\n        modelresults.append(mr)\n\n    observations = []\n    for name, data in conf[\"observations\"].items():\n        if data.pop(\"include\", True):\n            data[\"name\"] = name\n            observations.append(_obs_from_dict(name, data, dirname, relative_path))\n\n    return match(obs=observations, mod=modelresults)\n
"},{"location":"api/metrics/","title":"Metrics","text":""},{"location":"api/metrics/#modelskill.metrics","title":"modelskill.metrics","text":"

The metrics module contains different skill metrics for evaluating the difference between a model and an observation.

  • bias
  • max_error
  • root_mean_squared_error (rmse)
  • urmse
  • mean_absolute_error (mae)
  • mean_absolute_percentage_error (mape)
  • kling_gupta_efficiency (kge)
  • nash_sutcliffe_efficiency (nse)
  • r2 (r2=nse)
  • model_efficiency_factor (mef)
  • wilmott
  • scatter_index (si)
  • scatter_index2
  • corrcoef (cc)
  • spearmanr (rho)
  • lin_slope
  • hit_ratio
  • explained_variance (ev)
  • peak_ratio (pr)

Circular metrics (for directional data with units in degrees):

  • c_bias
  • c_max_error
  • c_mean_absolute_error (c_mae)
  • c_root_mean_squared_error (c_rmse)
  • c_unbiased_root_mean_squared_error (c_urmse)

The names in parentheses are shorthand aliases for the different metrics.

Examples:

>>> obs = np.array([0.3, 2.1, -1.0])\n>>> mod = np.array([0.0, 2.3, 1.0])\n>>> bias(obs, mod)\nnp.float64(0.6333333333333332)\n>>> max_error(obs, mod)\nnp.float64(2.0)\n>>> rmse(obs, mod)\nnp.float64(1.173314393786536)\n>>> urmse(obs, mod)\nnp.float64(0.9877021593352702)\n>>> mae(obs, mod)\nnp.float64(0.8333333333333331)\n>>> mape(obs, mod)\nnp.float64(103.17460317460316)\n>>> nse(obs, mod)\nnp.float64(0.14786795048143053)\n>>> r2(obs, mod)\nnp.float64(0.14786795048143053)\n>>> mef(obs, mod)\nnp.float64(0.9231099877688299)\n>>> si(obs, mod)\nnp.float64(0.8715019052958266)\n>>> spearmanr(obs, mod)\nnp.float64(0.5)\n>>> willmott(obs, mod)\nnp.float64(0.7484604452865941)\n>>> hit_ratio(obs, mod, a=0.5)\nnp.float64(0.6666666666666666)\n>>> ev(obs, mod)\nnp.float64(0.39614855570839064)\n
"},{"location":"api/metrics/#modelskill.metrics.add_metric","title":"add_metric","text":"
add_metric(metric, has_units=False)\n

Adds a metric to the metric list. Useful for custom metrics.

Some metrics are dimensionless, others have the same dimension as the observations.

Parameters:

Name Type Description Default metric str or callable

Metric name or function

required has_units bool

True if metric has a dimension, False otherwise. Default:False

False

Returns:

Type Description None

Examples:

>>> add_metric(hit_ratio)\n>>> add_metric(rmse,True)\n
Source code in modelskill/metrics.py
def add_metric(metric: Callable, has_units: bool = False) -> None:\n    \"\"\"Adds a metric to the metric list. Useful for custom metrics.\n\n    Some metrics are dimensionless, others have the same dimension as the observations.\n\n    Parameters\n    ----------\n    metric : str or callable\n        Metric name or function\n    has_units : bool\n        True if metric has a dimension, False otherwise. Default:False\n\n    Returns\n    -------\n    None\n\n    Examples\n    --------\n    >>> add_metric(hit_ratio)\n    >>> add_metric(rmse,True)\n    \"\"\"\n    defined_metrics.add(metric.__name__)\n    if has_units:\n        METRICS_WITH_DIMENSION.add(metric.__name__)\n\n    # add the function to the module\n    setattr(sys.modules[__name__], metric.__name__, metric)\n
"},{"location":"api/metrics/#modelskill.metrics.bias","title":"bias","text":"
bias(obs, model)\n

Bias (mean error)

\\[ bias=\\frac{1}{n}\\sum_{i=1}^n (model_i - obs_i) \\]

Range: \\((-\\infty, \\infty)\\); Best: 0

Source code in modelskill/metrics.py
def bias(obs, model) -> Any:\n    r\"\"\"Bias (mean error)\n\n    $$\n    bias=\\frac{1}{n}\\sum_{i=1}^n (model_i - obs_i)\n    $$\n\n    Range: $(-\\infty, \\infty)$; Best: 0\n    \"\"\"\n\n    assert obs.size == model.size\n    return np.mean(model - obs)\n
"},{"location":"api/metrics/#modelskill.metrics.c_bias","title":"c_bias","text":"
c_bias(obs, model)\n

Circular bias (mean error)

Parameters:

Name Type Description Default obs ndarray

Observation in degrees (0, 360)

required model ndarray

Model in degrees (0, 360)

required Range required

Returns:

Type Description float

Circular bias

Examples:

>>> obs = np.array([10., 355., 170.])\n>>> mod = np.array([20., 5., -180.])\n>>> c_bias(obs, mod)\nnp.float64(10.0)\n
Source code in modelskill/metrics.py
def c_bias(obs: np.ndarray, model: np.ndarray) -> Any:\n    \"\"\"Circular bias (mean error)\n\n    Parameters\n    ----------\n    obs : np.ndarray\n        Observation in degrees (0, 360)\n    model : np.ndarray\n        Model in degrees (0, 360)\n\n    Range: [-180., 180.]; Best: 0.\n\n    Returns\n    -------\n    float\n        Circular bias\n\n    Examples\n    --------\n    >>> obs = np.array([10., 355., 170.])\n    >>> mod = np.array([20., 5., -180.])\n    >>> c_bias(obs, mod)\n    np.float64(10.0)\n    \"\"\"\n    from scipy.stats import circmean\n\n    resi = _c_residual(obs, model)\n    return circmean(resi, low=-180.0, high=180.0)\n
"},{"location":"api/metrics/#modelskill.metrics.c_mae","title":"c_mae","text":"
c_mae(obs, model, weights=None)\n

alias for circular mean absolute error

Source code in modelskill/metrics.py
def c_mae(\n    obs: np.ndarray,\n    model: np.ndarray,\n    weights: Optional[np.ndarray] = None,\n) -> Any:\n    \"\"\"alias for circular mean absolute error\"\"\"\n    return c_mean_absolute_error(obs, model, weights)\n
"},{"location":"api/metrics/#modelskill.metrics.c_max_error","title":"c_max_error","text":"
c_max_error(obs, model)\n

Circular max error

Parameters:

Name Type Description Default obs ndarray

Observation in degrees (0, 360)

required model ndarray

Model in degrees (0, 360)

required Range required

Returns:

Type Description float

Circular max error

Examples:

>>> obs = np.array([10., 350., 10.])\n>>> mod = np.array([20., 10., 350.])\n>>> c_max_error(obs, mod)\nnp.float64(20.0)\n
Source code in modelskill/metrics.py
def c_max_error(obs: np.ndarray, model: np.ndarray) -> Any:\n    \"\"\"Circular max error\n\n    Parameters\n    ----------\n    obs : np.ndarray\n        Observation in degrees (0, 360)\n    model : np.ndarray\n        Model in degrees (0, 360)\n\n    Range: :math:`[0, \\\\infty)`; Best: 0\n\n    Returns\n    -------\n    float\n        Circular max error\n\n    Examples\n    --------\n    >>> obs = np.array([10., 350., 10.])\n    >>> mod = np.array([20., 10., 350.])\n    >>> c_max_error(obs, mod)\n    np.float64(20.0)\n    \"\"\"\n\n    resi = _c_residual(obs, model)\n\n    # Compute the absolute differences and then\n    # find the shortest distance between angles\n    abs_diffs = np.abs(resi)\n    circular_diffs = np.minimum(abs_diffs, 360 - abs_diffs)\n    return np.max(circular_diffs)\n
"},{"location":"api/metrics/#modelskill.metrics.c_mean_absolute_error","title":"c_mean_absolute_error","text":"
c_mean_absolute_error(obs, model, weights=None)\n

Circular mean absolute error

Parameters:

Name Type Description Default obs ndarray

Observation in degrees (0, 360)

required model ndarray

Model in degrees (0, 360)

required weights ndarray

Weights, by default None

None Range required

Returns:

Type Description float

Circular mean absolute error

Source code in modelskill/metrics.py
def c_mean_absolute_error(\n    obs: np.ndarray,\n    model: np.ndarray,\n    weights: Optional[np.ndarray] = None,\n) -> Any:\n    \"\"\"Circular mean absolute error\n\n    Parameters\n    ----------\n    obs : np.ndarray\n        Observation in degrees (0, 360)\n    model : np.ndarray\n        Model in degrees (0, 360)\n    weights : np.ndarray, optional\n        Weights, by default None\n\n    Range: [0, 180]; Best: 0\n\n    Returns\n    -------\n    float\n        Circular mean absolute error\n    \"\"\"\n\n    resi = _c_residual(obs, model)\n    return np.average(np.abs(resi), weights=weights)\n
"},{"location":"api/metrics/#modelskill.metrics.c_rmse","title":"c_rmse","text":"
c_rmse(obs, model, weights=None)\n

alias for circular root mean squared error

Source code in modelskill/metrics.py
def c_rmse(\n    obs: np.ndarray,\n    model: np.ndarray,\n    weights: Optional[np.ndarray] = None,\n) -> Any:\n    \"\"\"alias for circular root mean squared error\"\"\"\n    return c_root_mean_squared_error(obs, model, weights)\n
"},{"location":"api/metrics/#modelskill.metrics.c_root_mean_squared_error","title":"c_root_mean_squared_error","text":"
c_root_mean_squared_error(obs, model, weights=None)\n

Circular root mean squared error

Parameters:

Name Type Description Default obs ndarray

Observation in degrees (0, 360)

required model ndarray

Model in degrees (0, 360)

required weights ndarray

Weights, by default None

None Range required

Returns:

Type Description float

Circular root mean squared error

Source code in modelskill/metrics.py
def c_root_mean_squared_error(\n    obs: np.ndarray,\n    model: np.ndarray,\n    weights: Optional[np.ndarray] = None,\n) -> Any:\n    \"\"\"Circular root mean squared error\n\n    Parameters\n    ----------\n    obs : np.ndarray\n        Observation in degrees (0, 360)\n    model : np.ndarray\n        Model in degrees (0, 360)\n    weights : np.ndarray, optional\n        Weights, by default None\n\n    Range: [0, 180]; Best: 0\n\n    Returns\n    -------\n    float\n        Circular root mean squared error\n    \"\"\"\n    residual = _c_residual(obs, model)\n    return np.sqrt(np.average(residual**2, weights=weights))\n
"},{"location":"api/metrics/#modelskill.metrics.c_unbiased_root_mean_squared_error","title":"c_unbiased_root_mean_squared_error","text":"
c_unbiased_root_mean_squared_error(obs, model, weights=None)\n

Circular unbiased root mean squared error

Parameters:

Name Type Description Default obs ndarray

Observation in degrees (0, 360)

required model ndarray

Model in degrees (0, 360)

required weights ndarray

Weights, by default None

None Range required

Returns:

Type Description float

Circular unbiased root mean squared error

Source code in modelskill/metrics.py
def c_unbiased_root_mean_squared_error(\n    obs: np.ndarray,\n    model: np.ndarray,\n    weights: Optional[np.ndarray] = None,\n) -> Any:\n    \"\"\"Circular unbiased root mean squared error\n\n    Parameters\n    ----------\n    obs : np.ndarray\n        Observation in degrees (0, 360)\n    model : np.ndarray\n        Model in degrees (0, 360)\n    weights : np.ndarray, optional\n        Weights, by default None\n\n    Range: [0, 180]; Best: 0\n\n    Returns\n    -------\n    float\n        Circular unbiased root mean squared error\n    \"\"\"\n    from scipy.stats import circmean\n\n    residual = _c_residual(obs, model)\n    residual = residual - circmean(residual, low=-180.0, high=180.0)\n    return np.sqrt(np.average(residual**2, weights=weights))\n
"},{"location":"api/metrics/#modelskill.metrics.c_urmse","title":"c_urmse","text":"
c_urmse(obs, model, weights=None)\n

alias for circular unbiased root mean squared error

Source code in modelskill/metrics.py
def c_urmse(\n    obs: np.ndarray,\n    model: np.ndarray,\n    weights: Optional[np.ndarray] = None,\n) -> Any:\n    \"\"\"alias for circular unbiased root mean squared error\"\"\"\n    return c_unbiased_root_mean_squared_error(obs, model, weights)\n
"},{"location":"api/metrics/#modelskill.metrics.cc","title":"cc","text":"
cc(obs, model, weights=None)\n

alias for corrcoef

Source code in modelskill/metrics.py
def cc(obs: np.ndarray, model: np.ndarray, weights=None) -> Any:\n    \"\"\"alias for corrcoef\"\"\"\n    return corrcoef(obs, model, weights)\n
"},{"location":"api/metrics/#modelskill.metrics.corrcoef","title":"corrcoef","text":"
corrcoef(obs, model, weights=None)\n

Pearson\u2019s Correlation coefficient (CC)

\\[ CC = \\frac{\\sum_{i=1}^n (model_i - \\overline{model})(obs_i - \\overline{obs}) } {\\sqrt{\\sum_{i=1}^n (model_i - \\overline{model})^2} \\sqrt{\\sum_{i=1}^n (obs_i - \\overline{obs})^2} } \\]

Range: [-1, 1]; Best: 1

See Also

spearmanr np.corrcoef

Source code in modelskill/metrics.py
def corrcoef(obs, model, weights=None) -> Any:\n    r\"\"\"Pearson\u2019s Correlation coefficient (CC)\n\n    $$\n    CC = \\frac{\\sum_{i=1}^n (model_i - \\overline{model})(obs_i - \\overline{obs}) }\n                   {\\sqrt{\\sum_{i=1}^n (model_i - \\overline{model})^2}\n                    \\sqrt{\\sum_{i=1}^n (obs_i - \\overline{obs})^2} }\n    $$\n\n    Range: [-1, 1]; Best: 1\n\n    See Also\n    --------\n    spearmanr\n    np.corrcoef\n    \"\"\"\n    assert obs.size == model.size\n    if len(obs) <= 1:\n        return np.nan\n\n    if weights is None:\n        return np.corrcoef(obs, model)[0, 1]\n    else:\n        C = np.cov(obs, model, fweights=weights)\n        return C[0, 1] / np.sqrt(C[0, 0] * C[1, 1])\n
"},{"location":"api/metrics/#modelskill.metrics.ev","title":"ev","text":"
ev(obs, model)\n

alias for explained_variance

Source code in modelskill/metrics.py
def ev(obs: np.ndarray, model: np.ndarray) -> Any:\n    \"\"\"alias for explained_variance\"\"\"\n    assert obs.size == model.size\n    return explained_variance(obs, model)\n
"},{"location":"api/metrics/#modelskill.metrics.explained_variance","title":"explained_variance","text":"
explained_variance(obs, model)\n

EV: Explained variance

EV is the explained variance and measures the proportion [0 - 1] to which the model accounts for the variation (dispersion) of the observations.

In cases with no bias, EV is equal to r2

\\[ \\frac{ \\sum_{i=1}^n (obs_i - \\overline{obs})^2 - \\sum_{i=1}^n \\left( (obs_i - \\overline{obs}) - (model_i - \\overline{model}) \\right)^2}{\\sum_{i=1}^n (obs_i - \\overline{obs})^2} \\]

Range: [0, 1]; Best: 1

See Also

r2

Source code in modelskill/metrics.py
def explained_variance(obs: np.ndarray, model: np.ndarray) -> Any:\n    r\"\"\"EV: Explained variance\n\n     EV is the explained variance and measures the proportion\n     [0 - 1] to which the model accounts for the variation\n     (dispersion) of the observations.\n\n     In cases with no bias, EV is equal to r2\n\n    $$\n    \\frac{ \\sum_{i=1}^n (obs_i - \\overline{obs})^2 -\n    \\sum_{i=1}^n \\left( (obs_i - \\overline{obs}) -\n    (model_i - \\overline{model}) \\right)^2}{\\sum_{i=1}^n\n    (obs_i - \\overline{obs})^2}\n    $$\n\n    Range: [0, 1]; Best: 1\n\n    See Also\n    --------\n    r2\n    \"\"\"\n\n    assert obs.size == model.size\n    if len(obs) == 0:\n        return np.nan\n\n    nominator: Any = np.sum((obs - obs.mean()) ** 2) - np.sum(  # type: ignore\n        ((obs - obs.mean()) - (model - model.mean())) ** 2\n    )\n    denominator: Any = np.sum((obs - obs.mean()) ** 2)\n\n    return nominator / denominator\n
"},{"location":"api/metrics/#modelskill.metrics.hit_ratio","title":"hit_ratio","text":"
hit_ratio(obs, model, a=0.1)\n

Fraction within obs \u00b1 acceptable deviation

\\[ HR = \\frac{1}{n}\\sum_{i=1}^n I_{|(model_i - obs_i)|} < a \\]

Range: [0, 1]; Best: 1

Examples:

>>> obs = np.array([1.0, 1.1, 1.2, 1.3, 1.4, 1.4, 1.3])\n>>> model = np.array([1.02, 1.16, 1.3, 1.38, 1.49, 1.45, 1.32])\n>>> hit_ratio(obs, model, a=0.05)\nnp.float64(0.2857142857142857)\n>>> hit_ratio(obs, model, a=0.1)\nnp.float64(0.8571428571428571)\n>>> hit_ratio(obs, model, a=0.15)\nnp.float64(1.0)\n
Source code in modelskill/metrics.py
def hit_ratio(obs: np.ndarray, model: np.ndarray, a=0.1) -> Any:\n    r\"\"\"Fraction within obs \u00b1 acceptable deviation\n\n    $$\n    HR = \\frac{1}{n}\\sum_{i=1}^n I_{|(model_i - obs_i)|} < a\n    $$\n\n    Range: [0, 1]; Best: 1\n\n    Examples\n    --------\n    >>> obs = np.array([1.0, 1.1, 1.2, 1.3, 1.4, 1.4, 1.3])\n    >>> model = np.array([1.02, 1.16, 1.3, 1.38, 1.49, 1.45, 1.32])\n    >>> hit_ratio(obs, model, a=0.05)\n    np.float64(0.2857142857142857)\n    >>> hit_ratio(obs, model, a=0.1)\n    np.float64(0.8571428571428571)\n    >>> hit_ratio(obs, model, a=0.15)\n    np.float64(1.0)\n    \"\"\"\n    assert obs.size == model.size\n\n    return np.mean(np.abs(obs - model) < a)\n
"},{"location":"api/metrics/#modelskill.metrics.kge","title":"kge","text":"
kge(obs, model)\n

alias for kling_gupta_efficiency

Source code in modelskill/metrics.py
def kge(obs: np.ndarray, model: np.ndarray) -> Any:\n    \"\"\"alias for kling_gupta_efficiency\"\"\"\n    return kling_gupta_efficiency(obs, model)\n
"},{"location":"api/metrics/#modelskill.metrics.kling_gupta_efficiency","title":"kling_gupta_efficiency","text":"
kling_gupta_efficiency(obs, model)\n

Kling-Gupta Efficiency (KGE)

\\[ KGE = 1 - \\sqrt{(r-1)^2 + \\left(\\frac{\\sigma_{mod}}{\\sigma_{obs}} - 1\\right)^2 + \\left(\\frac{\\mu_{mod}}{\\mu_{obs}} - 1\\right)^2 } \\]

where \\(r\\) is the pearson correlation coefficient, \\(\\mu_{obs},\\mu_{mod}\\) and \\(\\sigma_{obs},\\sigma_{mod}\\) is the mean and standard deviation of observations and model.

Range: \\((-\\infty, 1]\\); Best: 1

References

Gupta, H. V., Kling, H., Yilmaz, K. K. and Martinez, G. F., (2009), Decomposition of the mean squared error and NSE performance criteria: Implications for improving hydrological modelling, J. Hydrol., 377(1-2), 80-91 https://doi.org/10.1016/j.jhydrol.2009.08.003

Knoben, W. J. M., Freer, J. E., and Woods, R. A. (2019) Technical note: Inherent benchmark or not? Comparing Nash\u2013Sutcliffe and Kling\u2013Gupta efficiency scores, Hydrol. Earth Syst. Sci., 23, 4323-4331 https://doi.org/10.5194/hess-23-4323-2019

Source code in modelskill/metrics.py
def kling_gupta_efficiency(obs: np.ndarray, model: np.ndarray) -> Any:\n    r\"\"\"\n    Kling-Gupta Efficiency (KGE)\n\n    $$\n    KGE = 1 - \\sqrt{(r-1)^2 + \\left(\\frac{\\sigma_{mod}}{\\sigma_{obs}} - 1\\right)^2 +\n                                \\left(\\frac{\\mu_{mod}}{\\mu_{obs}} - 1\\right)^2 }\n    $$\n\n    where $r$ is the pearson correlation coefficient, $\\mu_{obs},\\mu_{mod}$ and $\\sigma_{obs},\\sigma_{mod}$ is the mean and standard deviation of observations and model.\n\n    Range: $(-\\infty, 1]$; Best: 1\n\n    References\n    ----------\n    Gupta, H. V., Kling, H., Yilmaz, K. K. and Martinez, G. F., (2009), Decomposition of the mean squared error and NSE performance criteria: Implications for improving hydrological modelling, J. Hydrol., 377(1-2), 80-91 <https://doi.org/10.1016/j.jhydrol.2009.08.003>\n\n    Knoben, W. J. M., Freer, J. E., and Woods, R. A. (2019) Technical note: Inherent benchmark or not? Comparing Nash\u2013Sutcliffe and Kling\u2013Gupta efficiency scores, Hydrol. Earth Syst. Sci., 23, 4323-4331 <https://doi.org/10.5194/hess-23-4323-2019>\n    \"\"\"\n    assert obs.size == model.size\n\n    if len(obs) == 0 or obs.std() == 0.0:\n        return np.nan\n\n    if model.std() > 1e-12:\n        r = corrcoef(obs, model)\n        if np.isnan(r):\n            r = 0.0\n    else:\n        r = 0.0\n\n    res = 1 - np.sqrt(\n        (r - 1) ** 2\n        + (model.std() / obs.std() - 1.0) ** 2\n        + (model.mean() / obs.mean() - 1.0) ** 2\n    )\n\n    return res\n
"},{"location":"api/metrics/#modelskill.metrics.lin_slope","title":"lin_slope","text":"
lin_slope(obs, model, reg_method='ols')\n

Slope of the regression line.

\\[ slope = \\frac{\\sum_{i=1}^n (model_i - \\overline {model})(obs_i - \\overline {obs})} {\\sum_{i=1}^n (obs_i - \\overline {obs})^2} \\]

Range: \\((-\\infty, \\infty )\\); Best: 1

Source code in modelskill/metrics.py
def lin_slope(obs: np.ndarray, model: np.ndarray, reg_method=\"ols\") -> Any:\n    r\"\"\"Slope of the regression line.\n\n    $$\n    slope = \\frac{\\sum_{i=1}^n (model_i - \\overline {model})(obs_i - \\overline {obs})}\n                    {\\sum_{i=1}^n (obs_i - \\overline {obs})^2}\n    $$\n\n    Range: $(-\\infty, \\infty )$; Best: 1\n    \"\"\"\n    assert obs.size == model.size\n    return _linear_regression(obs, model, reg_method)[0]\n
"},{"location":"api/metrics/#modelskill.metrics.mae","title":"mae","text":"
mae(obs, model, weights=None)\n

alias for mean_absolute_error

Source code in modelskill/metrics.py
def mae(\n    obs: np.ndarray, model: np.ndarray, weights: Optional[np.ndarray] = None\n) -> Any:\n    \"\"\"alias for mean_absolute_error\"\"\"\n    assert obs.size == model.size\n    return mean_absolute_error(obs, model, weights)\n
"},{"location":"api/metrics/#modelskill.metrics.mape","title":"mape","text":"
mape(obs, model)\n

alias for mean_absolute_percentage_error

Source code in modelskill/metrics.py
def mape(obs: np.ndarray, model: np.ndarray) -> Any:\n    \"\"\"alias for mean_absolute_percentage_error\"\"\"\n    return mean_absolute_percentage_error(obs, model)\n
"},{"location":"api/metrics/#modelskill.metrics.max_error","title":"max_error","text":"
max_error(obs, model)\n

Max (absolute) error

\\[ max_{error} = max(|model_i - obs_i|) \\]

Range: \\([0, \\infty)\\); Best: 0

Source code in modelskill/metrics.py
def max_error(obs, model) -> Any:\n    r\"\"\"Max (absolute) error\n\n    $$\n    max_{error} = max(|model_i - obs_i|)\n    $$\n\n    Range: $[0, \\infty)$; Best: 0\n    \"\"\"\n\n    assert obs.size == model.size\n    return np.max(np.abs(model - obs))\n
"},{"location":"api/metrics/#modelskill.metrics.mean_absolute_error","title":"mean_absolute_error","text":"
mean_absolute_error(obs, model, weights=None)\n

Mean Absolute Error (MAE)

\\[ MAE=\\frac{1}{n}\\sum_{i=1}^n|model_i - obs_i| \\]

Range: \\([0, \\infty)\\); Best: 0

Source code in modelskill/metrics.py
def mean_absolute_error(\n    obs: np.ndarray, model: np.ndarray, weights: Optional[np.ndarray] = None\n) -> Any:\n    r\"\"\"Mean Absolute Error (MAE)\n\n    $$\n    MAE=\\frac{1}{n}\\sum_{i=1}^n|model_i - obs_i|\n    $$\n\n    Range: $[0, \\infty)$; Best: 0\n    \"\"\"\n    assert obs.size == model.size\n\n    error = np.average(np.abs(model - obs), weights=weights)\n\n    return error\n
"},{"location":"api/metrics/#modelskill.metrics.mean_absolute_percentage_error","title":"mean_absolute_percentage_error","text":"
mean_absolute_percentage_error(obs, model)\n

Mean Absolute Percentage Error (MAPE)

\\[ MAPE=\\frac{1}{n}\\sum_{i=1}^n\\frac{|model_i - obs_i|}{obs_i}*100 \\]

Range: \\([0, \\infty)\\); Best: 0

Source code in modelskill/metrics.py
def mean_absolute_percentage_error(obs: np.ndarray, model: np.ndarray) -> Any:\n    r\"\"\"Mean Absolute Percentage Error (MAPE)\n\n    $$\n    MAPE=\\frac{1}{n}\\sum_{i=1}^n\\frac{|model_i - obs_i|}{obs_i}*100\n    $$\n\n    Range: $[0, \\infty)$; Best: 0\n    \"\"\"\n\n    assert obs.size == model.size\n\n    if len(obs) == 0:\n        return np.nan\n    if np.any(obs == 0.0):\n        warnings.warn(\"Observation is zero, consider to use another metric than MAPE\")\n        return np.nan  # TODO is it better to return a large value +inf than NaN?\n\n    return np.mean(np.abs((obs - model) / obs)) * 100\n
"},{"location":"api/metrics/#modelskill.metrics.mef","title":"mef","text":"
mef(obs, model)\n

alias for model_efficiency_factor

Source code in modelskill/metrics.py
def mef(obs: np.ndarray, model: np.ndarray) -> Any:\n    \"\"\"alias for model_efficiency_factor\"\"\"\n    return model_efficiency_factor(obs, model)\n
"},{"location":"api/metrics/#modelskill.metrics.metric_has_units","title":"metric_has_units","text":"
metric_has_units(metric)\n

Check if a metric has units (dimension).

Some metrics are dimensionless, others have the same dimension as the observations.

Parameters:

Name Type Description Default metric str or callable

Metric name or function

required

Returns:

Type Description bool

True if metric has a dimension, False otherwise

Examples:

>>> metric_has_units(\"rmse\")\nTrue\n>>> metric_has_units(\"kge\")\nFalse\n
Source code in modelskill/metrics.py
def metric_has_units(metric: Union[str, Callable]) -> bool:\n    \"\"\"Check if a metric has units (dimension).\n\n    Some metrics are dimensionless, others have the same dimension as the observations.\n\n    Parameters\n    ----------\n    metric : str or callable\n        Metric name or function\n\n    Returns\n    -------\n    bool\n        True if metric has a dimension, False otherwise\n\n    Examples\n    --------\n    >>> metric_has_units(\"rmse\")\n    True\n    >>> metric_has_units(\"kge\")\n    False\n    \"\"\"\n    if hasattr(metric, \"__name__\"):\n        name = metric.__name__\n    else:\n        name = metric\n\n    if name not in defined_metrics:\n        raise ValueError(f\"Metric {name} not defined. Choose from {defined_metrics}\")\n\n    return name in METRICS_WITH_DIMENSION\n
"},{"location":"api/metrics/#modelskill.metrics.model_efficiency_factor","title":"model_efficiency_factor","text":"
model_efficiency_factor(obs, model)\n

Model Efficiency Factor (MEF)

Scale independent RMSE, standardized by Stdev of observations

\\[ MEF = \\frac{RMSE}{STDEV}=\\frac{\\sqrt{\\frac{1}{n} \\sum_{i=1}^n(model_i - obs_i)^2}} {\\sqrt{\\frac{1}{n} \\sum_{i=1}^n(obs_i - \\overline{obs})^2}}=\\sqrt{1-NSE} \\]

Range: \\([0, \\infty)\\); Best: 0

See Also

nash_sutcliffe_efficiency root_mean_squared_error

Source code in modelskill/metrics.py
def model_efficiency_factor(obs: np.ndarray, model: np.ndarray) -> Any:\n    r\"\"\"Model Efficiency Factor (MEF)\n\n    Scale independent RMSE, standardized by Stdev of observations\n\n    $$\n    MEF = \\frac{RMSE}{STDEV}=\\frac{\\sqrt{\\frac{1}{n} \\sum_{i=1}^n(model_i - obs_i)^2}}\n                                    {\\sqrt{\\frac{1}{n} \\sum_{i=1}^n(obs_i - \\overline{obs})^2}}=\\sqrt{1-NSE}\n    $$\n\n    Range: $[0, \\infty)$; Best: 0\n\n    See Also\n    --------\n    nash_sutcliffe_efficiency\n    root_mean_squared_error\n\n    \"\"\"\n    assert obs.size == model.size\n\n    return rmse(obs, model) / obs.std()\n
"},{"location":"api/metrics/#modelskill.metrics.nash_sutcliffe_efficiency","title":"nash_sutcliffe_efficiency","text":"
nash_sutcliffe_efficiency(obs, model)\n

Nash-Sutcliffe Efficiency (NSE)

\\[ NSE = 1 - \\frac {\\sum _{i=1}^{n}\\left(model_{i} - obs_{i}\\right)^{2}} {\\sum_{i=1}^{n}\\left(obs_{i} - {\\overline{obs}}\\right)^{2}} \\]

Range: \\((-\\infty, 1]\\); Best: 1

Note

r2 = nash_sutcliffe_efficiency(nse)

References

Nash, J. E.; Sutcliffe, J. V. (1970). \"River flow forecasting through conceptual models part I \u2014 A discussion of principles\". Journal of Hydrology. 10 (3): 282\u2013290. https://doi.org/10.1016/0022-1694(70)90255-6

Source code in modelskill/metrics.py
def nash_sutcliffe_efficiency(obs: np.ndarray, model: np.ndarray) -> Any:\n    r\"\"\"Nash-Sutcliffe Efficiency (NSE)\n\n    $$\n    NSE = 1 - \\frac {\\sum _{i=1}^{n}\\left(model_{i} - obs_{i}\\right)^{2}}\n                    {\\sum_{i=1}^{n}\\left(obs_{i} - {\\overline{obs}}\\right)^{2}}\n    $$\n\n    Range: $(-\\infty, 1]$; Best: 1\n\n    Note\n    ----\n    r2 = nash_sutcliffe_efficiency(nse)\n\n    References\n    ----------\n    Nash, J. E.; Sutcliffe, J. V. (1970). \"River flow forecasting through conceptual models part I \u2014 A discussion of principles\". Journal of Hydrology. 10 (3): 282\u2013290. <https://doi.org/10.1016/0022-1694(70)90255-6>\n    \"\"\"\n    assert obs.size == model.size\n\n    if len(obs) == 0:\n        return np.nan\n    error = 1 - (np.sum((obs - model) ** 2) / np.sum((obs - np.mean(obs)) ** 2))  # type: ignore\n\n    return error\n
"},{"location":"api/metrics/#modelskill.metrics.nse","title":"nse","text":"
nse(obs, model)\n

alias for nash_sutcliffe_efficiency

Source code in modelskill/metrics.py
def nse(obs: np.ndarray, model: np.ndarray) -> Any:\n    \"\"\"alias for nash_sutcliffe_efficiency\"\"\"\n    return nash_sutcliffe_efficiency(obs, model)\n
"},{"location":"api/metrics/#modelskill.metrics.peak_ratio","title":"peak_ratio","text":"
peak_ratio(obs, model, inter_event_level=0.7, AAP=2, inter_event_time='36h')\n

Peak Ratio

PR is the mean of the largest-N individual ratios of identified peaks in the model / identified peaks in the measurements (N number of events defined by AAP). PR is calculated only for the joint-events, ie, events that ocurr simulateneously within a window +/- 0.5*inter_event_time.

Parameters:

Name Type Description Default inter_event_level float

Inter-event level threshold (default: 0.7).

0.7 AAP Union[int, float]

Average Annual Peaks (ie, Number of peaks per year, on average). (default: 2)

2 inter_event_time str
Maximum time interval between peaks (default: 36 hours).\n
'36h' Range required Source code in modelskill/metrics.py
def peak_ratio(\n    obs: pd.Series,\n    model: np.ndarray,\n    inter_event_level: float = 0.7,\n    AAP: Union[int, float] = 2,\n    inter_event_time: str = \"36h\",\n) -> Any:\n    r\"\"\"Peak Ratio\n\n    PR is the mean of the largest-N individual ratios of identified peaks in the\n    model / identified peaks in the measurements (N number of events defined by AAP). PR is calculated only for the joint-events,\n    ie, events that ocurr simulateneously within a window +/- 0.5*inter_event_time.\n\n    Parameters\n    ----------\n    inter_event_level (float, optional)\n        Inter-event level threshold (default: 0.7).\n    AAP (int or float, optional)\n        Average Annual Peaks (ie, Number of peaks per year, on average). (default: 2)\n    inter_event_time (str, optional)\n            Maximum time interval between peaks (default: 36 hours).\n\n    $$\n    \\frac{\\sum_{i=1}^{N_{joint-peaks}} (\\frac{Peak_{model_i}}{Peak_{obs_i}} )}{N_{joint-peaks}}\n    $$\n\n    Range: $[0, \\infty)$; Best: 1.0\n    \"\"\"\n\n    assert obs.size == model.size\n    if len(obs) == 0:\n        return np.nan\n    assert isinstance(obs.index, pd.DatetimeIndex)\n    time = obs.index\n\n    # Calculate number of years\n    dt_int = time[1:].values - time[0:-1].values\n    dt_int_mode = float(stats.mode(dt_int, keepdims=False)[0]) / 1e9  # in seconds\n    N_years = dt_int_mode / 24 / 3600 / 365.25 * len(time)\n    peak_index, AAP_ = _partial_duration_series(\n        time,\n        obs,\n        inter_event_level=inter_event_level,\n        AAP=AAP,\n        inter_event_time=inter_event_time,\n    )\n    peaks = obs[peak_index]\n    found_peaks_obs = peaks.sort_values(ascending=False)\n\n    peak_index, _ = _partial_duration_series(\n        time,\n        model,\n        inter_event_level=inter_event_level,\n        AAP=AAP,\n        inter_event_time=inter_event_time,\n    )\n    peaks = model[peak_index]\n    found_peaks_mod = peaks.sort_values(ascending=False)\n\n    top_n_peaks 
= max(1, min(round(AAP_ * N_years), np.sum(peaks)))\n    # Resample~ish, find peaks spread maximum Half the inter event time (if inter event =36, select data paired +/- 18h) (or inter_event) and then select\n    indices_mod = (\n        abs(found_peaks_obs.index.values[:, None] - found_peaks_mod.index.values)\n        < pd.Timedelta(inter_event_time) / 2\n    ).any(axis=0)\n    indices_obs = (\n        abs(found_peaks_mod.index.values[:, None] - found_peaks_obs.index.values)\n        < pd.Timedelta(inter_event_time) / 2\n    ).any(axis=0)\n    # Find intersection (co-existing peaks, still a large number, O(1000s))\n    obs_joint = found_peaks_obs.loc[indices_obs]\n    mod_joint = found_peaks_mod.loc[indices_mod]\n    # Now we forget about time index, as peaks have been paired already.\n    df_filter = pd.DataFrame(\n        data={\n            \"model\": mod_joint.sort_index().values,\n            \"observation\": obs_joint.sort_index().values,\n        }\n    )\n    df_filter[\"Maximum\"] = df_filter.max(axis=1)\n    df_filter.sort_values(by=\"Maximum\", ascending=False, inplace=True)\n    # Finally we do the selection of the N- largest peaks from either model or measured\n    df_filter = df_filter.iloc[0:top_n_peaks, :]\n    # Rename to avoid further refactoring\n    obs_joint = df_filter.loc[:, \"observation\"]\n    mod_joint = df_filter.loc[:, \"model\"]\n\n    if len(obs_joint) == 0 or len(mod_joint) == 0:\n        return np.nan\n    res = np.mean(mod_joint.values / obs_joint.values)\n    return res\n
"},{"location":"api/metrics/#modelskill.metrics.pr","title":"pr","text":"
pr(obs, model, inter_event_level=0.7, AAP=2, inter_event_time='36h')\n

alias for peak_ratio

Source code in modelskill/metrics.py
def pr(\n    obs: pd.Series,\n    model: np.ndarray,\n    inter_event_level: float = 0.7,\n    AAP: Union[int, float] = 2,\n    inter_event_time: str = \"36h\",\n) -> Any:\n    \"\"\"alias for peak_ratio\"\"\"\n    assert obs.size == model.size\n    return peak_ratio(obs, model, inter_event_level, AAP, inter_event_time)\n
"},{"location":"api/metrics/#modelskill.metrics.r2","title":"r2","text":"
r2(obs, model)\n

Coefficient of determination (R2)

Pronounced 'R-squared'; the proportion of the variation in the dependent variable that is predictable from the independent variable(s), i.e. the proportion of explained variance.

\\[ R^2 = 1 - \\frac{\\sum_{i=1}^n (model_i - obs_i)^2} {\\sum_{i=1}^n (obs_i - \\overline {obs})^2} \\]

Range: \\((-\\infty, 1]\\); Best: 1

Note

r2 = nash_sutcliffe_efficiency(nse)

Examples:

>>> obs = np.array([1.0,1.1,1.2,1.3,1.4])\n>>> model = np.array([1.09, 1.16, 1.3 , 1.38, 1.49])\n>>> r2(obs,model)\nnp.float64(0.6379999999999998)\n
Source code in modelskill/metrics.py
def r2(obs: np.ndarray, model: np.ndarray) -> Any:\n    r\"\"\"Coefficient of determination (R2)\n\n    Pronounced 'R-squared'; the proportion of the variation in the dependent variable that is predictable from the independent variable(s), i.e. the proportion of explained variance.\n\n    $$\n    R^2 = 1 - \\frac{\\sum_{i=1}^n (model_i - obs_i)^2}\n                    {\\sum_{i=1}^n (obs_i - \\overline {obs})^2}\n    $$\n\n    Range: $(-\\infty, 1]$; Best: 1\n\n    Note\n    ----\n    r2 = nash_sutcliffe_efficiency(nse)\n\n    Examples\n    --------\n    >>> obs = np.array([1.0,1.1,1.2,1.3,1.4])\n    >>> model = np.array([1.09, 1.16, 1.3 , 1.38, 1.49])\n    >>> r2(obs,model)\n    np.float64(0.6379999999999998)\n    \"\"\"\n    assert obs.size == model.size\n    if len(obs) == 0:\n        return np.nan\n\n    residual = model - obs\n    SSr: Any = np.sum(residual**2)\n    SSt: Any = np.sum((obs - obs.mean()) ** 2)\n\n    return 1 - SSr / SSt\n
"},{"location":"api/metrics/#modelskill.metrics.rho","title":"rho","text":"
rho(obs, model)\n

alias for spearmanr

Source code in modelskill/metrics.py
def rho(obs: np.ndarray, model: np.ndarray) -> Any:\n    \"\"\"alias for spearmanr\"\"\"\n    return spearmanr(obs, model)\n
"},{"location":"api/metrics/#modelskill.metrics.rmse","title":"rmse","text":"
rmse(obs, model, weights=None, unbiased=False)\n

alias for root_mean_squared_error

Source code in modelskill/metrics.py
def rmse(\n    obs: np.ndarray,\n    model: np.ndarray,\n    weights: Optional[np.ndarray] = None,\n    unbiased: bool = False,\n) -> Any:\n    \"\"\"alias for root_mean_squared_error\"\"\"\n    return root_mean_squared_error(obs, model, weights, unbiased)\n
"},{"location":"api/metrics/#modelskill.metrics.root_mean_squared_error","title":"root_mean_squared_error","text":"
root_mean_squared_error(obs, model, weights=None, unbiased=False)\n

Root Mean Squared Error (RMSE)

\\[ res_i = model_i - obs_i \\] \\[ RMSE=\\sqrt{\\frac{1}{n} \\sum_{i=1}^n res_i^2} \\]

Unbiased version:

\\[ res_{u,i} = res_i - \\overline {res} \\] \\[ uRMSE=\\sqrt{\\frac{1}{n} \\sum_{i=1}^n res_{u,i}^2} \\]

Range: \\([0, \\infty)\\); Best: 0

Source code in modelskill/metrics.py
def root_mean_squared_error(\n    obs: np.ndarray,\n    model: np.ndarray,\n    weights: Optional[np.ndarray] = None,\n    unbiased: bool = False,\n) -> Any:\n    r\"\"\"Root Mean Squared Error (RMSE)\n\n    $$\n    res_i = model_i - obs_i\n    $$\n\n    $$\n    RMSE=\\sqrt{\\frac{1}{n} \\sum_{i=1}^n res_i^2}\n    $$\n\n    Unbiased version:\n\n    $$\n    res_{u,i} = res_i - \\overline {res}\n    $$\n\n    $$\n    uRMSE=\\sqrt{\\frac{1}{n} \\sum_{i=1}^n res_{u,i}^2}\n    $$\n\n    Range: $[0, \\infty)$; Best: 0\n\n    \"\"\"\n    assert obs.size == model.size\n\n    residual = obs - model\n    if unbiased:\n        residual = residual - residual.mean()\n    error = np.sqrt(np.average(residual**2, weights=weights))\n\n    return error\n
"},{"location":"api/metrics/#modelskill.metrics.scatter_index","title":"scatter_index","text":"
scatter_index(obs, model)\n

Scatter index (SI)

Which is the same as the unbiased-RMSE normalized by the absolute mean of the observations.

\\[ \\frac{ \\sqrt{ \\frac{1}{n} \\sum_{i=1}^n \\left( (model_i - \\overline {model}) - (obs_i - \\overline {obs}) \\right)^2} } {\\frac{1}{n} \\sum_{i=1}^n | obs_i | } \\]

Range: \\([0, \\infty)\\); Best: 0

Source code in modelskill/metrics.py
def scatter_index(obs: np.ndarray, model: np.ndarray) -> Any:\n    r\"\"\"Scatter index (SI)\n\n    Which is the same as the unbiased-RMSE normalized by the absolute mean of the observations.\n\n    $$\n    \\frac{ \\sqrt{ \\frac{1}{n} \\sum_{i=1}^n \\left( (model_i - \\overline {model}) - (obs_i - \\overline {obs}) \\right)^2} }\n    {\\frac{1}{n} \\sum_{i=1}^n | obs_i | }\n    $$\n\n    Range: $[0, \\infty)$; Best: 0\n    \"\"\"\n    assert obs.size == model.size\n    if len(obs) == 0:\n        return np.nan\n\n    residual = obs - model\n    residual = residual - residual.mean()  # unbiased\n    return np.sqrt(np.mean(residual**2)) / np.mean(np.abs(obs))\n
"},{"location":"api/metrics/#modelskill.metrics.scatter_index2","title":"scatter_index2","text":"
scatter_index2(obs, model)\n

Alternative formulation of the scatter index (SI)

\\[ \\sqrt {\\frac{\\sum_{i=1}^n \\left( (model_i - \\overline {model}) - (obs_i - \\overline {obs}) \\right)^2} {\\sum_{i=1}^n obs_i^2}} \\]

Range: [0, 100]; Best: 0

Source code in modelskill/metrics.py
def scatter_index2(obs: np.ndarray, model: np.ndarray) -> Any:\n    r\"\"\"Alternative formulation of the scatter index (SI)\n\n    $$\n    \\sqrt {\\frac{\\sum_{i=1}^n \\left( (model_i - \\overline {model}) - (obs_i - \\overline {obs}) \\right)^2}\n    {\\sum_{i=1}^n obs_i^2}}\n    $$\n\n    Range: [0, 100]; Best: 0\n    \"\"\"\n    assert obs.size == model.size\n    if len(obs) == 0:\n        return np.nan\n\n    return np.sqrt(\n        np.sum(((model - model.mean()) - (obs - obs.mean())) ** 2) / np.sum(obs**2)  # type: ignore\n    )\n
"},{"location":"api/metrics/#modelskill.metrics.si","title":"si","text":"
si(obs, model)\n

alias for scatter_index

Source code in modelskill/metrics.py
def si(obs: np.ndarray, model: np.ndarray) -> Any:\n    \"\"\"alias for scatter_index\"\"\"\n    return scatter_index(obs, model)\n
"},{"location":"api/metrics/#modelskill.metrics.spearmanr","title":"spearmanr","text":"
spearmanr(obs, model)\n

Spearman rank correlation coefficient

The rank correlation coefficient is similar to the Pearson correlation coefficient but applied to ranked quantities and is useful to quantify a monotonous relationship

\\[ \\rho = \\frac{\\sum_{i=1}^n (rmodel_i - \\overline{rmodel})(robs_i - \\overline{robs}) } {\\sqrt{\\sum_{i=1}^n (rmodel_i - \\overline{rmodel})^2} \\sqrt{\\sum_{i=1}^n (robs_i - \\overline{robs})^2} } \\]

Range: [-1, 1]; Best: 1

Examples:

>>> obs = np.linspace(-20, 20, 100)\n>>> mod = np.tanh(obs)\n>>> rho(obs, mod)\nnp.float64(0.9999759973116955)\n>>> spearmanr(obs, mod)\nnp.float64(0.9999759973116955)\n
See Also

corrcoef

Source code in modelskill/metrics.py
def spearmanr(obs: np.ndarray, model: np.ndarray) -> Any:\n    r\"\"\"Spearman rank correlation coefficient\n\n    The rank correlation coefficient is similar to the Pearson correlation coefficient but\n    applied to ranked quantities and is useful to quantify a monotonous relationship\n\n    $$\n    \\rho = \\frac{\\sum_{i=1}^n (rmodel_i - \\overline{rmodel})(robs_i - \\overline{robs}) }\n                    {\\sqrt{\\sum_{i=1}^n (rmodel_i - \\overline{rmodel})^2}\n                    \\sqrt{\\sum_{i=1}^n (robs_i - \\overline{robs})^2} }\n    $$\n\n    Range: [-1, 1]; Best: 1\n\n    Examples\n    --------\n    >>> obs = np.linspace(-20, 20, 100)\n    >>> mod = np.tanh(obs)\n    >>> rho(obs, mod)\n    np.float64(0.9999759973116955)\n    >>> spearmanr(obs, mod)\n    np.float64(0.9999759973116955)\n\n    See Also\n    --------\n    corrcoef\n    \"\"\"\n    import scipy.stats\n\n    return scipy.stats.spearmanr(obs, model)[0]\n
"},{"location":"api/metrics/#modelskill.metrics.urmse","title":"urmse","text":"
urmse(obs, model, weights=None)\n

Unbiased Root Mean Squared Error (uRMSE)

\\[ res_i = model_i - obs_i \\] \\[ res_{u,i} = res_i - \\overline {res} \\] \\[ uRMSE = \\sqrt{\\frac{1}{n} \\sum_{i=1}^n res_{u,i}^2} \\]

Range: \\([0, \\infty)\\); Best: 0

See Also

root_mean_squared_error

Source code in modelskill/metrics.py
def urmse(\n    obs: np.ndarray, model: np.ndarray, weights: Optional[np.ndarray] = None\n) -> Any:\n    r\"\"\"Unbiased Root Mean Squared Error (uRMSE)\n\n    $$\n    res_i = model_i - obs_i\n    $$\n\n    $$\n    res_{u,i} = res_i - \\overline {res}\n    $$\n\n    $$\n    uRMSE = \\sqrt{\\frac{1}{n} \\sum_{i=1}^n res_{u,i}^2}\n    $$\n\n    Range: $[0, \\infty)$; Best: 0\n\n    See Also\n    --------\n    root_mean_squared_error\n    \"\"\"\n    return root_mean_squared_error(obs, model, weights, unbiased=True)\n
"},{"location":"api/metrics/#modelskill.metrics.willmott","title":"willmott","text":"
willmott(obs, model)\n

Willmott's Index of Agreement

A scaled representation of the predictive accuracy of the model against observations. A value of 1 indicates a perfect match, and 0 indicates no agreement at all.

\\[ willmott = 1 - \\frac{\\frac{1}{n} \\sum_{i=1}^n(model_i - obs_i)^2} {\\frac{1}{n} \\sum_{i=1}^n(|model_i - \\overline{obs}| + |obs_i - \\overline{obs}|)^2} \\]

Range: [0, 1]; Best: 1

Examples:

>>> obs = np.array([1.0, 1.1, 1.2, 1.3, 1.4, 1.4, 1.3])\n>>> model = np.array([1.02, 1.16, 1.3, 1.38, 1.49, 1.45, 1.32])\n>>> willmott(obs, model)\nnp.float64(0.9501403174479723)\n
References

Willmott, C. J. 1981. \"On the validation of models\". Physical Geography, 2, 184\u2013194.

Source code in modelskill/metrics.py
def willmott(obs: np.ndarray, model: np.ndarray) -> Any:\n    r\"\"\"Willmott's Index of Agreement\n\n    A scaled representation of the predictive accuracy of the model against observations. A value of 1 indicates a perfect match, and 0 indicates no agreement at all.\n\n    $$\n    willmott = 1 - \\frac{\\frac{1}{n} \\sum_{i=1}^n(model_i - obs_i)^2}\n                        {\\frac{1}{n} \\sum_{i=1}^n(|model_i - \\overline{obs}| + |obs_i - \\overline{obs}|)^2}\n    $$\n\n    Range: [0, 1]; Best: 1\n\n    Examples\n    --------\n    >>> obs = np.array([1.0, 1.1, 1.2, 1.3, 1.4, 1.4, 1.3])\n    >>> model = np.array([1.02, 1.16, 1.3, 1.38, 1.49, 1.45, 1.32])\n    >>> willmott(obs, model)\n    np.float64(0.9501403174479723)\n\n    References\n    ----------\n    Willmott, C. J. 1981. \"On the validation of models\". Physical Geography, 2, 184\u2013194.\n    \"\"\"\n\n    assert obs.size == model.size\n    if len(obs) == 0:\n        return np.nan\n\n    residual = model - obs\n    nominator: Any = np.sum(residual**2)\n    denominator: Any = np.sum(\n        (np.abs(model - obs.mean()) + np.abs(obs - obs.mean())) ** 2\n    )\n\n    return 1 - nominator / denominator\n
"},{"location":"api/plotting/","title":"Plotting","text":""},{"location":"api/plotting/#modelskill.plotting","title":"modelskill.plotting","text":"

The plotting module provides functions useful for skill assessment that can be used independently of the comparison module.

  • scatter is a function that can be used to plot a scatter suitable for skill assessment, with a 1:1 line and a linear regression line.
  • wind_rose is a function that can be used to plot a dual wind rose to compare two datasets of magnitudes and directions.
  • spatial_overview is a function that can be used to plot a spatial overview of two datasets.
  • temporal_coverage is a function that can be used to plot the temporal coverage of two datasets.
"},{"location":"api/plotting/#modelskill.plotting.scatter","title":"scatter","text":"
scatter(x, y, *, bins=120, quantiles=None, fit_to_quantiles=False, show_points=None, show_hist=None, show_density=None, norm=None, backend='matplotlib', figsize=(8, 8), xlim=None, ylim=None, reg_method='ols', title='', xlabel='', ylabel='', skill_table=False, skill_scores=None, skill_score_unit='', ax=None, **kwargs)\n

Scatter plot showing compared data: observation vs modelled Optionally, with density histogram.

Parameters:

Name Type Description Default x ndarray

X values e.g model values, must be same length as y

required y ndarray

Y values e.g observation values, must be same length as x

required bins int | float

bins for the 2D histogram on the background. By default 120 bins. if int, represents the number of bins of 2D if float, represents the bin size if sequence (list of int or float), represents the bin edges

120 quantiles int | Sequence[float] | None

number of quantiles for QQ-plot, by default None and will depend on the scatter data length (10, 100 or 1000) if int, this is the number of points if sequence (list of floats), represents the desired quantiles (from 0 to 1)

None fit_to_quantiles bool

by default the regression line is fitted to all data, if True, it is fitted to the quantiles which can be useful to represent the extremes of the distribution, by default False

False show_points (bool, int, float)

Should the scatter points be displayed? None means: show all points if fewer than 1e4, otherwise show 1e4 sample points, by default None. float: fraction of points to show on plot from 0 to 1. eg 0.5 shows 50% of the points. int: if 'n' (int) given, then 'n' points will be displayed, randomly selected.

None show_hist bool

show the data density as a 2d histogram, by default None

None show_density Optional[bool]

show the data density as a colormap of the scatter, by default None. If both show_density and show_hist are None, then show_density is used by default. for binning the data, the previous kword bins=Float is used

None norm Normalize

colormap normalization If None, defaults to matplotlib.colors.PowerNorm(vmin=1,gamma=0.5)

None backend str

use \"plotly\" (interactive) or \"matplotlib\" backend, by default \"matplotlib\"

'matplotlib' figsize tuple

width and height of the figure, by default (8, 8)

(8, 8) xlim tuple

plot range for the observation (xmin, xmax), by default None

None ylim tuple

plot range for the model (ymin, ymax), by default None

None reg_method str or bool

method for determining the regression line \"ols\" : ordinary least squares regression \"odr\" : orthogonal distance regression, False : no regression line by default \"ols\"

'ols' title str

plot title, by default None

'' xlabel str

x-label text on plot, by default None

'' ylabel str

y-label text on plot, by default None

'' skill_table Optional[str | Sequence[str] | bool]

calculate skill scores and show in box next to the plot, True will show default metrics, list of metrics will show these skill scores, by default False, Note: cannot be used together with skill_scores argument

False skill_scores dict[str, float]

dictionary with skill scores to be shown in box next to the plot, by default None Note: cannot be used together with skill_table argument

None skill_score_unit str

unit for skill_scores, by default None

'' ax Axes

axes to plot on, by default None

None **kwargs {}

Returns:

Type Description Axes

The axes on which the scatter plot was drawn.

Source code in modelskill/plotting/_scatter.py
def scatter(\n    x: np.ndarray,\n    y: np.ndarray,\n    *,\n    bins: int | float = 120,\n    quantiles: int | Sequence[float] | None = None,\n    fit_to_quantiles: bool = False,\n    show_points: bool | int | float | None = None,\n    show_hist: Optional[bool] = None,\n    show_density: Optional[bool] = None,\n    norm: Optional[colors.Normalize] = None,\n    backend: Literal[\"matplotlib\", \"plotly\"] = \"matplotlib\",\n    figsize: Tuple[float, float] = (8, 8),\n    xlim: Optional[Tuple[float, float]] = None,\n    ylim: Optional[Tuple[float, float]] = None,\n    reg_method: str | bool = \"ols\",\n    title: str = \"\",\n    xlabel: str = \"\",\n    ylabel: str = \"\",\n    skill_table: Optional[str | Sequence[str] | bool] = False,\n    skill_scores: Mapping[str, float] | None = None,\n    skill_score_unit: Optional[str] = \"\",\n    ax: Optional[Axes] = None,\n    **kwargs,\n) -> Axes:\n    \"\"\"Scatter plot showing compared data: observation vs modelled\n    Optionally, with density histogram.\n\n    Parameters\n    ----------\n    x: np.array\n        X values e.g model values, must be same length as y\n    y: np.array\n        Y values e.g observation values, must be same length as x\n    bins: (int, float, sequence), optional\n        bins for the 2D histogram on the background. 
By default 120 bins.\n        if int, represents the number of bins of 2D\n        if float, represents the bin size\n        if sequence (list of int or float), represents the bin edges\n    quantiles: (int, sequence), optional\n        number of quantiles for QQ-plot, by default None and will depend on the scatter data length (10, 100 or 1000)\n        if int, this is the number of points\n        if sequence (list of floats), represents the desired quantiles (from 0 to 1)\n    fit_to_quantiles: bool, optional\n        by default the regression line is fitted to all data, if True, it is fitted to the quantiles\n        which can be useful to represent the extremes of the distribution, by default False\n    show_points : (bool, int, float), optional\n        Should the scatter points be displayed?\n        None means: show all points if fewer than 1e4, otherwise show 1e4 sample points, by default None.\n        float: fraction of points to show on plot from 0 to 1. eg 0.5 shows 50% of the points.\n        int: if 'n' (int) given, then 'n' points will be displayed, randomly selected.\n    show_hist : bool, optional\n        show the data density as a 2d histogram, by default None\n    show_density: bool, optional\n        show the data density as a colormap of the scatter, by default None. 
If both `show_density` and `show_hist`\n        are None, then `show_density` is used by default.\n        for binning the data, the previous kword `bins=Float` is used\n    norm : matplotlib.colors.Normalize\n        colormap normalization\n        If None, defaults to matplotlib.colors.PowerNorm(vmin=1,gamma=0.5)\n    backend : str, optional\n        use \"plotly\" (interactive) or \"matplotlib\" backend, by default \"matplotlib\"\n    figsize : tuple, optional\n        width and height of the figure, by default (8, 8)\n    xlim : tuple, optional\n        plot range for the observation (xmin, xmax), by default None\n    ylim : tuple, optional\n        plot range for the model (ymin, ymax), by default None\n    reg_method : str or bool, optional\n        method for determining the regression line\n        \"ols\" : ordinary least squares regression\n        \"odr\" : orthogonal distance regression,\n        False : no regression line\n        by default \"ols\"\n    title : str, optional\n        plot title, by default None\n    xlabel : str, optional\n        x-label text on plot, by default None\n    ylabel : str, optional\n        y-label text on plot, by default None\n    skill_table: str, List[str], bool, optional\n        calculate skill scores and show in box next to the plot,\n        True will show default metrics, list of metrics will show\n        these skill scores, by default False,\n        Note: cannot be used together with skill_scores argument\n    skill_scores : dict[str, float], optional\n        dictionary with skill scores to be shown in box next to\n        the plot, by default None\n        Note: cannot be used together with skill_table argument\n    skill_score_unit : str, optional\n        unit for skill_scores, by default None\n    ax : matplotlib.axes.Axes, optional\n        axes to plot on, by default None\n    **kwargs\n\n    Returns\n    -------\n    matplotlib.axes.Axes\n        The axes on which the scatter plot was drawn.\n    
\"\"\"\n    if \"skill_df\" in kwargs:\n        warnings.warn(\n            \"The `skill_df` keyword argument is deprecated. Use `skill_scores` instead.\",\n            FutureWarning,\n        )\n        skill_scores = kwargs.pop(\"skill_df\").to_dict(\"records\")[0]\n\n    if show_hist is None and show_density is None:\n        # Default: points density\n        show_density = True\n\n    if len(x) != len(y):\n        raise ValueError(\"x & y are not of equal length\")\n\n    if norm is None:\n        norm = colors.PowerNorm(vmin=1, gamma=0.5)\n\n    x_sample, y_sample = sample_points(x, y, show_points)\n    xq, yq = quantiles_xy(x, y, quantiles)\n\n    xmin, xmax = x.min(), x.max()\n    ymin, ymax = y.min(), y.max()\n    xymin = min([xmin, ymin])\n    xymax = max([xmax, ymax])\n\n    nbins_hist, binsize = _get_bins(bins, xymin=xymin, xymax=xymax)\n\n    if xlim is None:\n        xlim = (xymin - binsize, xymax + binsize)\n\n    if ylim is None:\n        ylim = (xymin - binsize, xymax + binsize)\n\n    x_trend = np.array([xlim[0], xlim[1]])\n\n    if show_hist and show_density:\n        raise TypeError(\n            \"if `show_hist=True` then `show_density` must be either `False` or `None`\"\n        )\n\n    z = None\n    if show_density and len(x_sample) > 0:\n        if not isinstance(bins, (float, int)):\n            raise TypeError(\n                \"if `show_density=True` then bins must be either float or int\"\n            )\n\n        # calculate density data\n        z = __scatter_density(x_sample, y_sample, binsize=binsize)\n        idx = z.argsort()\n        # Sort data by colormaps\n        x_sample, y_sample, z = x_sample[idx], y_sample[idx], z[idx]\n        # scale Z by sample size\n        z = z * len(x) / len(x_sample)\n\n    PLOTTING_BACKENDS: dict[str, Callable] = {\n        \"matplotlib\": _scatter_matplotlib,\n        \"plotly\": _scatter_plotly,\n    }\n\n    if backend not in PLOTTING_BACKENDS:\n        raise ValueError(f\"backend must be one 
of {list(PLOTTING_BACKENDS.keys())}\")\n\n    if skill_table:\n        from modelskill import from_matched\n\n        if skill_scores is not None:\n            raise ValueError(\n                \"Cannot pass skill_scores and skill_table at the same time\"\n            )\n        df = pd.DataFrame({\"obs\": x, \"model\": y})\n        cmp = from_matched(df)\n        metrics = None if skill_table is True else skill_table\n        skill = cmp.skill(metrics=metrics)\n        skill_scores = skill.to_dict(\"records\")[0]\n\n    return PLOTTING_BACKENDS[backend](\n        x=x,\n        y=y,\n        x_sample=x_sample,\n        y_sample=y_sample,\n        z=z,\n        xq=xq,\n        yq=yq,\n        x_trend=x_trend,\n        show_density=show_density,\n        norm=norm,\n        show_points=show_points,\n        show_hist=show_hist,\n        nbins_hist=nbins_hist,\n        reg_method=reg_method,\n        xlabel=xlabel,\n        ylabel=ylabel,\n        figsize=figsize,\n        xlim=xlim,\n        ylim=ylim,\n        title=title,\n        skill_scores=skill_scores,\n        skill_score_unit=skill_score_unit,\n        fit_to_quantiles=fit_to_quantiles,\n        ax=ax,\n        **kwargs,\n    )\n
"},{"location":"api/plotting/#modelskill.plotting.spatial_overview","title":"spatial_overview","text":"
spatial_overview(obs, mod=None, ax=None, figsize=None, title=None)\n

Plot observation points on a map showing the model domain

Parameters:

Name Type Description Default obs List[Observation]

List of observations to be shown on map

required mod Union[ModelResult, GeometryFM]

Model domain to be shown as outline

None ax

Adding to existing axis, instead of creating new fig

None figsize (float, float)

figure size, by default None

None title Optional[str]

plot title, default empty

None See Also

temporal_coverage

Returns:

Type Description Axes

The matplotlib axes object

Examples:

>>> import modelskill as ms\n>>> o1 = ms.PointObservation('HKNA_Hm0.dfs0', item=0, x=4.2420, y=52.6887, name=\"HKNA\")\n>>> o2 = ms.TrackObservation(\"Alti_c2_Dutch.dfs0\", item=3, name=\"c2\")\n>>> mr1 = ms.DfsuModelResult('HKZN_local_2017_DutchCoast.dfsu', name='SW_1', item=0)\n>>> mr2 = ms.DfsuModelResult('HKZN_local_2017_DutchCoast_v2.dfsu', name='SW_2', item=0)\n>>> ms.plotting.spatial_overview([o1, o2], [mr1, mr2])\n
Source code in modelskill/plotting/_spatial_overview.py
def spatial_overview(\n    obs: List[Observation],\n    mod=None,\n    ax=None,\n    figsize: Optional[Tuple] = None,\n    title: Optional[str] = None,\n) -> matplotlib.axes.Axes:\n    \"\"\"Plot observation points on a map showing the model domain\n\n    Parameters\n    ----------\n    obs: list[Observation]\n        List of observations to be shown on map\n    mod : Union[ModelResult, mikeio.GeometryFM], optional\n        Model domain to be shown as outline\n    ax: matplotlib.axes, optional\n        Adding to existing axis, instead of creating new fig\n    figsize : (float, float), optional\n        figure size, by default None\n    title: str, optional\n        plot title, default empty\n\n    See Also\n    --------\n    temporal_coverage\n\n    Returns\n    -------\n    matplotlib.axes.Axes\n        The matplotlib axes object\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> o1 = ms.PointObservation('HKNA_Hm0.dfs0', item=0, x=4.2420, y=52.6887, name=\"HKNA\")\n    >>> o2 = ms.TrackObservation(\"Alti_c2_Dutch.dfs0\", item=3, name=\"c2\")\n    >>> mr1 = ms.DfsuModelResult('HKZN_local_2017_DutchCoast.dfsu', name='SW_1', item=0)\n    >>> mr2 = ms.DfsuModelResult('HKZN_local_2017_DutchCoast_v2.dfsu', name='SW_2', item=0)\n    >>> ms.plotting.spatial_overview([o1, o2], [mr1, mr2])\n    \"\"\"\n    obs = [] if obs is None else list(obs) if isinstance(obs, Sequence) else [obs]  # type: ignore\n    mod = [] if mod is None else list(mod) if isinstance(mod, Sequence) else [mod]  # type: ignore\n\n    ax = _get_ax(ax=ax, figsize=figsize)\n    offset_x = 1  # TODO: better default\n\n    for m in mod:\n        # TODO: support Gridded ModelResults\n        if isinstance(m, (PointModelResult, TrackModelResult)):\n            raise ValueError(\n                f\"Model type {type(m)} not supported. 
Only DfsuModelResult and mikeio.GeometryFM supported!\"\n            )\n        if hasattr(m, \"data\") and hasattr(m.data, \"geometry\"):\n            # mod_name = m.name  # TODO: better support for multiple models\n            m = m.data.geometry\n        if hasattr(m, \"node_coordinates\"):\n            xn = m.node_coordinates[:, 0]\n            offset_x = 0.02 * (max(xn) - min(xn))\n        m.plot.outline(ax=ax)\n\n    for o in obs:\n        if isinstance(o, PointObservation):\n            ax.scatter(x=o.x, y=o.y, marker=\"x\")\n            ax.annotate(o.name, (o.x + offset_x, o.y))  # type: ignore\n        elif isinstance(o, TrackObservation):\n            if o.n_points < 10000:\n                ax.scatter(x=o.x, y=o.y, c=o.values, marker=\".\", cmap=\"Reds\")\n            else:\n                print(f\"{o.name}: Too many points to plot\")\n                # TODO: group by lonlat bin or sample randomly\n        else:\n            raise ValueError(\n                f\"Could not show observation {o}. Only PointObservation and TrackObservation supported.\"\n            )\n\n    if not title:\n        title = \"Spatial coverage\"\n    ax.set_title(title)\n\n    return ax\n
"},{"location":"api/plotting/#modelskill.plotting.taylor_diagram","title":"taylor_diagram","text":"
taylor_diagram(obs_std, points, figsize=(7, 7), obs_text='Observations', normalize_std=False, ax=None, title='Taylor diagram')\n

Plot a Taylor diagram using the given observations and points.

Parameters:

Name Type Description Default obs_std float

Standard deviation of the observations.

required points list of TaylorPoint objects or a single TaylorPoint object

Points to plot on the Taylor diagram.

required figsize tuple

Figure size in inches. Default is (7, 7).

(7, 7) obs_text str

Label for the observations. Default is \"Observations\".

'Observations' normalize_std bool

Whether to normalize the standard deviation of the points by the standard deviation of the observations. Default is False.

False title str

Title of the plot. Default is \"Taylor diagram\".

'Taylor diagram'

Returns:

Type Description Figure

The matplotlib figure object

Source code in modelskill/plotting/_taylor_diagram.py
def taylor_diagram(\n    obs_std,\n    points,\n    figsize=(7, 7),\n    obs_text=\"Observations\",\n    normalize_std=False,\n    ax=None,\n    title=\"Taylor diagram\",\n) -> matplotlib.figure.Figure:\n    \"\"\"\n    Plot a Taylor diagram using the given observations and points.\n\n    Parameters\n    -----------\n    obs_std : float\n        Standard deviation of the observations.\n    points : list of TaylorPoint objects or a single TaylorPoint object\n        Points to plot on the Taylor diagram.\n    figsize : tuple, optional\n        Figure size in inches. Default is (7, 7).\n    obs_text : str, optional\n        Label for the observations. Default is \"Observations\".\n    normalize_std : bool, optional\n        Whether to normalize the standard deviation of the points by the standard deviation of the observations. Default is False.\n    title : str, optional\n        Title of the plot. Default is \"Taylor diagram\".\n\n    Returns\n    --------\n    matplotlib.figure.Figure\n            The matplotlib figure object\n    \"\"\"\n\n    if np.isscalar(figsize):\n        figsize = (figsize, figsize)\n    elif figsize[0] != figsize[1]:\n        warnings.warn(\n            \"It is strongly recommended that the aspect ratio is 1:1 for Taylor diagrams\"\n        )\n    fig = plt.figure(figsize=figsize)\n\n    # srange=(0, 1.5),\n    if len(obs_text) > 30:\n        obs_text = obs_text[:25] + \"...\"\n\n    td = TaylorDiagram(\n        obs_std, fig=fig, rect=111, label=obs_text, normalize_std=normalize_std\n    )\n    contours = td.add_contours(levels=8, colors=\"0.5\", linestyles=\"dotted\")\n    plt.clabel(contours, inline=1, fontsize=10, fmt=\"%.2f\")\n\n    if isinstance(points, TaylorPoint):\n        points = [points]\n    for p in points:\n        assert isinstance(p, TaylorPoint)\n        m = \"o\" if p.marker is None else p.marker\n        ms = \"6\" if p.marker_size is None else p.marker_size\n        std = p.std / p.obs_std if normalize_std else p.std\n   
     td.add_sample(std, p.cc, marker=m, ms=ms, ls=\"\", label=p.name)\n        # marker=f\"${1}$\",\n        # td.add_sample(0.2, 0.8, marker=\"+\", ms=15, mew=1.2, ls=\"\", label=\"m2\")\n    td.add_grid()\n    fig.legend(\n        td.samplePoints,\n        [p.get_label() for p in td.samplePoints],\n        numpoints=1,\n        prop=dict(size=\"medium\"),\n        loc=\"upper right\",\n    )\n    fig.suptitle(title, size=\"x-large\")\n\n    # prevent the plot from being displayed, since it is also displayed by the returned object\n    plt.close()\n    return fig\n
"},{"location":"api/plotting/#modelskill.plotting.temporal_coverage","title":"temporal_coverage","text":"
temporal_coverage(obs=None, mod=None, *, limit_to_model_period=True, marker='_', ax=None, figsize=None, title=None)\n

Plot graph showing temporal coverage for all observations and models

Parameters:

Name Type Description Default obs List[Observation]

Show observation(s) as separate lines on plot

None mod List[ModelResult]

Show model(s) as separate lines on plot, by default None

None limit_to_model_period bool

Show temporal coverage only for period covered by the model, by default True

True marker str

plot marker for observations, by default \"_\"

'_' ax

Adding to existing axis, instead of creating new fig

None figsize Tuple(float, float)

size of figure, by default (7, 0.45*n_lines)

None title

plot title, default empty

None See Also

spatial_overview

Returns:

Type Description Axes

The matplotlib axes object

Examples:

>>> import modelskill as ms\n>>> o1 = ms.PointObservation('HKNA_Hm0.dfs0', item=0, x=4.2420, y=52.6887, name=\"HKNA\")\n>>> o2 = ms.TrackObservation(\"Alti_c2_Dutch.dfs0\", item=3, name=\"c2\")\n>>> mr1 = ms.DfsuModelResult('HKZN_local_2017_DutchCoast.dfsu', name='SW_1', item=0)\n>>> mr2 = ms.DfsuModelResult('HKZN_local_2017_DutchCoast_v2.dfsu', name='SW_2', item=0)\n>>> ms.plotting.temporal_coverage([o1, o2], [mr1, mr2])\n>>> ms.plotting.temporal_coverage([o1, o2], mr2, limit_to_model_period=False)\n>>> ms.plotting.temporal_coverage(o2, [mr1, mr2], marker=\".\")\n>>> ms.plotting.temporal_coverage(mod=[mr1, mr2], figsize=(5,3))\n
Source code in modelskill/plotting/_temporal_coverage.py
def temporal_coverage(\n    obs=None,\n    mod=None,\n    *,\n    limit_to_model_period=True,\n    marker=\"_\",\n    ax=None,\n    figsize=None,\n    title=None,\n) -> matplotlib.axes.Axes:\n    \"\"\"Plot graph showing temporal coverage for all observations and models\n\n    Parameters\n    ----------\n    obs : List[Observation], optional\n        Show observation(s) as separate lines on plot\n    mod : List[ModelResult], optional\n        Show model(s) as separate lines on plot, by default None\n    limit_to_model_period : bool, optional\n        Show temporal coverage only for period covered\n        by the model, by default True\n    marker : str, optional\n        plot marker for observations, by default \"_\"\n    ax: matplotlib.axes, optional\n        Adding to existing axis, instead of creating new fig\n    figsize : Tuple(float, float), optional\n        size of figure, by default (7, 0.45*n_lines)\n    title: str, optional\n        plot title, default empty\n\n    See Also\n    --------\n    spatial_overview\n\n    Returns\n    -------\n    matplotlib.axes.Axes\n        The matplotlib axes object\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> o1 = ms.PointObservation('HKNA_Hm0.dfs0', item=0, x=4.2420, y=52.6887, name=\"HKNA\")\n    >>> o2 = ms.TrackObservation(\"Alti_c2_Dutch.dfs0\", item=3, name=\"c2\")\n    >>> mr1 = ms.DfsuModelResult('HKZN_local_2017_DutchCoast.dfsu', name='SW_1', item=0)\n    >>> mr2 = ms.DfsuModelResult('HKZN_local_2017_DutchCoast_v2.dfsu', name='SW_2', item=0)\n    >>> ms.plotting.temporal_coverage([o1, o2], [mr1, mr2])\n    >>> ms.plotting.temporal_coverage([o1, o2], mr2, limit_to_model_period=False)\n    >>> ms.plotting.temporal_coverage(o2, [mr1, mr2], marker=\".\")\n    >>> ms.plotting.temporal_coverage(mod=[mr1, mr2], figsize=(5,3))\n    \"\"\"\n    obs = [] if obs is None else list(obs) if isinstance(obs, Sequence) else [obs]\n    mod = [] if mod is None else list(mod) if isinstance(mod, Sequence) 
else [mod]\n\n    n_lines = len(obs) + len(mod)\n    if figsize is None:\n        ysize = max(2.0, 0.45 * n_lines)\n        figsize = (7, ysize)\n\n    fig, ax = _get_fig_ax(ax=ax, figsize=figsize)\n    y = np.repeat(0.0, 2)\n    labels = []\n\n    if len(mod) > 0:\n        for mr in mod:\n            y += 1.0\n            plt.plot([mr.time[0], mr.time[-1]], y)\n            labels.append(mr.name)\n\n    for o in obs:\n        y += 1.0\n        plt.plot(o.time, y[0] * np.ones(len(o.time)), marker, markersize=5)\n        labels.append(o.name)\n\n    if len(mod) > 0 and limit_to_model_period:\n        mr = mod[0]  # take first model\n        plt.xlim([mr.time[0], mr.time[-1]])\n\n    plt.yticks(np.arange(n_lines) + 1, labels)\n    if len(mod) > 0:\n        for j in range(len(mod)):\n            ax.get_yticklabels()[j].set_fontstyle(\"italic\")\n            ax.get_yticklabels()[j].set_weight(\"bold\")\n            # set_color(\"#004165\")\n    fig.autofmt_xdate()\n\n    if title:\n        ax.set_title(title)\n    return ax\n
"},{"location":"api/plotting/#modelskill.plotting.wind_rose","title":"wind_rose","text":"
wind_rose(data, *, labels=('Measurement', 'Model'), mag_step=None, n_sectors=16, calm_threshold=None, calm_size=None, calm_text='Calm', r_step=0.1, r_max=None, legend=True, cmap1='viridis', cmap2='Greys', mag_bins=None, max_bin=None, n_dir_labels=None, secondary_dir_step_factor=2.0, figsize=(8, 8), ax=None, title=None)\n

Plots a (dual) wind (wave or current) roses with calms.

The size of the calm is determined by the primary (measurement) data.

Parameters:

Name Type Description Default data

array with 2 or 4 columns (magnitude, direction, magnitude2, direction2)

required labels

labels for the legend(s)

('Measurement', 'Model') mag_step Optional[float]

discretization for magnitude (delta_r, in radial direction )

None n_sectors int

number of directional sectors

16 calm_threshold Optional[float]

minimum value for data being counted as valid (i.e. below this is calm)

None calm_text str

text to display in calm.

'Calm' r_step float

radial axis discretization. By default 0.1 i.e. every 10%.

0.1 r_max Optional[float]

maximum radius (%) of plot, e.g. if 50% wanted then r_max=0.5

None max_bin Optional[float]

max value to truncate the data, e.g., max_bin=1.0 if hm0=1m is the desired final bin.

None mag_bins array of floats (optional) Default = None

force bins to array of values, e.g. when specifying non-equidistant bins.

None legend bool

show legend

True cmap1 string. Default= 'viridis'

colormap for main axis

'viridis' cmap2 string. Default= 'Greys'

colormap for secondary axis

'Greys' n_dir_labels int. Default= 4

number of labels in the polar plot, choose between 4, 8 or 16, default is to use the same as n_sectors

None secondary_dir_step_factor float. Default= 2.0

reduce width of secondary axis by this factor

2.0 figsize Tuple[float, float]

figure size

(8, 8) ax

Matplotlib axis to plot on defined as polar, it can be done using \"subplot_kw = dict(projection = 'polar')\". Default = None, new axis created.

None title

title of the plot

None

Returns:

Type Description Axes

Matplotlib axis with the plot

Source code in modelskill/plotting/_wind_rose.py
def wind_rose(\n    data,\n    *,\n    labels=(\"Measurement\", \"Model\"),\n    mag_step: Optional[float] = None,\n    n_sectors: int = 16,\n    calm_threshold: Optional[float] = None,  # TODO rename to vmin?\n    calm_size: Optional[float] = None,\n    calm_text: str = \"Calm\",\n    r_step: float = 0.1,\n    r_max: Optional[float] = None,\n    legend: bool = True,\n    cmap1: str = \"viridis\",\n    cmap2: str = \"Greys\",\n    mag_bins: Optional[List[float]] = None,\n    max_bin: Optional[float] = None,  # TODO rename to vmax?\n    n_dir_labels: Optional[int] = None,\n    secondary_dir_step_factor: float = 2.0,\n    figsize: Tuple[float, float] = (8, 8),\n    ax=None,\n    title=None,\n) -> matplotlib.axes.Axes:\n    \"\"\"Plots a (dual) wind (wave or current) roses with calms.\n\n    The size of the calm is determined by the primary (measurement) data.\n\n    Parameters\n    ----------\n    data: array-like\n        array with 2 or 4 columns (magnitude, direction, magnitude2, direction2)\n    labels: tuple of strings. Default= (\"Measurement\", \"Model\")\n        labels for the legend(s)\n    mag_step: float, (optional) Default= None\n        discretization for magnitude (delta_r, in radial direction )\n    n_sectors: int (optional) Default= 16\n        number of directional sectors\n    calm_threshold: float (optional) Default= None (auto calculated)\n        minimum value for data being counted as valid (i.e. below this is calm)\n    calm_text: str (optional) Default: 'Calm'\n        text to display in calm.\n    r_step: float (optional) Default= 0.1\n        radial axis discretization. By default 0.1 i.e. every 10%.\n    r_max: float (optional) Default= None\n        maximum radius (%) of plot, e.g. 
if 50% wanted then r_max=0.5\n    max_bin:  float (optional) Default= None\n        max value to truncate the data, e.g.,  max_bin=1.0 if hm0=1m is the desired final bin.\n    mag_bins : array of floats (optional) Default = None\n        force bins to array of values, e.g. when specifying non-equidistant bins.\n    legend: boolean. Default= True\n        show legend\n    cmap1 : string. Default= 'viridis'\n        colormap for main axis\n    cmap2 : string. Default= 'Greys'\n        colormap for secondary axis\n    n_dir_labels : int. Default= 4\n        number of labels in the polar plot, choose between 4, 8 or 16, default is to use the same as n_sectors\n    secondary_dir_step_factor : float. Default= 2.0\n        reduce width of secondary axis by this factor\n    figsize: tuple(float,float)\n        figure size\n    ax: Matplotlib axis Default= None\n        Matplotlib axis to plot on defined as polar, it can be done using \"subplot_kw = dict(projection = 'polar')\". Default = None, new axis created.\n    title: str Default= None\n        title of the plot\n\n    Returns\n    -------\n    matplotlib.axes.Axes\n        Matplotlib axis with the plot\n    \"\"\"\n    if hasattr(data, \"to_numpy\"):\n        data = data.to_numpy()\n\n    # check that data is array_like\n    assert hasattr(data, \"__array__\"), \"data must be array_like\"\n\n    data_1 = data[:, 0:2]  # primary magnitude and direction\n    magmax = data_1[:, 0].max()\n\n    ncols = data.shape[1]\n    assert ncols in [2, 4], \"data must have 2 or 4 columns\"\n    dual = ncols == 4\n\n    if dual:\n        data_2 = data[:, 2:4]  # secondary magnitude and direction\n        magmax = max(magmax, data_2[:, 0].max())\n        assert len(labels) == 2, \"labels must have 2 elements\"\n\n    # magnitude bins\n    ui, vmin, vmax = pretty_intervals(\n        magmax,\n        mag_bins,\n        mag_step,\n        calm_threshold,\n        max_bin,\n    )\n\n    dir_step = 360 // n_sectors\n\n    if n_dir_labels 
is None:\n        if n_sectors in (4, 8, 16):\n            n_dir_labels = n_sectors\n        else:\n            # Directional labels are not identical to the number of sectors, use a sane default\n            n_dir_labels = 16\n\n    dh = _dirhist2d(data_1, ui=ui, dir_step=dir_step)\n    calm = dh.calm\n\n    if dual:\n        assert len(data_1) == len(data_2), \"data_1 and data_2 must have same length\"\n        dh2 = _dirhist2d(data_2, ui=ui, dir_step=dir_step)\n        assert dh.density.shape == dh2.density.shape\n\n    ri, rmax = _calc_radial_ticks(counts=dh.density, step=r_step, stop=r_max)\n\n    # Resize calm\n    # TODO this overwrites the calm value calculated above\n    if calm_size is not None:\n        calm = calm_size\n\n    cmap = _get_cmap(cmap1)\n\n    if ax is None:\n        _, ax = plt.subplots(figsize=figsize, subplot_kw=dict(projection=\"polar\"))\n\n    ax.set_title(title)\n    ax.set_theta_zero_location(\"N\")\n    ax.set_theta_direction(-1)\n\n    dir_labels = directional_labels(n_dir_labels)\n    grid = np.linspace(0, 360, n_dir_labels + 1)[:-1]\n    ax.set_thetagrids(grid, dir_labels)\n\n    # ax.tick_params(pad=-24)\n\n    ax.set_ylim(0, calm + rmax)\n    ax.set_yticks(ri + calm)\n    tick_labels = [f\"{tick * 100 :.0f}%\" for tick in ri]\n    ax.set_yticklabels(tick_labels)\n    ax.set_rlabel_position(5)\n\n    if vmin > 0:\n        _add_calms_to_ax(ax, threshold=calm, text=calm_text)\n\n    # primary histogram (model)\n    p = _create_patch(\n        thetac=dh.dir_centers,\n        dir_step=dir_step,\n        calm=calm,\n        ui=ui,\n        counts=dh.density,\n        cmap=cmap,\n        vmax=vmax,\n    )\n    ax.add_collection(p)\n\n    if legend:\n        _add_legend_to_ax(\n            ax,\n            cmap=cmap,\n            vmax=vmax,\n            ui=ui,\n            calm=calm,\n            counts=dh.density,\n            label=labels[0],\n            primary=True,\n            dual=dual,\n        )\n\n    if dual:\n        # 
add second histogram (observation)\n        cmap = _get_cmap(cmap2)\n\n        # TODO should this be calm2?\n        p = _create_patch(\n            thetac=dh.dir_centers,\n            dir_step=dir_step,\n            calm=calm,\n            ui=ui,\n            counts=dh2.density,\n            cmap=cmap,\n            vmax=vmax,\n            dir_step_factor=secondary_dir_step_factor,\n        )\n        ax.add_collection(p)\n\n        if legend:\n            _add_legend_to_ax(\n                ax,\n                cmap=cmap,\n                vmax=vmax,\n                ui=ui,\n                calm=dh2.calm,\n                counts=dh2.density,\n                label=labels[1],\n                primary=False,\n                dual=dual,\n            )\n\n    return ax\n
"},{"location":"api/quantity/","title":"Quantity","text":""},{"location":"api/quantity/#modelskill.quantity.Quantity","title":"modelskill.quantity.Quantity dataclass","text":"

Quantity of data

Parameters:

Name Type Description Default name str

Name of the quantity

required unit str

Unit of the quantity

required is_directional bool

Whether the quantity is directional (e.g. Wind Direction), by default False

False

Examples:

>>> wl = Quantity(name=\"Water Level\", unit=\"meter\")\n>>> wl\nQuantity(name='Water Level', unit='meter')\n>>> wl.name\n'Water Level'\n>>> wl.unit\n'meter'\n>>> wl.is_compatible(wl)\nTrue\n>>> ws = Quantity(name=\"Wind Direction\", unit=\"degree\", is_directional=True)\n>>> ws\nQuantity(name='Wind Direction', unit='degree', is_directional=True)\n
Source code in modelskill/quantity.py
@dataclass(frozen=True)\nclass Quantity:\n    \"\"\"Quantity of data\n\n    Parameters\n    ----------\n    name : str\n        Name of the quantity\n    unit : str\n        Unit of the quantity\n    is_directional : bool, optional\n        Whether the quantity is directional (e.g. Wind Direction), by default False\n\n    Examples\n    --------\n    >>> wl = Quantity(name=\"Water Level\", unit=\"meter\")\n    >>> wl\n    Quantity(name='Water Level', unit='meter')\n    >>> wl.name\n    'Water Level'\n    >>> wl.unit\n    'meter'\n    >>> wl.is_compatible(wl)\n    True\n    >>> ws = Quantity(name=\"Wind Direction\", unit=\"degree\", is_directional=True)\n    >>> ws\n    Quantity(name='Wind Direction', unit='degree', is_directional=True)\n    \"\"\"\n\n    name: str\n    unit: str\n    is_directional: bool = False\n\n    def __str__(self):\n        return f\"{self.name} [{self.unit}]\"\n\n    def __repr__(self):\n        if self.is_directional:\n            return (\n                f\"Quantity(name='{self.name}', unit='{self.unit}', is_directional=True)\"\n            )\n        else:\n            # hide is_directional if False to avoid clutter\n            return f\"Quantity(name='{self.name}', unit='{self.unit}')\"\n\n    def is_compatible(self, other) -> bool:\n        \"\"\"Check if the quantity is compatible with another quantity\n\n        Examples\n        --------\n        >>> wl = Quantity(name=\"Water Level\", unit=\"meter\")\n        >>> ws = Quantity(name=\"Wind Speed\", unit=\"meter per second\")\n        >>> wl.is_compatible(ws)\n        False\n        >>> uq = Quantity(name=\"Undefined\", unit=\"Undefined\")\n        >>> wl.is_compatible(uq)\n        True\n        \"\"\"\n\n        if self == other:\n            return True\n\n        if (self.name == \"Undefined\") or (other.name == \"Undefined\"):\n            return True\n\n        return False\n\n    @staticmethod\n    def undefined() -> \"Quantity\":\n        return Quantity(name=\"\", 
unit=\"\")\n\n    def to_dict(self) -> Dict[str, str]:\n        return {\"name\": self.name, \"unit\": self.unit}\n\n    @staticmethod\n    def from_cf_attrs(attrs: Mapping[str, str]) -> \"Quantity\":\n        \"\"\"Create a Quantity from a CF compliant attributes dictionary\n\n        If units is \"degree\", \"degrees\" or \"Degree true\", the quantity is assumed\n        to be directional. Based on https://codes.ecmwf.int/grib/param-db/ and\n        https://cfconventions.org/Data/cf-standard-names/current/build/cf-standard-name-table.html\n\n        Parameters\n        ----------\n        attrs : Mapping[str, str]\n            Attributes dictionary\n\n        Examples\n        --------\n        >>> Quantity.from_cf_attrs({'long_name': 'Water Level', 'units': 'meter'})\n        Quantity(name='Water Level', unit='meter')\n        >>> Quantity.from_cf_attrs({'long_name': 'Wind direction', 'units': 'degree'})\n        Quantity(name='Wind direction', unit='degree', is_directional=True)\n\n        \"\"\"\n        quantity = Quantity.undefined()\n        if long_name := attrs.get(\"long_name\"):\n            if units := attrs.get(\"units\"):\n                is_directional = units in [\"degree\", \"degrees\", \"Degree true\"]\n                quantity = Quantity(\n                    name=long_name,\n                    unit=units,\n                    is_directional=is_directional,\n                )\n        return quantity\n\n    @staticmethod\n    def from_mikeio_iteminfo(iteminfo: mikeio.ItemInfo) -> \"Quantity\":\n        \"\"\"Create a Quantity from mikeio ItemInfo\n\n        If the unit is \"degree\", the quantity is assumed to be directional.\n        \"\"\"\n\n        unit = iteminfo.unit.short_name\n        is_directional = unit == \"degree\"\n        return Quantity(\n            name=repr(iteminfo.type), unit=unit, is_directional=is_directional\n        )\n\n    @staticmethod\n    def from_mikeio_eum_name(type_name: str) -> \"Quantity\":\n        
\"\"\"Create a Quantity from a name recognized by mikeio\n\n        Parameters\n        ----------\n        type_name : str\n            Name of the quantity\n\n        Examples\n        --------\n        >>> Quantity.from_mikeio_eum_name(\"Water Level\")\n        Quantity(name='Water Level', unit='meter')\n        \"\"\"\n        try:\n            etype = mikeio.EUMType[type_name]\n        except KeyError:\n            name_underscore = type_name.replace(\" \", \"_\")\n            try:\n                etype = mikeio.EUMType[name_underscore]\n            except KeyError:\n                raise ValueError(\n                    f\"{type_name=} is not recognized as a known type. Please create a Quantity(name='{type_name}' unit='<FILL IN UNIT>')\"\n                )\n        unit = etype.units[0].name\n        is_directional = unit == \"degree\"\n        warnings.warn(f\"{unit=} was automatically set for {type_name=}\")\n        return Quantity(name=type_name, unit=unit, is_directional=is_directional)\n
"},{"location":"api/quantity/#modelskill.quantity.Quantity.from_cf_attrs","title":"from_cf_attrs staticmethod","text":"
from_cf_attrs(attrs)\n

Create a Quantity from a CF compliant attributes dictionary

If units is \"degree\", \"degrees\" or \"Degree true\", the quantity is assumed to be directional. Based on https://codes.ecmwf.int/grib/param-db/ and https://cfconventions.org/Data/cf-standard-names/current/build/cf-standard-name-table.html

Parameters:

Name Type Description Default attrs Mapping[str, str]

Attributes dictionary

required

Examples:

>>> Quantity.from_cf_attrs({'long_name': 'Water Level', 'units': 'meter'})\nQuantity(name='Water Level', unit='meter')\n>>> Quantity.from_cf_attrs({'long_name': 'Wind direction', 'units': 'degree'})\nQuantity(name='Wind direction', unit='degree', is_directional=True)\n
Source code in modelskill/quantity.py
@staticmethod\ndef from_cf_attrs(attrs: Mapping[str, str]) -> \"Quantity\":\n    \"\"\"Create a Quantity from a CF compliant attributes dictionary\n\n    If units is \"degree\", \"degrees\" or \"Degree true\", the quantity is assumed\n    to be directional. Based on https://codes.ecmwf.int/grib/param-db/ and\n    https://cfconventions.org/Data/cf-standard-names/current/build/cf-standard-name-table.html\n\n    Parameters\n    ----------\n    attrs : Mapping[str, str]\n        Attributes dictionary\n\n    Examples\n    --------\n    >>> Quantity.from_cf_attrs({'long_name': 'Water Level', 'units': 'meter'})\n    Quantity(name='Water Level', unit='meter')\n    >>> Quantity.from_cf_attrs({'long_name': 'Wind direction', 'units': 'degree'})\n    Quantity(name='Wind direction', unit='degree', is_directional=True)\n\n    \"\"\"\n    quantity = Quantity.undefined()\n    if long_name := attrs.get(\"long_name\"):\n        if units := attrs.get(\"units\"):\n            is_directional = units in [\"degree\", \"degrees\", \"Degree true\"]\n            quantity = Quantity(\n                name=long_name,\n                unit=units,\n                is_directional=is_directional,\n            )\n    return quantity\n
"},{"location":"api/quantity/#modelskill.quantity.Quantity.from_mikeio_eum_name","title":"from_mikeio_eum_name staticmethod","text":"
from_mikeio_eum_name(type_name)\n

Create a Quantity from a name recognized by mikeio

Parameters:

Name Type Description Default type_name str

Name of the quantity

required

Examples:

>>> Quantity.from_mikeio_eum_name(\"Water Level\")\nQuantity(name='Water Level', unit='meter')\n
Source code in modelskill/quantity.py
@staticmethod\ndef from_mikeio_eum_name(type_name: str) -> \"Quantity\":\n    \"\"\"Create a Quantity from a name recognized by mikeio\n\n    Parameters\n    ----------\n    type_name : str\n        Name of the quantity\n\n    Examples\n    --------\n    >>> Quantity.from_mikeio_eum_name(\"Water Level\")\n    Quantity(name='Water Level', unit='meter')\n    \"\"\"\n    try:\n        etype = mikeio.EUMType[type_name]\n    except KeyError:\n        name_underscore = type_name.replace(\" \", \"_\")\n        try:\n            etype = mikeio.EUMType[name_underscore]\n        except KeyError:\n            raise ValueError(\n                f\"{type_name=} is not recognized as a known type. Please create a Quantity(name='{type_name}' unit='<FILL IN UNIT>')\"\n            )\n    unit = etype.units[0].name\n    is_directional = unit == \"degree\"\n    warnings.warn(f\"{unit=} was automatically set for {type_name=}\")\n    return Quantity(name=type_name, unit=unit, is_directional=is_directional)\n
"},{"location":"api/quantity/#modelskill.quantity.Quantity.from_mikeio_iteminfo","title":"from_mikeio_iteminfo staticmethod","text":"
from_mikeio_iteminfo(iteminfo)\n

Create a Quantity from mikeio ItemInfo

If the unit is \"degree\", the quantity is assumed to be directional.

Source code in modelskill/quantity.py
@staticmethod\ndef from_mikeio_iteminfo(iteminfo: mikeio.ItemInfo) -> \"Quantity\":\n    \"\"\"Create a Quantity from mikeio ItemInfo\n\n    If the unit is \"degree\", the quantity is assumed to be directional.\n    \"\"\"\n\n    unit = iteminfo.unit.short_name\n    is_directional = unit == \"degree\"\n    return Quantity(\n        name=repr(iteminfo.type), unit=unit, is_directional=is_directional\n    )\n
"},{"location":"api/quantity/#modelskill.quantity.Quantity.is_compatible","title":"is_compatible","text":"
is_compatible(other)\n

Check if the quantity is compatible with another quantity

Examples:

>>> wl = Quantity(name=\"Water Level\", unit=\"meter\")\n>>> ws = Quantity(name=\"Wind Speed\", unit=\"meter per second\")\n>>> wl.is_compatible(ws)\nFalse\n>>> uq = Quantity(name=\"Undefined\", unit=\"Undefined\")\n>>> wl.is_compatible(uq)\nTrue\n
Source code in modelskill/quantity.py
def is_compatible(self, other) -> bool:\n    \"\"\"Check if the quantity is compatible with another quantity\n\n    Examples\n    --------\n    >>> wl = Quantity(name=\"Water Level\", unit=\"meter\")\n    >>> ws = Quantity(name=\"Wind Speed\", unit=\"meter per second\")\n    >>> wl.is_compatible(ws)\n    False\n    >>> uq = Quantity(name=\"Undefined\", unit=\"Undefined\")\n    >>> wl.is_compatible(uq)\n    True\n    \"\"\"\n\n    if self == other:\n        return True\n\n    if (self.name == \"Undefined\") or (other.name == \"Undefined\"):\n        return True\n\n    return False\n
"},{"location":"api/settings/","title":"Settings","text":""},{"location":"api/settings/#modelskill.settings","title":"modelskill.settings","text":"

The settings module holds package-wide configurables and provides a uniform API for working with them.

This module is inspired by pandas config module.

Overview

This module supports the following requirements:

  • options are referenced using keys in dot.notation, e.g. \"x.y.option - z\".
  • keys are case-insensitive.
  • functions should accept partial/regex keys, when unambiguous.
  • options can be registered by modules at import time.
  • options have a default value, and (optionally) a description and validation function associated with them.
  • options can be reset to their default value.
  • all option can be reset to their default value at once.
  • all options in a certain sub - namespace can be reset at once.
  • the user can set / get / reset or ask for the description of an option.
  • a developer can register an option.
Implementation
  • Data is stored using nested dictionaries, and should be accessed through the provided API.
  • \"Registered options\" have metadata associated with them, which are stored in auxiliary dictionaries keyed on the fully-qualified key, e.g. \"x.y.z.option\".

Examples:

>>> import modelskill as ms\n>>> ms.options\nmetrics.list : [<function bias at 0x0000029D614A2DD0>, (...)]\nplot.rcparams : {}\nplot.scatter.legend.bbox : {'facecolor': 'white', (...)}\nplot.scatter.legend.fontsize : 12\nplot.scatter.legend.kwargs : {}\nplot.scatter.oneone_line.color : blue\nplot.scatter.oneone_line.label : 1:1\nplot.scatter.points.alpha : 0.5\nplot.scatter.points.label :\nplot.scatter.points.size : 20\nplot.scatter.quantiles.color : darkturquoise\nplot.scatter.quantiles.kwargs : {}\nplot.scatter.quantiles.label : Q-Q\nplot.scatter.quantiles.marker : X\nplot.scatter.quantiles.markeredgecolor : (0, 0, 0, 0.4)\nplot.scatter.quantiles.markeredgewidth : 0.5\nplot.scatter.quantiles.markersize : 3.5\nplot.scatter.reg_line.kwargs : {'color': 'r'}\n>>> ms.set_option(\"plot.scatter.points.size\", 4)\n>>> plot.scatter.points.size\n4\n>>> ms.get_option(\"plot.scatter.points.size\")\n4\n>>> ms.options.plot.scatter.points.size = 10\n>>> ms.options.plot.scatter.points.size\n10\n>>> ms.reset_option(\"plot.scatter.points.size\")\n>>> ms.options.plot.scatter.points.size\n20\n
"},{"location":"api/settings/#modelskill.settings.OptionsContainer","title":"OptionsContainer","text":"

provide attribute-style access to a nested dict of options

Accessed by ms.options

Source code in modelskill/settings.py
class OptionsContainer:\n    \"\"\"provide attribute-style access to a nested dict of options\n\n    Accessed by ms.options\n    \"\"\"\n\n    def __init__(self, d: Dict[str, Any], prefix: str = \"\") -> None:\n        object.__setattr__(self, \"d\", d)\n        object.__setattr__(self, \"prefix\", prefix)\n\n    def __setattr__(self, key: str, val: Any) -> None:\n        prefix = object.__getattribute__(self, \"prefix\")\n        if prefix:\n            prefix += \".\"\n        prefix += key\n        # you can't set new keys\n        # can you can't overwrite subtrees\n        if key in self.d and not isinstance(self.d[key], dict):\n            set_option(prefix, val)\n        else:\n            raise OptionError(\"You can only set the value of existing options\")\n\n    def __getattr__(self, key: str):\n        prefix = object.__getattribute__(self, \"prefix\")\n        if prefix:\n            prefix += \".\"\n        prefix += key\n        try:\n            v = object.__getattribute__(self, \"d\")[key]\n        except KeyError as err:\n            raise OptionError(f\"No such option: {key}\") from err\n        if isinstance(v, dict):\n            return OptionsContainer(v, prefix)\n        else:\n            return get_option(prefix)\n\n    def to_dict(self) -> Dict:\n        \"\"\"Return options as dictionary with full-name keys\"\"\"\n        return _option_to_dict(self.prefix)\n\n    # def search(self, pat: str = \"\") -> List[str]:\n    #     keys = _select_options(f\"{self.prefix}*{pat}\")\n    #     return list(keys)\n\n    def __repr__(self) -> str:\n        return _describe_option_short(self.prefix, False) or \"\"\n\n    def __dir__(self) -> Iterable[str]:\n        return list(self.d.keys())\n
"},{"location":"api/settings/#modelskill.settings.OptionsContainer.to_dict","title":"to_dict","text":"
to_dict()\n

Return options as dictionary with full-name keys

Source code in modelskill/settings.py
def to_dict(self) -> Dict:\n    \"\"\"Return options as dictionary with full-name keys\"\"\"\n    return _option_to_dict(self.prefix)\n
"},{"location":"api/settings/#modelskill.settings.get_option","title":"get_option","text":"
get_option(pat)\n

Get value of a single option matching a pattern

Parameters:

Name Type Description Default pat str

pattern of the sought option

required

Returns:

Type Description Any

value of matched option

Source code in modelskill/settings.py
def get_option(pat: str) -> Any:\n    \"\"\"Get value of a single option matching a pattern\n\n    Parameters\n    ----------\n    pat : str\n        pattern of seeked option\n\n    Returns\n    -------\n    Any\n        value of matched option\n    \"\"\"\n    key = _get_single_key(pat)\n\n    # walk the nested dict\n    root, k = _get_root(key)\n    return root[k]\n
"},{"location":"api/settings/#modelskill.settings.load_style","title":"load_style","text":"
load_style(name)\n

Load a number of options from a named style.

Parameters:

Name Type Description Default name str

Name of the predefined style to load. Available styles are: 'MOOD': Resembling the plots of the www.metocean-on-demand.com data portal.

required

Raises:

Type Description KeyError

If a named style is not found.

Examples:

>>> import modelskill as ms\n>>> ms.load_style('MOOD')\n
Source code in modelskill/settings.py
def load_style(name: str) -> None:\n    \"\"\"Load a number of options from a named style.\n\n    Parameters\n    ----------\n    name : str\n        Name of the predefined style to load. Available styles are:\n        'MOOD': Resembling the plots of the www.metocean-on-demand.com data portal.\n\n    Raises\n    ------\n    KeyError\n        If a named style is not found.\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> ms.load_style('MOOD')\n    \"\"\"\n\n    lname = name.lower()\n\n    # The number of folders to search can be expanded in the future\n    path = Path(__file__).parent / \"styles\"\n    NAMED_STYLES = {x.stem: x for x in path.glob(\"*.yml\")}\n\n    if lname not in NAMED_STYLES:\n        raise KeyError(\n            f\"Style '{name}' not found. Choose from {list(NAMED_STYLES.keys())}\"\n        )\n\n    style_path = NAMED_STYLES[lname]\n\n    with open(style_path, encoding=\"utf-8\") as f:\n        contents = f.read()\n        d = yaml.load(contents, Loader=yaml.FullLoader)\n\n    set_option(d)\n
"},{"location":"api/settings/#modelskill.settings.register_option","title":"register_option","text":"
register_option(key, defval, doc='', validator=None)\n

Register an option in the package-wide modelskill settings object

Parameters:

Name Type Description Default key str

Fully-qualified key, e.g. \"x.y.option - z\".

required defval object

Default value of the option.

required doc str

Description of the option.

'' validator Callable

Function of a single argument, should raise ValueError if called with a value which is not a legal value for the option.

None

Raises:

Type Description ValueError if `validator` is specified and `defval` is not a valid value. Source code in modelskill/settings.py
def register_option(\n    key: str,\n    defval: object,\n    doc: str = \"\",\n    validator: Optional[Callable[[Any], Any]] = None,\n    # cb: Optional[Callable[[str], Any]] = None,\n) -> None:\n    \"\"\"\n    Register an option in the package-wide modelskill settingss object\n\n    Parameters\n    ----------\n    key : str\n        Fully-qualified key, e.g. \"x.y.option - z\".\n    defval : object\n        Default value of the option.\n    doc : str\n        Description of the option.\n    validator : Callable, optional\n        Function of a single argument, should raise `ValueError` if\n        called with a value which is not a legal value for the option.\n\n    Raises\n    ------\n    ValueError if `validator` is specified and `defval` is not a valid value.\n    \"\"\"\n    import keyword\n    import tokenize\n\n    key = key.lower()\n\n    if key in _registered_options:\n        raise OptionError(f\"Option '{key}' has already been registered\")\n    # if key in _reserved_keys:\n    #     raise OptionError(f\"Option '{key}' is a reserved key\")\n\n    # the default value should be legal\n    if validator:\n        validator(defval)\n\n    # walk the nested dict, creating dicts as needed along the path\n    path = key.split(\".\")\n\n    for k in path:\n        if not re.match(\"^\" + tokenize.Name + \"$\", k):\n            raise ValueError(f\"{k} is not a valid identifier\")\n        if keyword.iskeyword(k):\n            raise ValueError(f\"{k} is a python keyword\")\n\n    cursor = _global_settings\n    msg = \"Path prefix to option '{option}' is already an option\"\n\n    for i, p in enumerate(path[:-1]):\n        if not isinstance(cursor, dict):\n            raise OptionError(msg.format(option=\".\".join(path[:i])))\n        if p not in cursor:\n            cursor[p] = {}\n        cursor = cursor[p]\n\n    if not isinstance(cursor, dict):\n        raise OptionError(msg.format(option=\".\".join(path[:-1])))\n\n    cursor[path[-1]] = defval  # 
initialize\n\n    # save the option metadata\n    _registered_options[key] = RegisteredOption(\n        key=key,\n        defval=defval,\n        doc=doc,\n        validator=validator,  # , cb=cb\n    )\n
"},{"location":"api/settings/#modelskill.settings.reset_option","title":"reset_option","text":"
reset_option(pat='', silent=False)\n

Reset one or more options (matching a pattern) to the default value

Examples:

>>> ms.options.plot.scatter.points.size\n20\n>>> ms.options.plot.scatter.points.size = 10\n>>> ms.options.plot.scatter.points.size\n10\n>>> ms.reset_option(\"plot.scatter.points.size\")\n>>> ms.options.plot.scatter.points.size\n20\n
Source code in modelskill/settings.py
def reset_option(pat: str = \"\", silent: bool = False) -> None:\n    \"\"\"Reset one or more options (matching a pattern) to the default value\n\n    Examples\n    --------\n    >>> ms.options.plot.scatter.points.size\n    20\n    >>> ms.options.plot.scatter.points.size = 10\n    >>> ms.options.plot.scatter.points.size\n    10\n    >>> ms.reset_option(\"plot.scatter.points.size\")\n    >>> ms.options.plot.scatter.points.size\n    20\n\n    \"\"\"\n\n    keys = _select_options(pat)\n\n    if len(keys) == 0:\n        raise OptionError(\"No such keys(s)\")\n\n    if len(keys) > 1 and len(pat) < 4 and pat != \"all\":\n        raise ValueError(\n            \"You must specify at least 4 characters when \"\n            \"resetting multiple keys, use the special keyword \"\n            '\"all\" to reset all the options to their default value'\n        )\n\n    for k in keys:\n        set_option(k, _registered_options[k].defval, silent=silent)\n
"},{"location":"api/settings/#modelskill.settings.set_option","title":"set_option","text":"
set_option(*args, **kwargs)\n

Set the value of one or more options

Examples:

>>> ms.set_option(\"plot.scatter.points.size\", 4)\n>>> ms.set_option({\"plot.scatter.points.size\": 4})\n>>> ms.options.plot.scatter.points.size = 4\n
Source code in modelskill/settings.py
def set_option(*args, **kwargs) -> None:\n    \"\"\"Set the value of one or more options\n\n    Examples\n    --------\n    >>> ms.set_option(\"plot.scatter.points.size\", 4)\n    >>> ms.set_option({\"plot.scatter.points.size\": 4})\n    >>> ms.options.plot.scatter.points.size = 4\n    \"\"\"\n    # must at least 1 arg deal with constraints later\n\n    if len(args) == 1 and isinstance(args[0], dict):\n        kwargs.update(args[0])\n\n    if len(args) % 2 == 0:\n        keys = args[::2]\n        values = args[1::2]\n        kwargs.update(dict(zip(keys, values)))\n\n    if len(args) > 1 and len(args) % 2 != 0:\n        raise ValueError(\"Must provide a value for each key, i.e. even number of args\")\n\n    # default to false\n    kwargs.pop(\"silent\", False)\n\n    for k, v in kwargs.items():\n        key = _get_single_key(k)  # , silent)\n\n        o = _get_registered_option(key)\n        if o and o.validator:\n            o.validator(v)\n\n        # walk the nested dict\n        root, k = _get_root(key)\n        root[k] = v\n
"},{"location":"api/skill/","title":"Skill","text":""},{"location":"api/skill/#modelskill.skill.SkillTable","title":"modelskill.skill.SkillTable","text":"

SkillTable object for visualization and analysis returned by the comparer's skill method. The object wraps the pd.DataFrame class which can be accessed from the attribute data.

The columns are assumed to be metrics and data for a single metric can be accessed by e.g. s.rmse or s[\"rmse\"]. The resulting object can be used for plotting.

Examples:

>>> sk = cc.skill()\n>>> sk.mod_names\n['SW_1', 'SW_2']\n>>> sk.style()\n>>> sk.sel(model='SW_1').style()\n>>> sk.rmse.plot.bar()\n
Source code in modelskill/skill.py
class SkillTable:\n    \"\"\"\n    SkillTable object for visualization and analysis returned by\n    the comparer's `skill` method. The object wraps the pd.DataFrame\n    class which can be accessed from the attribute `data`.\n\n    The columns are assumed to be metrics and data for a single metric\n    can be accessed by e.g. `s.rmse` or `s[\"rmse\"]`. The resulting object\n    can be used for plotting.\n\n    Examples\n    --------\n    >>> sk = cc.skill()\n    >>> sk.mod_names\n    ['SW_1', 'SW_2']\n    >>> sk.style()\n    >>> sk.sel(model='SW_1').style()\n    >>> sk.rmse.plot.bar()\n    \"\"\"\n\n    _large_is_best_metrics = [\n        \"cc\",\n        \"corrcoef\",\n        \"r2\",\n        \"spearmanr\",\n        \"rho\",\n        \"nash_sutcliffe_efficiency\",\n        \"nse\",\n        \"kge\",\n    ]\n    _small_is_best_metrics = [\n        \"mae\",\n        \"mape\",\n        \"mean_absolute_error\",\n        \"mean_absolute_percentage_error\",\n        \"rmse\",\n        \"root_mean_squared_error\",\n        \"urmse\",\n        \"scatter_index\",\n        \"si\",\n        \"mef\",\n        \"model_efficiency_factor\",\n    ]\n    _one_is_best_metrics = [\"lin_slope\"]\n    _zero_is_best_metrics = [\"bias\"]\n\n    def __init__(self, data: pd.DataFrame):\n        self.data: pd.DataFrame = (\n            data if isinstance(data, pd.DataFrame) else data.to_dataframe()\n        )\n        # TODO remove in v1.1\n        self.plot = DeprecatedSkillPlotter(self)  # type: ignore\n\n    # TODO: remove?\n    @property\n    def _df(self) -> pd.DataFrame:\n        \"\"\"Data as DataFrame without x and y columns\"\"\"\n        return self.to_dataframe(drop_xy=True)\n\n    @property\n    def metrics(self) -> Collection[str]:\n        \"\"\"List of metrics (columns) in the SkillTable\"\"\"\n        return list(self._df.columns)\n\n    # TODO: remove?\n    def __len__(self) -> int:\n        return len(self._df)\n\n    def to_dataframe(self, drop_xy: bool = True) -> 
pd.DataFrame:\n        \"\"\"Convert SkillTable to pd.DataFrame\n\n        Parameters\n        ----------\n        drop_xy : bool, optional\n            Drop the x, y coordinates?, by default True\n\n        Returns\n        -------\n        pd.DataFrame\n            Skill data as pd.DataFrame\n        \"\"\"\n        if drop_xy:\n            return self.data.drop(columns=[\"x\", \"y\"], errors=\"ignore\")\n        else:\n            return self.data.copy()\n\n    def to_geodataframe(self, crs: str = \"EPSG:4326\") -> gpd.GeoDataFrame:\n        \"\"\"Convert SkillTable to geopandas.GeoDataFrame\n\n        Note: requires geopandas to be installed\n\n        Note: requires x and y columns to be present\n\n        Parameters\n        ----------\n        crs : str, optional\n            Coordinate reference system identifier passed to the\n            GeoDataFrame constructor, by default \"EPSG:4326\"\n\n        Returns\n        -------\n        gpd.GeoDataFrame\n            Skill data as GeoDataFrame\n        \"\"\"\n        import geopandas as gpd\n\n        assert \"x\" in self.data.columns\n        assert \"y\" in self.data.columns\n\n        df = self.to_dataframe(drop_xy=False)\n\n        gdf = gpd.GeoDataFrame(\n            df,\n            geometry=gpd.points_from_xy(df.x, df.y),\n            crs=crs,\n        )\n\n        return gdf\n\n    def __repr__(self) -> str:\n        return repr(self._df)\n\n    def _repr_html_(self) -> Any:\n        return self._df._repr_html_()\n\n    @overload\n    def __getitem__(self, key: Hashable | int) -> SkillArray: ...\n\n    @overload\n    def __getitem__(self, key: Iterable[Hashable]) -> SkillTable: ...\n\n    def __getitem__(\n        self, key: Hashable | Iterable[Hashable]\n    ) -> SkillArray | SkillTable:\n        if isinstance(key, int):\n            key = list(self.data.columns)[key]\n        result = self.data[key]\n        if isinstance(result, pd.Series):\n            # I don't think this should be necessary, but 
in some cases the input doesn't contain x and y\n            if \"x\" in self.data.columns and \"y\" in self.data.columns:\n                cols = [\"x\", \"y\", key]\n                return SkillArray(self.data[cols])\n            else:\n                return SkillArray(result.to_frame())\n        elif isinstance(result, pd.DataFrame):\n            return SkillTable(result)\n        else:\n            raise NotImplementedError(\"Unexpected type of result\")\n\n    def __getattr__(self, item: str, *args, **kwargs) -> Any:\n        # note: no help from type hints here!\n        if item in self.data.columns:\n            return self[item]  # Redirects to __getitem__\n        else:\n            # act as a DataFrame... (necessary for style() to work)\n            # drawback is that methods such as head() etc would appear\n            # as working but return a DataFrame instead of a SkillTable!\n            return getattr(self.data, item, *args, **kwargs)\n            # raise AttributeError(\n            #     f\"\"\"\n            #         SkillTable has no attribute {item}; Maybe you are\n            #         looking for the corresponding DataFrame attribute?\n            #         Try exporting the skill table to a DataFrame using sk.to_dataframe().\n            #     \"\"\"\n            # )\n\n    @property\n    def iloc(self, *args, **kwargs):  # type: ignore\n        return self.data.iloc(*args, **kwargs)\n\n    @property\n    def loc(self, *args, **kwargs):  # type: ignore\n        return self.data.loc(*args, **kwargs)\n\n    def sort_index(self, *args, **kwargs) -> SkillTable:  # type: ignore\n        \"\"\"Sort by index (level) e.g. 
sorting by observation\n\n        Wrapping pd.DataFrame.sort_index()\n\n        Returns\n        -------\n        SkillTable\n            A new SkillTable with sorted index\n\n        Examples\n        --------\n        >>> sk = cc.skill()\n        >>> sk.sort_index()\n        >>> sk.sort_index(level=\"observation\")\n        \"\"\"\n        return self.__class__(self.data.sort_index(*args, **kwargs))\n\n    def sort_values(self, *args, **kwargs) -> SkillTable:  # type: ignore\n        \"\"\"Sort by values e.g. sorting by rmse values\n\n        Wrapping pd.DataFrame.sort_values()\n\n        Returns\n        -------\n        SkillTable\n            A new SkillTable with sorted values\n\n        Examples\n        --------\n        >>> sk = cc.skill()\n        >>> sk.sort_values(\"rmse\")\n        >>> sk.sort_values(\"rmse\", ascending=False)\n        >>> sk.sort_values([\"n\", \"rmse\"])\n        \"\"\"\n        return self.__class__(self.data.sort_values(*args, **kwargs))\n\n    def swaplevel(self, *args, **kwargs) -> SkillTable:  # type: ignore\n        \"\"\"Swap the levels of the MultiIndex e.g. 
swapping 'model' and 'observation'\n\n        Wrapping pd.DataFrame.swaplevel()\n\n        Returns\n        -------\n        SkillTable\n            A new SkillTable with swapped levels\n\n        Examples\n        --------\n        >>> sk = cc.skill()\n        >>> sk.swaplevel().sort_index(level=\"observation\")\n        >>> sk.swaplevel(\"model\", \"observation\")\n        >>> sk.swaplevel(0, 1)\n        \"\"\"\n        return self.__class__(self.data.swaplevel(*args, **kwargs))\n\n    @property\n    def mod_names(self) -> list[str]:\n        \"\"\"List of model names (in index)\"\"\"\n        return self._get_index_level_by_name(\"model\")\n\n    @property\n    def obs_names(self) -> list[str]:\n        \"\"\"List of observation names (in index)\"\"\"\n        return self._get_index_level_by_name(\"observation\")\n\n    @property\n    def quantity_names(self) -> list[str]:\n        \"\"\"List of quantity names (in index)\"\"\"\n        return self._get_index_level_by_name(\"quantity\")\n\n    def _get_index_level_by_name(self, name: str) -> list[str]:\n        # Helper function to get unique values of a level in the index (e.g. 
model)\n        index = self._df.index\n        if name in index.names:\n            level = index.names.index(name)\n            return list(index.get_level_values(level).unique())\n        else:\n            return []\n            # raise ValueError(f\"name {name} not in index {list(self.index.names)}\")\n\n    def query(self, query: str) -> SkillTable:\n        \"\"\"Select a subset of the SkillTable by a query string\n\n        wrapping pd.DataFrame.query()\n\n        Parameters\n        ----------\n        query : str\n            string supported by pd.DataFrame.query()\n\n        Returns\n        -------\n        SkillTable\n            A subset of the original SkillTable\n\n        Examples\n        --------\n        >>> sk = cc.skill()\n        >>> sk_above_0p3 = sk.query(\"rmse>0.3\")\n        \"\"\"\n        return self.__class__(self.data.query(query))\n\n    def sel(\n        self, query: str | None = None, reduce_index: bool = True, **kwargs: Any\n    ) -> SkillTable | SkillArray:\n        \"\"\"Select a subset of the SkillTable by a query,\n           (part of) the index, or specific columns\n\n        Parameters\n        ----------\n        reduce_index : bool, optional\n            Should unnecessary levels of the index be removed after subsetting?\n            Removed levels will stay as columns. By default True\n        **kwargs\n            Concrete keys depend on the index names of the SkillTable\n            (from the \"by\" argument in cc.skill() method)\n            \"model\"=... to select specific models,\n            \"observation\"=... 
to select specific observations\n\n        Returns\n        -------\n        SkillTable\n            A subset of the original SkillTable\n\n        Examples\n        --------\n        >>> sk = cc.skill()\n        >>> sk_SW1 = sk.sel(model = \"SW_1\")\n        >>> sk2 = sk.sel(observation = [\"EPL\", \"HKNA\"])\n        \"\"\"\n        if query is not None:\n            warnings.warn(\n                \"s.sel(query=...) is deprecated, use s.query(...) instead\",\n                FutureWarning,\n            )\n            return self.query(query)\n\n        for key, value in kwargs.items():\n            if key == \"metrics\" or key == \"columns\":\n                warnings.warn(\n                    f\"s.sel({key}=...) is deprecated, use getitem s[...] instead\",\n                    FutureWarning,\n                )\n                return self[value]  # type: ignore\n\n        df = self.to_dataframe(drop_xy=False)\n\n        for key, value in kwargs.items():\n            if key in df.index.names:\n                df = self._sel_from_index(df, key, value)\n            else:\n                raise KeyError(\n                    f\"Unknown index {key}. 
Valid index names are {df.index.names}\"\n                )\n\n        if isinstance(df, pd.Series):\n            return SkillArray(df)\n        if reduce_index and isinstance(df.index, pd.MultiIndex):\n            df = self._reduce_index(df)\n        return self.__class__(df)\n\n    def _sel_from_index(\n        self, df: pd.DataFrame, key: str, value: str | int\n    ) -> pd.DataFrame:\n        if (not isinstance(value, str)) and isinstance(value, Iterable):\n            for i, v in enumerate(value):\n                dfi = self._sel_from_index(df, key, v)\n                if i == 0:\n                    dfout = dfi\n                else:\n                    dfout = pd.concat([dfout, dfi])\n            return dfout\n\n        if isinstance(value, int):\n            value = self._idx_to_name(key, value)\n\n        if isinstance(df.index, pd.MultiIndex):\n            df = df.xs(value, level=key, drop_level=False)\n        else:\n            df = df[df.index == value]  # .copy()\n        return df\n\n    def _idx_to_name(self, index_name: str, pos: int) -> str:\n        \"\"\"Assumes that index is valid and idx is int\"\"\"\n        names = self._get_index_level_by_name(index_name)\n        n = len(names)\n        if (pos < 0) or (pos >= n):\n            raise KeyError(f\"Id {pos} is out of bounds for index {index_name} (0, {n})\")\n        return names[pos]\n\n    def _reduce_index(self, df: pd.DataFrame) -> pd.DataFrame:\n        \"\"\"Remove unnecessary levels of MultiIndex\"\"\"\n        df.index = df.index.remove_unused_levels()\n        levels_to_reset = []\n        for j, level in enumerate(df.index.levels):\n            if len(level) == 1:\n                levels_to_reset.append(j)\n        return df.reset_index(level=levels_to_reset)\n\n    def round(self, decimals: int = 3) -> SkillTable:\n        \"\"\"Round all values in SkillTable\n\n        Parameters\n        ----------\n        decimals : int, optional\n            Number of decimal places to round to 
(default: 3).\n            If decimals is negative, it specifies the number of\n            positions to the left of the decimal point.\n\n        Returns\n        -------\n        SkillTable\n            A new SkillTable with rounded values\n        \"\"\"\n\n        return self.__class__(self.data.round(decimals=decimals))\n\n    def style(\n        self,\n        decimals: int = 3,\n        metrics: Iterable[str] | None = None,\n        cmap: str = \"OrRd\",\n        show_best: bool = True,\n        **kwargs: Any,\n    ) -> pd.io.formats.style.Styler:\n        \"\"\"Style SkillTable with colors using pandas style\n\n        Parameters\n        ----------\n        decimals : int, optional\n            Number of decimal places to round to (default: 3).\n        metrics : str or List[str], optional\n            apply background gradient color to these columns, by default all;\n            if columns is [] then no background gradient will be applied.\n        cmap : str, optional\n            colormap of background gradient, by default \"OrRd\",\n            except \"bias\" column which will always be \"coolwarm\"\n        show_best : bool, optional\n            indicate best of each column by underline, by default True\n\n        Returns\n        -------\n        pd.Styler\n            Returns a pandas Styler object.\n\n        Examples\n        --------\n        >>> sk = cc.skill()\n        >>> sk.style()\n        >>> sk.style(precision=1, metrics=\"rmse\")\n        >>> sk.style(cmap=\"Blues\", show_best=False)\n        \"\"\"\n        # identity metric columns\n        float_cols = list(self._df.select_dtypes(include=\"number\").columns)\n\n        if \"precision\" in kwargs:\n            warnings.warn(\n                FutureWarning(\n                    \"precision is deprecated, it has been renamed to decimals\"\n                )\n            )\n            decimals = kwargs[\"precision\"]\n\n        # selected columns\n        if metrics is None:\n           
 metrics = float_cols\n        else:\n            if isinstance(metrics, str):\n                if not metrics:\n                    metrics = []\n                else:\n                    metrics = [metrics]\n            for column in metrics:\n                if column not in float_cols:\n                    raise ValueError(\n                        f\"Invalid column name {column} (must be one of {float_cols})\"\n                    )\n\n        sdf = self._df.style.format(precision=decimals)\n\n        # apply background gradient\n        bg_cols = list(set(metrics) & set(float_cols))\n        if \"bias\" in bg_cols:\n            mm = self._df.bias.abs().max()\n            sdf = sdf.background_gradient(\n                subset=[\"bias\"], cmap=\"coolwarm\", vmin=-mm, vmax=mm\n            )\n            bg_cols.remove(\"bias\")\n        if \"lin_slope\" in bg_cols:\n            mm = (self._df.lin_slope - 1).abs().max()\n            sdf = sdf.background_gradient(\n                subset=[\"lin_slope\"], cmap=\"coolwarm\", vmin=(1 - mm), vmax=(1 + mm)\n            )\n            bg_cols.remove(\"lin_slope\")\n        if len(bg_cols) > 0:\n            cols = list(set(self._small_is_best_metrics) & set(bg_cols))\n            sdf = sdf.background_gradient(subset=cols, cmap=cmap)\n\n            cols = list(set(self._large_is_best_metrics) & set(bg_cols))\n            cmap_r = self._reverse_colormap(cmap)  # type: ignore\n            sdf = sdf.background_gradient(subset=cols, cmap=cmap_r)\n\n        if show_best:\n            cols = list(set(self._large_is_best_metrics) & set(float_cols))\n            sdf = sdf.apply(self._style_max, subset=cols)\n            cols = list(set(self._small_is_best_metrics) & set(float_cols))\n            sdf = sdf.apply(self._style_min, subset=cols)\n            cols = list(set(self._one_is_best_metrics) & set(float_cols))\n            sdf = sdf.apply(self._style_one_best, subset=cols)\n            if \"bias\" in float_cols:\n            
    sdf = sdf.apply(self._style_abs_min, subset=[\"bias\"])\n\n        return sdf\n\n    def _reverse_colormap(self, cmap):  # type: ignore\n        cmap_r = cmap\n        if isinstance(cmap, str):\n            if cmap[-2:] == \"_r\":\n                cmap_r = cmap_r[:-2]\n            else:\n                cmap_r = cmap + \"_r\"\n        else:\n            cmap_r = cmap.reversed()\n        return cmap_r\n\n    def _style_one_best(self, s: pd.Series) -> list[str]:\n        \"\"\"Using underline-etc to highlight the best in a Series.\"\"\"\n        is_best = (s - 1.0).abs() == (s - 1.0).abs().min()\n        cell_style = (\n            \"text-decoration: underline; font-style: italic; font-weight: bold;\"\n        )\n        return [cell_style if v else \"\" for v in is_best]\n\n    def _style_abs_min(self, s: pd.Series) -> list[str]:\n        \"\"\"Using underline-etc to highlight the best in a Series.\"\"\"\n        is_best = s.abs() == s.abs().min()\n        cell_style = (\n            \"text-decoration: underline; font-style: italic; font-weight: bold;\"\n        )\n        return [cell_style if v else \"\" for v in is_best]\n\n    def _style_min(self, s: pd.Series) -> list[str]:\n        \"\"\"Using underline-etc to highlight the best in a Series.\"\"\"\n        cell_style = (\n            \"text-decoration: underline; font-style: italic; font-weight: bold;\"\n        )\n        return [cell_style if v else \"\" for v in (s == s.min())]\n\n    def _style_max(self, s: pd.Series) -> list[str]:\n        \"\"\"Using underline-etc to highlight the best in a Series.\"\"\"\n        cell_style = (\n            \"text-decoration: underline; font-style: italic; font-weight: bold;\"\n        )\n        return [cell_style if v else \"\" for v in (s == s.max())]\n\n    # =============== Deprecated methods ===============\n\n    # TODO: remove plot_* methods in v1.1; warnings are not needed\n    # as the refering method is also deprecated\n    def plot_line(self, **kwargs):  
# type: ignore\n        return self.plot.line(**kwargs)  # type: ignore\n\n    def plot_bar(self, **kwargs):  # type: ignore\n        return self.plot.bar(**kwargs)  # type: ignore\n\n    def plot_barh(self, **kwargs):  # type: ignore\n        return self.plot.barh(**kwargs)  # type: ignore\n\n    def plot_grid(self, **kwargs):  # type: ignore\n        return self.plot.grid(**kwargs)  # type: ignore\n
"},{"location":"api/skill/#modelskill.skill.SkillTable.metrics","title":"metrics property","text":"
metrics\n

List of metrics (columns) in the SkillTable

"},{"location":"api/skill/#modelskill.skill.SkillTable.mod_names","title":"mod_names property","text":"
mod_names\n

List of model names (in index)

"},{"location":"api/skill/#modelskill.skill.SkillTable.obs_names","title":"obs_names property","text":"
obs_names\n

List of observation names (in index)

"},{"location":"api/skill/#modelskill.skill.SkillTable.quantity_names","title":"quantity_names property","text":"
quantity_names\n

List of quantity names (in index)

"},{"location":"api/skill/#modelskill.skill.SkillTable.query","title":"query","text":"
query(query)\n

Select a subset of the SkillTable by a query string

wrapping pd.DataFrame.query()

Parameters:

Name Type Description Default query str

string supported by pd.DataFrame.query()

required

Returns:

Type Description SkillTable

A subset of the original SkillTable

Examples:

>>> sk = cc.skill()\n>>> sk_above_0p3 = sk.query(\"rmse>0.3\")\n
Source code in modelskill/skill.py
def query(self, query: str) -> SkillTable:\n    \"\"\"Select a subset of the SkillTable by a query string\n\n    wrapping pd.DataFrame.query()\n\n    Parameters\n    ----------\n    query : str\n        string supported by pd.DataFrame.query()\n\n    Returns\n    -------\n    SkillTable\n        A subset of the original SkillTable\n\n    Examples\n    --------\n    >>> sk = cc.skill()\n    >>> sk_above_0p3 = sk.query(\"rmse>0.3\")\n    \"\"\"\n    return self.__class__(self.data.query(query))\n
"},{"location":"api/skill/#modelskill.skill.SkillTable.round","title":"round","text":"
round(decimals=3)\n

Round all values in SkillTable

Parameters:

Name Type Description Default decimals int

Number of decimal places to round to (default: 3). If decimals is negative, it specifies the number of positions to the left of the decimal point.

3

Returns:

Type Description SkillTable

A new SkillTable with rounded values

Source code in modelskill/skill.py
def round(self, decimals: int = 3) -> SkillTable:\n    \"\"\"Round all values in SkillTable\n\n    Parameters\n    ----------\n    decimals : int, optional\n        Number of decimal places to round to (default: 3).\n        If decimals is negative, it specifies the number of\n        positions to the left of the decimal point.\n\n    Returns\n    -------\n    SkillTable\n        A new SkillTable with rounded values\n    \"\"\"\n\n    return self.__class__(self.data.round(decimals=decimals))\n
"},{"location":"api/skill/#modelskill.skill.SkillTable.sel","title":"sel","text":"
sel(query=None, reduce_index=True, **kwargs)\n

Select a subset of the SkillTable by a query, (part of) the index, or specific columns

Parameters:

Name Type Description Default reduce_index bool

Should unnecessary levels of the index be removed after subsetting? Removed levels will stay as columns. By default True

True **kwargs Any

Concrete keys depend on the index names of the SkillTable (from the \"by\" argument in cc.skill() method) \"model\"=... to select specific models, \"observation\"=... to select specific observations

{}

Returns:

Type Description SkillTable

A subset of the original SkillTable

Examples:

>>> sk = cc.skill()\n>>> sk_SW1 = sk.sel(model = \"SW_1\")\n>>> sk2 = sk.sel(observation = [\"EPL\", \"HKNA\"])\n
Source code in modelskill/skill.py
def sel(\n    self, query: str | None = None, reduce_index: bool = True, **kwargs: Any\n) -> SkillTable | SkillArray:\n    \"\"\"Select a subset of the SkillTable by a query,\n       (part of) the index, or specific columns\n\n    Parameters\n    ----------\n    reduce_index : bool, optional\n        Should unnecessary levels of the index be removed after subsetting?\n        Removed levels will stay as columns. By default True\n    **kwargs\n        Concrete keys depend on the index names of the SkillTable\n        (from the \"by\" argument in cc.skill() method)\n        \"model\"=... to select specific models,\n        \"observation\"=... to select specific observations\n\n    Returns\n    -------\n    SkillTable\n        A subset of the original SkillTable\n\n    Examples\n    --------\n    >>> sk = cc.skill()\n    >>> sk_SW1 = sk.sel(model = \"SW_1\")\n    >>> sk2 = sk.sel(observation = [\"EPL\", \"HKNA\"])\n    \"\"\"\n    if query is not None:\n        warnings.warn(\n            \"s.sel(query=...) is deprecated, use s.query(...) instead\",\n            FutureWarning,\n        )\n        return self.query(query)\n\n    for key, value in kwargs.items():\n        if key == \"metrics\" or key == \"columns\":\n            warnings.warn(\n                f\"s.sel({key}=...) is deprecated, use getitem s[...] instead\",\n                FutureWarning,\n            )\n            return self[value]  # type: ignore\n\n    df = self.to_dataframe(drop_xy=False)\n\n    for key, value in kwargs.items():\n        if key in df.index.names:\n            df = self._sel_from_index(df, key, value)\n        else:\n            raise KeyError(\n                f\"Unknown index {key}. Valid index names are {df.index.names}\"\n            )\n\n    if isinstance(df, pd.Series):\n        return SkillArray(df)\n    if reduce_index and isinstance(df.index, pd.MultiIndex):\n        df = self._reduce_index(df)\n    return self.__class__(df)\n
"},{"location":"api/skill/#modelskill.skill.SkillTable.sort_index","title":"sort_index","text":"
sort_index(*args, **kwargs)\n

Sort by index (level) e.g. sorting by observation

Wrapping pd.DataFrame.sort_index()

Returns:

Type Description SkillTable

A new SkillTable with sorted index

Examples:

>>> sk = cc.skill()\n>>> sk.sort_index()\n>>> sk.sort_index(level=\"observation\")\n
Source code in modelskill/skill.py
def sort_index(self, *args, **kwargs) -> SkillTable:  # type: ignore\n    \"\"\"Sort by index (level) e.g. sorting by observation\n\n    Wrapping pd.DataFrame.sort_index()\n\n    Returns\n    -------\n    SkillTable\n        A new SkillTable with sorted index\n\n    Examples\n    --------\n    >>> sk = cc.skill()\n    >>> sk.sort_index()\n    >>> sk.sort_index(level=\"observation\")\n    \"\"\"\n    return self.__class__(self.data.sort_index(*args, **kwargs))\n
"},{"location":"api/skill/#modelskill.skill.SkillTable.sort_values","title":"sort_values","text":"
sort_values(*args, **kwargs)\n

Sort by values e.g. sorting by rmse values

Wrapping pd.DataFrame.sort_values()

Returns:

Type Description SkillTable

A new SkillTable with sorted values

Examples:

>>> sk = cc.skill()\n>>> sk.sort_values(\"rmse\")\n>>> sk.sort_values(\"rmse\", ascending=False)\n>>> sk.sort_values([\"n\", \"rmse\"])\n
Source code in modelskill/skill.py
def sort_values(self, *args, **kwargs) -> SkillTable:  # type: ignore\n    \"\"\"Sort by values e.g. sorting by rmse values\n\n    Wrapping pd.DataFrame.sort_values()\n\n    Returns\n    -------\n    SkillTable\n        A new SkillTable with sorted values\n\n    Examples\n    --------\n    >>> sk = cc.skill()\n    >>> sk.sort_values(\"rmse\")\n    >>> sk.sort_values(\"rmse\", ascending=False)\n    >>> sk.sort_values([\"n\", \"rmse\"])\n    \"\"\"\n    return self.__class__(self.data.sort_values(*args, **kwargs))\n
"},{"location":"api/skill/#modelskill.skill.SkillTable.style","title":"style","text":"
style(decimals=3, metrics=None, cmap='OrRd', show_best=True, **kwargs)\n

Style SkillTable with colors using pandas style

Parameters:

Name Type Description Default decimals int

Number of decimal places to round to (default: 3).

3 metrics str or List[str]

apply background gradient color to these columns, by default all; if columns is [] then no background gradient will be applied.

None cmap str

colormap of background gradient, by default \"OrRd\", except \"bias\" column which will always be \"coolwarm\"

'OrRd' show_best bool

indicate best of each column by underline, by default True

True

Returns:

Type Description Styler

Returns a pandas Styler object.

Examples:

>>> sk = cc.skill()\n>>> sk.style()\n>>> sk.style(precision=1, metrics=\"rmse\")\n>>> sk.style(cmap=\"Blues\", show_best=False)\n
Source code in modelskill/skill.py
def style(\n    self,\n    decimals: int = 3,\n    metrics: Iterable[str] | None = None,\n    cmap: str = \"OrRd\",\n    show_best: bool = True,\n    **kwargs: Any,\n) -> pd.io.formats.style.Styler:\n    \"\"\"Style SkillTable with colors using pandas style\n\n    Parameters\n    ----------\n    decimals : int, optional\n        Number of decimal places to round to (default: 3).\n    metrics : str or List[str], optional\n        apply background gradient color to these columns, by default all;\n        if columns is [] then no background gradient will be applied.\n    cmap : str, optional\n        colormap of background gradient, by default \"OrRd\",\n        except \"bias\" column which will always be \"coolwarm\"\n    show_best : bool, optional\n        indicate best of each column by underline, by default True\n\n    Returns\n    -------\n    pd.Styler\n        Returns a pandas Styler object.\n\n    Examples\n    --------\n    >>> sk = cc.skill()\n    >>> sk.style()\n    >>> sk.style(precision=1, metrics=\"rmse\")\n    >>> sk.style(cmap=\"Blues\", show_best=False)\n    \"\"\"\n    # identity metric columns\n    float_cols = list(self._df.select_dtypes(include=\"number\").columns)\n\n    if \"precision\" in kwargs:\n        warnings.warn(\n            FutureWarning(\n                \"precision is deprecated, it has been renamed to decimals\"\n            )\n        )\n        decimals = kwargs[\"precision\"]\n\n    # selected columns\n    if metrics is None:\n        metrics = float_cols\n    else:\n        if isinstance(metrics, str):\n            if not metrics:\n                metrics = []\n            else:\n                metrics = [metrics]\n        for column in metrics:\n            if column not in float_cols:\n                raise ValueError(\n                    f\"Invalid column name {column} (must be one of {float_cols})\"\n                )\n\n    sdf = self._df.style.format(precision=decimals)\n\n    # apply background gradient\n    bg_cols = 
list(set(metrics) & set(float_cols))\n    if \"bias\" in bg_cols:\n        mm = self._df.bias.abs().max()\n        sdf = sdf.background_gradient(\n            subset=[\"bias\"], cmap=\"coolwarm\", vmin=-mm, vmax=mm\n        )\n        bg_cols.remove(\"bias\")\n    if \"lin_slope\" in bg_cols:\n        mm = (self._df.lin_slope - 1).abs().max()\n        sdf = sdf.background_gradient(\n            subset=[\"lin_slope\"], cmap=\"coolwarm\", vmin=(1 - mm), vmax=(1 + mm)\n        )\n        bg_cols.remove(\"lin_slope\")\n    if len(bg_cols) > 0:\n        cols = list(set(self._small_is_best_metrics) & set(bg_cols))\n        sdf = sdf.background_gradient(subset=cols, cmap=cmap)\n\n        cols = list(set(self._large_is_best_metrics) & set(bg_cols))\n        cmap_r = self._reverse_colormap(cmap)  # type: ignore\n        sdf = sdf.background_gradient(subset=cols, cmap=cmap_r)\n\n    if show_best:\n        cols = list(set(self._large_is_best_metrics) & set(float_cols))\n        sdf = sdf.apply(self._style_max, subset=cols)\n        cols = list(set(self._small_is_best_metrics) & set(float_cols))\n        sdf = sdf.apply(self._style_min, subset=cols)\n        cols = list(set(self._one_is_best_metrics) & set(float_cols))\n        sdf = sdf.apply(self._style_one_best, subset=cols)\n        if \"bias\" in float_cols:\n            sdf = sdf.apply(self._style_abs_min, subset=[\"bias\"])\n\n    return sdf\n
"},{"location":"api/skill/#modelskill.skill.SkillTable.swaplevel","title":"swaplevel","text":"
swaplevel(*args, **kwargs)\n

Swap the levels of the MultiIndex e.g. swapping 'model' and 'observation'

Wrapping pd.DataFrame.swaplevel()

Returns:

Type Description SkillTable

A new SkillTable with swapped levels

Examples:

>>> sk = cc.skill()\n>>> sk.swaplevel().sort_index(level=\"observation\")\n>>> sk.swaplevel(\"model\", \"observation\")\n>>> sk.swaplevel(0, 1)\n
Source code in modelskill/skill.py
def swaplevel(self, *args, **kwargs) -> SkillTable:  # type: ignore\n    \"\"\"Swap the levels of the MultiIndex e.g. swapping 'model' and 'observation'\n\n    Wrapping pd.DataFrame.swaplevel()\n\n    Returns\n    -------\n    SkillTable\n        A new SkillTable with swapped levels\n\n    Examples\n    --------\n    >>> sk = cc.skill()\n    >>> sk.swaplevel().sort_index(level=\"observation\")\n    >>> sk.swaplevel(\"model\", \"observation\")\n    >>> sk.swaplevel(0, 1)\n    \"\"\"\n    return self.__class__(self.data.swaplevel(*args, **kwargs))\n
"},{"location":"api/skill/#modelskill.skill.SkillTable.to_dataframe","title":"to_dataframe","text":"
to_dataframe(drop_xy=True)\n

Convert SkillTable to pd.DataFrame

Parameters:

Name Type Description Default drop_xy bool

Drop the x, y coordinates?, by default True

True

Returns:

Type Description DataFrame

Skill data as pd.DataFrame

Source code in modelskill/skill.py
def to_dataframe(self, drop_xy: bool = True) -> pd.DataFrame:\n    \"\"\"Convert SkillTable to pd.DataFrame\n\n    Parameters\n    ----------\n    drop_xy : bool, optional\n        Drop the x, y coordinates?, by default True\n\n    Returns\n    -------\n    pd.DataFrame\n        Skill data as pd.DataFrame\n    \"\"\"\n    if drop_xy:\n        return self.data.drop(columns=[\"x\", \"y\"], errors=\"ignore\")\n    else:\n        return self.data.copy()\n
"},{"location":"api/skill/#modelskill.skill.SkillTable.to_geodataframe","title":"to_geodataframe","text":"
to_geodataframe(crs='EPSG:4326')\n

Convert SkillTable to geopandas.GeoDataFrame

Note: requires geopandas to be installed

Note: requires x and y columns to be present

Parameters:

Name Type Description Default crs str

Coordinate reference system identifier passed to the GeoDataFrame constructor, by default \"EPSG:4326\"

'EPSG:4326'

Returns:

Type Description GeoDataFrame

Skill data as GeoDataFrame

Source code in modelskill/skill.py
def to_geodataframe(self, crs: str = \"EPSG:4326\") -> gpd.GeoDataFrame:\n    \"\"\"Convert SkillTable to geopandas.GeoDataFrame\n\n    Note: requires geopandas to be installed\n\n    Note: requires x and y columns to be present\n\n    Parameters\n    ----------\n    crs : str, optional\n        Coordinate reference system identifier passed to the\n        GeoDataFrame constructor, by default \"EPSG:4326\"\n\n    Returns\n    -------\n    gpd.GeoDataFrame\n        Skill data as GeoDataFrame\n    \"\"\"\n    import geopandas as gpd\n\n    assert \"x\" in self.data.columns\n    assert \"y\" in self.data.columns\n\n    df = self.to_dataframe(drop_xy=False)\n\n    gdf = gpd.GeoDataFrame(\n        df,\n        geometry=gpd.points_from_xy(df.x, df.y),\n        crs=crs,\n    )\n\n    return gdf\n
"},{"location":"api/skill/#modelskill.skill.SkillArray","title":"modelskill.skill.SkillArray","text":"

SkillArray object for visualization obtained by selecting a single metric from a SkillTable.

Examples:

>>> sk = cc.skill()   # SkillTable\n>>> sk.rmse           # SkillArray\n>>> sk.rmse.plot.line()\n
Source code in modelskill/skill.py
class SkillArray:\n    \"\"\"SkillArray object for visualization obtained by\n    selecting a single metric from a SkillTable.\n\n    Examples\n    --------\n    >>> sk = cc.skill()   # SkillTable\n    >>> sk.rmse           # SkillArray\n    >>> sk.rmse.plot.line()\n    \"\"\"\n\n    def __init__(self, data: pd.DataFrame) -> None:\n        self.data = data\n        self._ser = data.iloc[:, -1]  # last column is the metric\n\n        self.plot = SkillArrayPlotter(self)\n        \"\"\"Plot using the SkillArrayPlotter\n\n        Examples\n        --------\n        >>> sk.rmse.plot.line()\n        >>> sk.rmse.plot.bar()\n        >>> sk.rmse.plot.barh()\n        >>> sk.rmse.plot.grid()\n        \"\"\"\n\n    def to_dataframe(self, drop_xy: bool = True) -> pd.DataFrame:\n        \"\"\"Convert SkillArray to pd.DataFrame\n\n        Parameters\n        ----------\n        drop_xy : bool, optional\n            Drop the x, y coordinates?, by default True\n\n        Returns\n        -------\n        pd.DataFrame\n            Skill data as pd.DataFrame\n        \"\"\"\n        if drop_xy:\n            return self._ser.to_frame()\n        else:\n            return self.data.copy()\n\n    def __repr__(self) -> str:\n        return repr(self.to_dataframe())\n\n    def _repr_html_(self) -> Any:\n        return self.to_dataframe()._repr_html_()\n\n    @property\n    def name(self) -> Any:\n        \"\"\"Name of the metric\"\"\"\n        return self._ser.name\n\n    def to_geodataframe(self, crs: str = \"EPSG:4326\") -> gpd.GeoDataFrame:\n        \"\"\"Convert SkillArray to geopandas.GeoDataFrame\n\n        Note: requires geopandas to be installed\n\n        Note: requires x and y columns to be present\n\n        Parameters\n        ----------\n        crs : str, optional\n            Coordinate reference system identifier passed to the\n            GeoDataFrame constructor, by default \"EPSG:4326\"\n\n        Returns\n        -------\n        gpd.GeoDataFrame\n            Skill data 
as GeoDataFrame\n        \"\"\"\n        import geopandas as gpd\n\n        assert \"x\" in self.data.columns\n        assert \"y\" in self.data.columns\n\n        gdf = gpd.GeoDataFrame(\n            self._ser,\n            geometry=gpd.points_from_xy(self.data.x, self.data.y),\n            crs=crs,\n        )\n\n        return gdf\n
"},{"location":"api/skill/#modelskill.skill.SkillArray.name","title":"name property","text":"
name\n

Name of the metric

"},{"location":"api/skill/#modelskill.skill.SkillArray.plot","title":"plot instance-attribute","text":"
plot = SkillArrayPlotter(self)\n

Plot using the SkillArrayPlotter

Examples:

>>> sk.rmse.plot.line()\n>>> sk.rmse.plot.bar()\n>>> sk.rmse.plot.barh()\n>>> sk.rmse.plot.grid()\n
"},{"location":"api/skill/#modelskill.skill.SkillArray.to_dataframe","title":"to_dataframe","text":"
to_dataframe(drop_xy=True)\n

Convert SkillArray to pd.DataFrame

Parameters:

Name Type Description Default drop_xy bool

Drop the x, y coordinates?, by default True

True

Returns:

Type Description DataFrame

Skill data as pd.DataFrame

Source code in modelskill/skill.py
def to_dataframe(self, drop_xy: bool = True) -> pd.DataFrame:\n    \"\"\"Convert SkillArray to pd.DataFrame\n\n    Parameters\n    ----------\n    drop_xy : bool, optional\n        Drop the x, y coordinates?, by default True\n\n    Returns\n    -------\n    pd.DataFrame\n        Skill data as pd.DataFrame\n    \"\"\"\n    if drop_xy:\n        return self._ser.to_frame()\n    else:\n        return self.data.copy()\n
"},{"location":"api/skill/#modelskill.skill.SkillArray.to_geodataframe","title":"to_geodataframe","text":"
to_geodataframe(crs='EPSG:4326')\n

Convert SkillArray to geopandas.GeoDataFrame

Note: requires geopandas to be installed

Note: requires x and y columns to be present

Parameters:

Name Type Description Default crs str

Coordinate reference system identifier passed to the GeoDataFrame constructor, by default \"EPSG:4326\"

'EPSG:4326'

Returns:

Type Description GeoDataFrame

Skill data as GeoDataFrame

Source code in modelskill/skill.py
def to_geodataframe(self, crs: str = \"EPSG:4326\") -> gpd.GeoDataFrame:\n    \"\"\"Convert SkillArray to geopandas.GeoDataFrame\n\n    Note: requires geopandas to be installed\n\n    Note: requires x and y columns to be present\n\n    Parameters\n    ----------\n    crs : str, optional\n        Coordinate reference system identifier passed to the\n        GeoDataFrame constructor, by default \"EPSG:4326\"\n\n    Returns\n    -------\n    gpd.GeoDataFrame\n        Skill data as GeoDataFrame\n    \"\"\"\n    import geopandas as gpd\n\n    assert \"x\" in self.data.columns\n    assert \"y\" in self.data.columns\n\n    gdf = gpd.GeoDataFrame(\n        self._ser,\n        geometry=gpd.points_from_xy(self.data.x, self.data.y),\n        crs=crs,\n    )\n\n    return gdf\n
"},{"location":"api/skill/#modelskill.skill.SkillArrayPlotter","title":"modelskill.skill.SkillArrayPlotter","text":"

SkillArrayPlotter object for visualization of a single metric (SkillArray)

plot.line() : line plot plot.bar() : bar chart plot.barh() : horizontal bar chart plot.grid() : colored grid

Source code in modelskill/skill.py
class SkillArrayPlotter:\n    \"\"\"SkillArrayPlotter object for visualization of a single metric (SkillArray)\n\n    plot.line() : line plot\n    plot.bar() : bar chart\n    plot.barh() : horizontal bar chart\n    plot.grid() : colored grid\n    \"\"\"\n\n    def __init__(self, skillarray: \"SkillArray\") -> None:\n        self.skillarray = skillarray\n\n    def _name_to_title_in_kwargs(self, kwargs: Any) -> None:\n        if \"title\" not in kwargs:\n            if self.skillarray.name is not None:\n                kwargs[\"title\"] = self.skillarray.name\n\n    def _get_plot_df(self, level: int | str = 0) -> pd.DataFrame:\n        ser = self.skillarray._ser\n        if isinstance(ser.index, pd.MultiIndex):\n            df = ser.unstack(level=level)\n        else:\n            df = ser.to_frame()\n        return df\n\n    # TODO hide this for now until we are certain about the API\n    # def map(self, **kwargs):\n    #     if \"model\" in self.skillarray.data.index.names:\n    #         n_models = len(self.skillarray.data.reset_index().model.unique())\n    #         if n_models > 1:\n    #             raise ValueError(\n    #                 \"map() is only possible for single model skill. Use .sel(model=...) to select a single model.\"\n    #             )\n\n    #     gdf = self.skillarray.to_geodataframe()\n    #     column = self.skillarray.name\n    #     kwargs = {\"marker_kwds\": {\"radius\": 10}} | kwargs\n\n    #     return gdf.explore(column=column, **kwargs)\n\n    def __call__(self, *args: Any, **kwds: Any) -> Any:\n        raise NotImplementedError(\n            \"It is not possible to call plot directly (has no default)! Use one of the plot methods explicitly e.g. plot.line() or plot.bar()\"\n        )\n\n    def line(\n        self,\n        level: int | str = 0,\n        **kwargs: Any,\n    ) -> Axes:\n        \"\"\"Plot statistic as a lines using pd.DataFrame.plot.line()\n\n        Primarily for MultiIndex skill objects, e.g. 
multiple models and multiple observations\n\n        Parameters\n        ----------\n        level : int or str, optional\n            level to unstack, by default 0\n        **kwargs\n            key word arguments to be pased to pd.DataFrame.plot.line()\n            e.g. marker, title, figsize, ...\n\n        Examples\n        --------\n        >>> sk = cc.skill()[\"rmse\"]\n        >>> sk.plot.line()\n        >>> sk.plot.line(marker=\"o\", linestyle=':')\n        >>> sk.plot.line(color=['0.2', '0.4', '0.6'])\n        \"\"\"\n        df = self._get_plot_df(level=level)\n        self._name_to_title_in_kwargs(kwargs)\n        axes = df.plot.line(**kwargs)\n\n        xlabels = list(df.index)\n        numeric_index = all(isinstance(item, (int, float)) for item in xlabels)\n\n        if not isinstance(axes, Iterable):\n            axes = [axes]\n        for ax in axes:\n            if not isinstance(df.index, pd.DatetimeIndex):\n                if numeric_index:\n                    xlabel_positions = xlabels\n                else:\n                    xlabel_positions = np.arange(len(xlabels)).tolist()\n                ax.set_xticks(xlabel_positions)\n                ax.set_xticklabels(xlabels, rotation=90)\n        return axes\n\n    def bar(self, level: int | str = 0, **kwargs: Any) -> Axes:\n        \"\"\"Plot statistic as bar chart using pd.DataFrame.plot.bar()\n\n        Parameters\n        ----------\n        level : int or str, optional\n            level to unstack, by default 0\n        **kwargs\n            key word arguments to be pased to pd.DataFrame.plot.bar()\n            e.g. 
color, title, figsize, ...\n\n        Returns\n        -------\n        AxesSubplot\n\n        Examples\n        --------\n        >>> sk = cc.skill()[\"rmse\"]\n        >>> sk.plot.bar()\n        >>> sk.plot.bar(level=\"observation\")\n        >>> sk.plot.bar(title=\"Root Mean Squared Error\")\n        >>> sk.plot.bar(color=[\"red\",\"blue\"])\n        \"\"\"\n        df = self._get_plot_df(level=level)\n        self._name_to_title_in_kwargs(kwargs)\n        return df.plot.bar(**kwargs)\n\n    def barh(self, level: int | str = 0, **kwargs: Any) -> Axes:\n        \"\"\"Plot statistic as horizontal bar chart using pd.DataFrame.plot.barh()\n\n        Parameters\n        ----------\n        level : int or str, optional\n            level to unstack, by default 0\n        **kwargs\n            key word arguments to be passed to pd.DataFrame.plot.barh()\n            e.g. color, title, figsize, ...\n\n        Returns\n        -------\n        AxesSubplot\n\n        Examples\n        --------\n        >>> sk = cc.skill()[\"rmse\"]\n        >>> sk.plot.barh()\n        >>> sk.plot.barh(level=\"observation\")\n        >>> sk.plot.barh(title=\"Root Mean Squared Error\")\n        \"\"\"\n        df = self._get_plot_df(level)\n        self._name_to_title_in_kwargs(kwargs)\n        return df.plot.barh(**kwargs)\n\n    def grid(\n        self,\n        show_numbers: bool = True,\n        precision: int = 3,\n        fmt: str | None = None,\n        ax: Axes | None = None,\n        figsize: tuple[float, float] | None = None,\n        title: str | None = None,\n        cmap: str | Colormap | None = None,\n    ) -> Axes | None:\n        \"\"\"Plot statistic as a colored grid, optionally with values in the cells.\n\n        Primarily for MultiIndex skill objects, e.g. 
multiple models and multiple observations\n\n        Parameters\n        ----------\n        show_numbers : bool, optional\n            should values of the static be shown in the cells?, by default True\n            if False, a colorbar will be displayed instead\n        precision : int, optional\n            number of decimals if show_numbers, by default 3\n        fmt : str, optional\n            format string, e.g. \".0%\" to show value as percentage\n        ax : Axes, optional\n            matplotlib axes, by default None\n        figsize : Tuple(float, float), optional\n            figure size, by default None\n        title : str, optional\n            plot title, by default name of statistic\n        cmap : str, optional\n            colormap, by default \"OrRd\" (\"coolwarm\" if bias)\n\n        Returns\n        -------\n        AxesSubplot\n\n        Examples\n        --------\n        >>> sk = cc.skill()[\"rmse\"]\n        >>> sk.plot.grid()\n        >>> sk.plot.grid(show_numbers=False, cmap=\"magma\")\n        >>> sk.plot.grid(precision=1)\n        >>> sk.plot.grid(fmt=\".0%\", title=\"Root Mean Squared Error\")\n        \"\"\"\n\n        s = self.skillarray\n        ser = s._ser\n\n        errors = _validate_multi_index(ser.index)  # type: ignore\n        if len(errors) > 0:\n            warnings.warn(\"plot_grid: \" + \"\\n\".join(errors))\n            # TODO raise error?\n            return None\n            # df = self.df[field]    TODO: at_least_2d...\n        df = ser.unstack()\n\n        vmin = None\n        vmax = None\n        if cmap is None:\n            cmap = \"OrRd\"\n            if s.name == \"bias\":\n                cmap = \"coolwarm\"\n                mm = ser.abs().max()\n                vmin = -mm\n                vmax = mm\n        if title is None:\n            title = s.name\n        xlabels = list(df.keys())\n        nx = len(xlabels)\n        ylabels = list(df.index)\n        ny = len(ylabels)\n\n        if (fmt is not None) 
and fmt[0] != \"{\":\n            fmt = \"{:\" + fmt + \"}\"\n\n        if figsize is None:\n            figsize = (nx, ny)\n        fig, ax = _get_fig_ax(ax, figsize)\n        assert ax is not None\n        pcm = ax.pcolormesh(df, cmap=cmap, vmin=vmin, vmax=vmax)\n        ax.set_xticks(np.arange(nx) + 0.5)\n        ax.set_xticklabels(xlabels, rotation=90)\n        ax.set_yticks(np.arange(ny) + 0.5)\n        ax.set_yticklabels(ylabels)\n        if show_numbers:\n            mean_val = df.to_numpy().mean()\n            for ii in range(ny):\n                for jj in range(nx):\n                    val = df.iloc[ii, jj].round(precision)\n                    col = \"w\" if val > mean_val else \"k\"\n                    if s.name == \"bias\":\n                        col = \"w\" if np.abs(val) > (0.7 * mm) else \"k\"\n                    if fmt is not None:\n                        val = fmt.format(val)\n                    ax.text(\n                        jj + 0.5,\n                        ii + 0.5,\n                        val,\n                        ha=\"center\",\n                        va=\"center\",\n                        # size=15,\n                        color=col,\n                    )\n        else:\n            fig.colorbar(pcm, ax=ax)\n        ax.set_title(title, fontsize=14)\n        return ax\n
"},{"location":"api/skill/#modelskill.skill.SkillArrayPlotter.bar","title":"bar","text":"
bar(level=0, **kwargs)\n

Plot statistic as bar chart using pd.DataFrame.plot.bar()

Parameters:

Name Type Description Default level int or str

level to unstack, by default 0

0 **kwargs Any

key word arguments to be pased to pd.DataFrame.plot.bar() e.g. color, title, figsize, ...

{}

Returns:

Type Description AxesSubplot

Examples:

>>> sk = cc.skill()[\"rmse\"]\n>>> sk.plot.bar()\n>>> sk.plot.bar(level=\"observation\")\n>>> sk.plot.bar(title=\"Root Mean Squared Error\")\n>>> sk.plot.bar(color=[\"red\",\"blue\"])\n
Source code in modelskill/skill.py
def bar(self, level: int | str = 0, **kwargs: Any) -> Axes:\n    \"\"\"Plot statistic as bar chart using pd.DataFrame.plot.bar()\n\n    Parameters\n    ----------\n    level : int or str, optional\n        level to unstack, by default 0\n    **kwargs\n        key word arguments to be pased to pd.DataFrame.plot.bar()\n        e.g. color, title, figsize, ...\n\n    Returns\n    -------\n    AxesSubplot\n\n    Examples\n    --------\n    >>> sk = cc.skill()[\"rmse\"]\n    >>> sk.plot.bar()\n    >>> sk.plot.bar(level=\"observation\")\n    >>> sk.plot.bar(title=\"Root Mean Squared Error\")\n    >>> sk.plot.bar(color=[\"red\",\"blue\"])\n    \"\"\"\n    df = self._get_plot_df(level=level)\n    self._name_to_title_in_kwargs(kwargs)\n    return df.plot.bar(**kwargs)\n
"},{"location":"api/skill/#modelskill.skill.SkillArrayPlotter.barh","title":"barh","text":"
barh(level=0, **kwargs)\n

Plot statistic as horizontal bar chart using pd.DataFrame.plot.barh()

Parameters:

Name Type Description Default level int or str

level to unstack, by default 0

0 **kwargs Any

key word arguments to be passed to pd.DataFrame.plot.barh() e.g. color, title, figsize, ...

{}

Returns:

Type Description AxesSubplot

Examples:

>>> sk = cc.skill()[\"rmse\"]\n>>> sk.plot.barh()\n>>> sk.plot.barh(level=\"observation\")\n>>> sk.plot.barh(title=\"Root Mean Squared Error\")\n
Source code in modelskill/skill.py
def barh(self, level: int | str = 0, **kwargs: Any) -> Axes:\n    \"\"\"Plot statistic as horizontal bar chart using pd.DataFrame.plot.barh()\n\n    Parameters\n    ----------\n    level : int or str, optional\n        level to unstack, by default 0\n    **kwargs\n        key word arguments to be passed to pd.DataFrame.plot.barh()\n        e.g. color, title, figsize, ...\n\n    Returns\n    -------\n    AxesSubplot\n\n    Examples\n    --------\n    >>> sk = cc.skill()[\"rmse\"]\n    >>> sk.plot.barh()\n    >>> sk.plot.barh(level=\"observation\")\n    >>> sk.plot.barh(title=\"Root Mean Squared Error\")\n    \"\"\"\n    df = self._get_plot_df(level)\n    self._name_to_title_in_kwargs(kwargs)\n    return df.plot.barh(**kwargs)\n
"},{"location":"api/skill/#modelskill.skill.SkillArrayPlotter.grid","title":"grid","text":"
grid(show_numbers=True, precision=3, fmt=None, ax=None, figsize=None, title=None, cmap=None)\n

Plot statistic as a colored grid, optionally with values in the cells.

Primarily for MultiIndex skill objects, e.g. multiple models and multiple observations

Parameters:

Name Type Description Default show_numbers bool

should values of the static be shown in the cells?, by default True if False, a colorbar will be displayed instead

True precision int

number of decimals if show_numbers, by default 3

3 fmt str

format string, e.g. \".0%\" to show value as percentage

None ax Axes

matplotlib axes, by default None

None figsize Tuple(float, float)

figure size, by default None

None title str

plot title, by default name of statistic

None cmap str

colormap, by default \"OrRd\" (\"coolwarm\" if bias)

None

Returns:

Type Description AxesSubplot

Examples:

>>> sk = cc.skill()[\"rmse\"]\n>>> sk.plot.grid()\n>>> sk.plot.grid(show_numbers=False, cmap=\"magma\")\n>>> sk.plot.grid(precision=1)\n>>> sk.plot.grid(fmt=\".0%\", title=\"Root Mean Squared Error\")\n
Source code in modelskill/skill.py
def grid(\n    self,\n    show_numbers: bool = True,\n    precision: int = 3,\n    fmt: str | None = None,\n    ax: Axes | None = None,\n    figsize: tuple[float, float] | None = None,\n    title: str | None = None,\n    cmap: str | Colormap | None = None,\n) -> Axes | None:\n    \"\"\"Plot statistic as a colored grid, optionally with values in the cells.\n\n    Primarily for MultiIndex skill objects, e.g. multiple models and multiple observations\n\n    Parameters\n    ----------\n    show_numbers : bool, optional\n        should values of the static be shown in the cells?, by default True\n        if False, a colorbar will be displayed instead\n    precision : int, optional\n        number of decimals if show_numbers, by default 3\n    fmt : str, optional\n        format string, e.g. \".0%\" to show value as percentage\n    ax : Axes, optional\n        matplotlib axes, by default None\n    figsize : Tuple(float, float), optional\n        figure size, by default None\n    title : str, optional\n        plot title, by default name of statistic\n    cmap : str, optional\n        colormap, by default \"OrRd\" (\"coolwarm\" if bias)\n\n    Returns\n    -------\n    AxesSubplot\n\n    Examples\n    --------\n    >>> sk = cc.skill()[\"rmse\"]\n    >>> sk.plot.grid()\n    >>> sk.plot.grid(show_numbers=False, cmap=\"magma\")\n    >>> sk.plot.grid(precision=1)\n    >>> sk.plot.grid(fmt=\".0%\", title=\"Root Mean Squared Error\")\n    \"\"\"\n\n    s = self.skillarray\n    ser = s._ser\n\n    errors = _validate_multi_index(ser.index)  # type: ignore\n    if len(errors) > 0:\n        warnings.warn(\"plot_grid: \" + \"\\n\".join(errors))\n        # TODO raise error?\n        return None\n        # df = self.df[field]    TODO: at_least_2d...\n    df = ser.unstack()\n\n    vmin = None\n    vmax = None\n    if cmap is None:\n        cmap = \"OrRd\"\n        if s.name == \"bias\":\n            cmap = \"coolwarm\"\n            mm = ser.abs().max()\n            vmin = -mm\n         
   vmax = mm\n    if title is None:\n        title = s.name\n    xlabels = list(df.keys())\n    nx = len(xlabels)\n    ylabels = list(df.index)\n    ny = len(ylabels)\n\n    if (fmt is not None) and fmt[0] != \"{\":\n        fmt = \"{:\" + fmt + \"}\"\n\n    if figsize is None:\n        figsize = (nx, ny)\n    fig, ax = _get_fig_ax(ax, figsize)\n    assert ax is not None\n    pcm = ax.pcolormesh(df, cmap=cmap, vmin=vmin, vmax=vmax)\n    ax.set_xticks(np.arange(nx) + 0.5)\n    ax.set_xticklabels(xlabels, rotation=90)\n    ax.set_yticks(np.arange(ny) + 0.5)\n    ax.set_yticklabels(ylabels)\n    if show_numbers:\n        mean_val = df.to_numpy().mean()\n        for ii in range(ny):\n            for jj in range(nx):\n                val = df.iloc[ii, jj].round(precision)\n                col = \"w\" if val > mean_val else \"k\"\n                if s.name == \"bias\":\n                    col = \"w\" if np.abs(val) > (0.7 * mm) else \"k\"\n                if fmt is not None:\n                    val = fmt.format(val)\n                ax.text(\n                    jj + 0.5,\n                    ii + 0.5,\n                    val,\n                    ha=\"center\",\n                    va=\"center\",\n                    # size=15,\n                    color=col,\n                )\n    else:\n        fig.colorbar(pcm, ax=ax)\n    ax.set_title(title, fontsize=14)\n    return ax\n
"},{"location":"api/skill/#modelskill.skill.SkillArrayPlotter.line","title":"line","text":"
line(level=0, **kwargs)\n

Plot statistic as a lines using pd.DataFrame.plot.line()

Primarily for MultiIndex skill objects, e.g. multiple models and multiple observations

Parameters:

Name Type Description Default level int or str

level to unstack, by default 0

0 **kwargs Any

key word arguments to be pased to pd.DataFrame.plot.line() e.g. marker, title, figsize, ...

{}

Examples:

>>> sk = cc.skill()[\"rmse\"]\n>>> sk.plot.line()\n>>> sk.plot.line(marker=\"o\", linestyle=':')\n>>> sk.plot.line(color=['0.2', '0.4', '0.6'])\n
Source code in modelskill/skill.py
def line(\n    self,\n    level: int | str = 0,\n    **kwargs: Any,\n) -> Axes:\n    \"\"\"Plot statistic as a lines using pd.DataFrame.plot.line()\n\n    Primarily for MultiIndex skill objects, e.g. multiple models and multiple observations\n\n    Parameters\n    ----------\n    level : int or str, optional\n        level to unstack, by default 0\n    **kwargs\n        key word arguments to be pased to pd.DataFrame.plot.line()\n        e.g. marker, title, figsize, ...\n\n    Examples\n    --------\n    >>> sk = cc.skill()[\"rmse\"]\n    >>> sk.plot.line()\n    >>> sk.plot.line(marker=\"o\", linestyle=':')\n    >>> sk.plot.line(color=['0.2', '0.4', '0.6'])\n    \"\"\"\n    df = self._get_plot_df(level=level)\n    self._name_to_title_in_kwargs(kwargs)\n    axes = df.plot.line(**kwargs)\n\n    xlabels = list(df.index)\n    numeric_index = all(isinstance(item, (int, float)) for item in xlabels)\n\n    if not isinstance(axes, Iterable):\n        axes = [axes]\n    for ax in axes:\n        if not isinstance(df.index, pd.DatetimeIndex):\n            if numeric_index:\n                xlabel_positions = xlabels\n            else:\n                xlabel_positions = np.arange(len(xlabels)).tolist()\n            ax.set_xticks(xlabel_positions)\n            ax.set_xticklabels(xlabels, rotation=90)\n    return axes\n
"},{"location":"api/model/","title":"Model Result","text":"

A model result can either be a simple point/track, or spatial field (e.g. 2d dfsu file) from which data can be extracted at the observation positions by spatial interpolation. The following types are available:

  • Timeseries
    • PointModelResult - a point result from a dfs0/nc file or a DataFrame
    • TrackModelResult - a track (moving point) result from a dfs0/nc file or a DataFrame
  • SpatialField (extractable)
    • GridModelResult - a spatial field from a dfs2/nc file or a Xarray Dataset
    • DfsuModelResult - a spatial field from a dfsu file

A model result can be created by explicitly invoking one of the above classes or using the model_result() function which will return the appropriate type based on the input data (if possible).

"},{"location":"api/model/dfsu/","title":"DfsuModelResult","text":""},{"location":"api/model/dfsu/#modelskill.DfsuModelResult","title":"modelskill.DfsuModelResult","text":"

Bases: SpatialField

Construct a DfsuModelResult from a dfsu file or mikeio.Dataset/DataArray.

Parameters:

Name Type Description Default data UnstructuredType

the input data or file path

required name Optional[str]

The name of the model result, by default None (will be set to file name or item name)

None item str | int | None

If multiple items/arrays are present in the input an item must be given (as either an index or a string), by default None

None quantity Quantity

Model quantity, for MIKE files this is inferred from the EUM information

None aux_items Optional[list[int | str]]

Auxiliary items, by default None

None Source code in modelskill/model/dfsu.py
class DfsuModelResult(SpatialField):\n    \"\"\"Construct a DfsuModelResult from a dfsu file or mikeio.Dataset/DataArray.\n\n    Parameters\n    ----------\n    data : types.UnstructuredType\n        the input data or file path\n    name : Optional[str], optional\n        The name of the model result,\n        by default None (will be set to file name or item name)\n    item : str | int | None, optional\n        If multiple items/arrays are present in the input an item\n        must be given (as either an index or a string), by default None\n    quantity : Quantity, optional\n        Model quantity, for MIKE files this is inferred from the EUM information\n    aux_items : Optional[list[int | str]], optional\n        Auxiliary items, by default None\n    \"\"\"\n\n    def __init__(\n        self,\n        data: UnstructuredType,\n        *,\n        name: Optional[str] = None,\n        item: str | int | None = None,\n        quantity: Optional[Quantity] = None,\n        aux_items: Optional[list[int | str]] = None,\n    ) -> None:\n        filename = None\n\n        assert isinstance(\n            data, get_args(UnstructuredType)\n        ), \"Could not construct DfsuModelResult from provided data\"\n\n        if isinstance(data, (str, Path)):\n            if Path(data).suffix != \".dfsu\":\n                raise ValueError(f\"File must be a dfsu file, not {Path(data).suffix}\")\n            name = name or Path(data).stem\n            filename = str(data)\n            data = mikeio.open(data)\n\n        elif isinstance(data, (mikeio.DataArray, mikeio.Dataset)):\n            pass\n        else:\n            raise ValueError(\n                f\"data type must be .dfsu or dfsu-Dataset/DataArray. 
Not {type(data)}.\"\n            )\n\n        if isinstance(data, mikeio.DataArray):\n            if item is not None:\n                raise ValueError(\"item must be None when data is a DataArray\")\n            if aux_items is not None:\n                raise ValueError(\"aux_items must be None when data is a DataArray\")\n            item_info = data.item\n            item = data.name\n            self.sel_items = SelectedItems(values=data.name, aux=[])\n            data = mikeio.Dataset({data.name: data})\n        else:\n            item_names = [i.name for i in data.items]\n            idx = _get_idx(x=item, valid_names=item_names)\n            item_info = data.items[idx]\n\n            self.sel_items = SelectedItems.parse(\n                item_names, item=item, aux_items=aux_items\n            )\n            item = self.sel_items.values\n        if isinstance(data, mikeio.Dataset):\n            data = data[self.sel_items.all]\n\n        self.data: mikeio.dfsu.Dfsu2DH | mikeio.Dataset = data\n        self.name = name or str(item)\n        self.quantity = (\n            Quantity.from_mikeio_iteminfo(item_info) if quantity is None else quantity\n        )\n        self.filename = filename  # TODO: remove? 
backward compatibility\n\n    def __repr__(self) -> str:\n        res = []\n        res.append(f\"<{self.__class__.__name__}>: {self.name}\")\n        res.append(f\"Time: {self.time[0]} - {self.time[-1]}\")\n        res.append(f\"Quantity: {self.quantity}\")\n        if len(self.sel_items.aux) > 0:\n            res.append(f\"Auxiliary variables: {', '.join(self.sel_items.aux)}\")\n        return \"\\n\".join(res)\n\n    @property\n    def time(self) -> pd.DatetimeIndex:\n        return pd.DatetimeIndex(self.data.time)\n\n    def _in_domain(self, x: float, y: float) -> bool:\n        return self.data.geometry.contains([x, y])  # type: ignore\n\n    def extract(\n        self, observation: Observation, spatial_method: Optional[str] = None\n    ) -> PointModelResult | TrackModelResult:\n        \"\"\"Extract ModelResult at observation positions\n\n        Note: this method is typically not called directly, but by the match() method.\n\n        Parameters\n        ----------\n        observation : <PointObservation> or <TrackObservation>\n            positions (and times) at which modelresult should be extracted\n        spatial_method : Optional[str], optional\n            spatial selection/interpolation method, 'contained' (=isel),\n            'nearest', 'inverse_distance' (with 5 nearest points),\n            by default None = 'inverse_distance'\n\n        Returns\n        -------\n        PointModelResult or TrackModelResult\n            extracted modelresult with the same geometry as the observation\n        \"\"\"\n        method = self._parse_spatial_method(spatial_method)\n\n        _validate_overlap_in_time(self.time, observation)\n        if isinstance(observation, PointObservation):\n            return self._extract_point(observation, spatial_method=method)\n        elif isinstance(observation, TrackObservation):\n            return self._extract_track(observation, spatial_method=method)\n        else:\n            raise NotImplementedError(\n               
 f\"Extraction from {type(self.data)} to {type(observation)} is not implemented.\"\n            )\n\n    @staticmethod\n    def _parse_spatial_method(method: str | None) -> str | None:\n        METHOD_MAP = {\n            \"isel\": \"contained\",\n            \"contained\": \"contained\",\n            \"IDW\": \"inverse_distance\",\n            \"inverse_distance\": \"inverse_distance\",\n            \"nearest\": \"nearest\",\n            None: None,\n        }\n\n        if method not in METHOD_MAP:\n            raise ValueError(\n                f\"spatial_method for Dfsu must be 'nearest', 'contained', or 'inverse_distance'. Not {method}.\"\n            )\n        else:\n            return METHOD_MAP[method]\n\n    def _extract_point(\n        self, observation: PointObservation, spatial_method: Optional[str] = None\n    ) -> PointModelResult:\n        \"\"\"Spatially extract a PointModelResult from a DfsuModelResult\n        given a PointObservation. No time interpolation is done!\n\n        Note: 'inverse_distance' method uses 5 nearest points and is the default.\n        \"\"\"\n\n        method = spatial_method or \"inverse_distance\"\n        assert method in [\"nearest\", \"contained\", \"inverse_distance\"]\n        n_nearest = (\n            min(5, self.data.geometry.n_elements) if method == \"inverse_distance\" else 1\n        )\n\n        x, y, z = observation.x, observation.y, observation.z\n        if not self._in_domain(x, y):\n            raise ValueError(\n                f\"PointObservation '{observation.name}' ({x}, {y}) outside model domain!\"\n            )\n\n        if method == \"contained\":\n            signature = inspect.signature(self.data.geometry.find_index)\n            if \"z\" in signature.parameters and z is not None:\n                elemids = self.data.geometry.find_index(x=x, y=y, z=z)\n            else:\n                elemids = self.data.geometry.find_index(x=x, y=y)\n            if isinstance(self.data, mikeio.Dataset):\n  
              ds_model = self.data.isel(element=elemids)\n            else:  # Dfsu\n                ds_model = self.data.read(elements=elemids, items=self.sel_items.all)\n        else:\n            if z is not None:\n                raise NotImplementedError(\n                    \"Interpolation in 3d files is not supported, use spatial_method='contained' instead\"\n                )\n            if isinstance(self.data, mikeio.dfsu.Dfsu2DH):\n                elemids = self.data.geometry.find_nearest_elements(\n                    x, y, n_nearest=n_nearest\n                )\n                # sort elemids, to ensure consistent results with all versions of mikeio\n                if isinstance(elemids, np.ndarray):\n                    elemids = np.sort(elemids)\n\n                ds = self.data.read(elements=elemids, items=self.sel_items.all)\n                ds_model = (\n                    ds.interp(x=x, y=y, n_nearest=n_nearest) if n_nearest > 1 else ds\n                )\n            elif isinstance(self.data, mikeio.Dataset):\n                ds_model = self.data.interp(x=x, y=y, n_nearest=n_nearest)\n\n        assert isinstance(ds_model, mikeio.Dataset)\n\n        # TODO not sure why we rename here\n        assert self.name is not None\n        ds_model.rename({ds_model.items[0].name: self.name}, inplace=True)\n\n        return PointModelResult(\n            data=ds_model,\n            item=self.name,\n            x=ds_model.geometry.x,\n            y=ds_model.geometry.y,\n            name=self.name,\n            quantity=self.quantity,\n            aux_items=self.sel_items.aux,\n        )\n\n    def _extract_track(\n        self, observation: TrackObservation, spatial_method: Optional[str] = None\n    ) -> TrackModelResult:\n        \"\"\"Extract a TrackModelResult from a DfsuModelResult (when data is a Dfsu object),\n        given a TrackObservation.\n\n        Wraps MIKEIO's extract_track method (which has the default method='nearest').\n\n        MIKE 
IO's extract_track, inverse_distance method, uses 5 nearest points.\n        \"\"\"\n        method = spatial_method or \"inverse_distance\"\n        if method == \"contained\":\n            raise NotImplementedError(\n                \"spatial method 'contained' (=isel) not implemented for track extraction in MIKE IO\"\n            )\n        assert method in [\"nearest\", \"inverse_distance\"]\n\n        assert isinstance(\n            self.data, (mikeio.dfsu.Dfsu2DH, mikeio.DataArray, mikeio.Dataset)\n        )\n\n        track = observation.data.to_dataframe()\n\n        if isinstance(self.data, mikeio.DataArray):\n            ds_model = self.data.extract_track(track=track, method=method)\n            ds_model.rename({self.data.name: self.name}, inplace=True)\n            aux_items = None\n        else:\n            if isinstance(self.data, mikeio.dfsu.Dfsu2DH):\n                ds_model = self.data.extract_track(\n                    track=track, items=self.sel_items.all, method=method\n                )\n            elif isinstance(self.data, mikeio.Dataset):\n                ds_model = self.data[self.sel_items.all].extract_track(\n                    track=track, method=method\n                )\n            ds_model.rename({self.sel_items.values: self.name}, inplace=True)\n            aux_items = self.sel_items.aux\n\n        item_names = [i.name for i in ds_model.items]\n        x_name = \"Longitude\" if \"Longitude\" in item_names else \"x\"\n        y_name = \"Latitude\" if \"Latitude\" in item_names else \"y\"\n\n        return TrackModelResult(\n            data=ds_model.dropna(),  # TODO: not on aux cols\n            item=self.name,\n            x_item=x_name,\n            y_item=y_name,\n            name=self.name,\n            quantity=self.quantity,\n            aux_items=aux_items,\n        )\n
"},{"location":"api/model/dfsu/#modelskill.DfsuModelResult.extract","title":"extract","text":"
extract(observation, spatial_method=None)\n

Extract ModelResult at observation positions

Note: this method is typically not called directly, but by the match() method.

Parameters:

Name Type Description Default observation <PointObservation> or <TrackObservation>

positions (and times) at which modelresult should be extracted

required spatial_method Optional[str]

spatial selection/interpolation method, 'contained' (=isel), 'nearest', 'inverse_distance' (with 5 nearest points), by default None = 'inverse_distance'

None

Returns:

Type Description PointModelResult or TrackModelResult

extracted modelresult with the same geometry as the observation

Source code in modelskill/model/dfsu.py
def extract(\n    self, observation: Observation, spatial_method: Optional[str] = None\n) -> PointModelResult | TrackModelResult:\n    \"\"\"Extract ModelResult at observation positions\n\n    Note: this method is typically not called directly, but by the match() method.\n\n    Parameters\n    ----------\n    observation : <PointObservation> or <TrackObservation>\n        positions (and times) at which modelresult should be extracted\n    spatial_method : Optional[str], optional\n        spatial selection/interpolation method, 'contained' (=isel),\n        'nearest', 'inverse_distance' (with 5 nearest points),\n        by default None = 'inverse_distance'\n\n    Returns\n    -------\n    PointModelResult or TrackModelResult\n        extracted modelresult with the same geometry as the observation\n    \"\"\"\n    method = self._parse_spatial_method(spatial_method)\n\n    _validate_overlap_in_time(self.time, observation)\n    if isinstance(observation, PointObservation):\n        return self._extract_point(observation, spatial_method=method)\n    elif isinstance(observation, TrackObservation):\n        return self._extract_track(observation, spatial_method=method)\n    else:\n        raise NotImplementedError(\n            f\"Extraction from {type(self.data)} to {type(observation)} is not implemented.\"\n        )\n
"},{"location":"api/model/dummy/","title":"DummyModelResult","text":""},{"location":"api/model/dummy/#modelskill.DummyModelResult","title":"modelskill.DummyModelResult dataclass","text":"Source code in modelskill/model/dummy.py
@dataclass\nclass DummyModelResult:\n    name: str = \"dummy\"\n    data: float | None = None\n    strategy: Literal[\"mean\", \"constant\"] = \"constant\"\n    \"\"\"Dummy model result that always returns the same value.\n\n    Similar in spirit to <https://scikit-learn.org/stable/modules/generated/sklearn.dummy.DummyRegressor.html>\n\n    Parameters\n    ----------\n    data : float, optional\n        The value to return if strategy is 'constant', by default None\n    name : str, optional\n        The name of the model result, by default 'dummy'\n    strategy : str, optional\n        The strategy to use, 'mean' uses the mean of the observation, 'constant' uses the value given in data, by default 'constant'\n\n    Examples\n    --------\n    >>> import pandas as pd\n    >>> import modelskill as ms\n    >>> obs = ms.PointObservation(pd.DataFrame([0.0, 1.0], index=pd.date_range(\"2000\", freq=\"H\", periods=2)), name=\"foo\")\n    >>> mr = ms.DummyModelResult(strategy='mean')\n    >>> pmr = mr.extract(obs)\n    >>> pmr.to_dataframe()\n                        dummy\n    time\n    2000-01-01 00:00:00    0.5\n    2000-01-01 01:00:00    0.5\n    \"\"\"\n\n    def __post_init__(self):\n        if self.strategy == \"constant\" and self.data is None:\n            raise ValueError(\"data must be given when strategy is 'constant'\")\n\n    def extract(\n        self,\n        observation: PointObservation | TrackObservation,\n        spatial_method: Optional[str] = None,\n    ) -> PointModelResult | TrackModelResult:\n        if spatial_method is not None:\n            raise NotImplementedError(\n                \"spatial interpolation not possible when matching point model results with point observations\"\n            )\n\n        da = observation.data[observation.name].copy()\n        if self.strategy == \"mean\":\n            da[:] = da.mean()\n        else:\n            da[:] = self.data\n\n        if isinstance(observation, PointObservation):\n            return 
PointModelResult(\n                data=da, x=observation.x, y=observation.y, name=self.name\n            )\n\n        elif isinstance(observation, TrackObservation):\n            data = pd.DataFrame(\n                {\n                    \"x\": observation.x,\n                    \"y\": observation.y,\n                    \"value\": da.values,\n                },\n                index=da.time,\n            )\n            return TrackModelResult(data=data, name=self.name)\n        else:\n            raise ValueError(\n                f\"observation must be a PointObservation or TrackObservation not {type(observation)}\"\n            )\n
"},{"location":"api/model/dummy/#modelskill.DummyModelResult.strategy","title":"strategy class-attribute instance-attribute","text":"
strategy = 'constant'\n

Dummy model result that always returns the same value.

Similar in spirit to https://scikit-learn.org/stable/modules/generated/sklearn.dummy.DummyRegressor.html

Parameters:

Name Type Description Default data float

The value to return if strategy is 'constant', by default None

required name str

The name of the model result, by default 'dummy'

required strategy str

The strategy to use, 'mean' uses the mean of the observation, 'constant' uses the value given in data, by default 'constant'

required

Examples:

>>> import pandas as pd\n>>> import modelskill as ms\n>>> obs = ms.PointObservation(pd.DataFrame([0.0, 1.0], index=pd.date_range(\"2000\", freq=\"H\", periods=2)), name=\"foo\")\n>>> mr = ms.DummyModelResult(strategy='mean')\n>>> pmr = mr.extract(obs)\n>>> pmr.to_dataframe()\n                    dummy\ntime\n2000-01-01 00:00:00    0.5\n2000-01-01 01:00:00    0.5\n
"},{"location":"api/model/grid/","title":"GridModelResult","text":""},{"location":"api/model/grid/#modelskill.GridModelResult","title":"modelskill.GridModelResult","text":"

Bases: SpatialField

Construct a GridModelResult from a file or xarray.Dataset.

Parameters:

Name Type Description Default data GridType

the input data or file path

required name str

The name of the model result, by default None (will be set to file name or item name)

None item str or int

If multiple items/arrays are present in the input an item must be given (as either an index or a string), by default None

None quantity Quantity

Model quantity, for MIKE files this is inferred from the EUM information

None aux_items Optional[list[int | str]]

Auxiliary items, by default None

None Source code in modelskill/model/grid.py
class GridModelResult(SpatialField):\n    \"\"\"Construct a GridModelResult from a file or xarray.Dataset.\n\n    Parameters\n    ----------\n    data : types.GridType\n        the input data or file path\n    name : str, optional\n        The name of the model result,\n        by default None (will be set to file name or item name)\n    item : str or int, optional\n        If multiple items/arrays are present in the input an item\n        must be given (as either an index or a string), by default None\n    quantity : Quantity, optional\n        Model quantity, for MIKE files this is inferred from the EUM information\n    aux_items : Optional[list[int | str]], optional\n        Auxiliary items, by default None\n    \"\"\"\n\n    def __init__(\n        self,\n        data: GridType,\n        *,\n        name: Optional[str] = None,\n        item: str | int | None = None,\n        quantity: Optional[Quantity] = None,\n        aux_items: Optional[list[int | str]] = None,\n    ) -> None:\n        assert isinstance(\n            data, get_args(GridType)\n        ), \"Could not construct GridModelResult from provided data.\"\n\n        if isinstance(data, (str, Path)):\n            if \"*\" in str(data):\n                ds = xr.open_mfdataset(data)\n            else:\n                assert Path(data).exists(), f\"{data}: File does not exist.\"\n                ds = xr.open_dataset(data)\n\n        elif isinstance(data, Sequence) and all(\n            isinstance(file, (str, Path)) for file in data\n        ):\n            ds = xr.open_mfdataset(data)\n\n        elif isinstance(data, xr.DataArray):\n            if item is not None:\n                raise ValueError(f\"item must be None when data is a {type(data)}\")\n            if aux_items is not None:\n                raise ValueError(f\"aux_items must be None when data is a {type(data)}\")\n            if data.ndim < 2:\n                raise ValueError(f\"DataArray must at least 2D. 
Got {list(data.dims)}.\")\n            ds = data.to_dataset(name=name, promote_attrs=True)\n        elif isinstance(data, xr.Dataset):\n            assert len(data.coords) >= 2, \"Dataset must have at least 2 dimensions.\"\n            ds = data\n        else:\n            raise NotImplementedError(\n                f\"Could not construct GridModelResult from {type(data)}\"\n            )\n\n        sel_items = SelectedItems.parse(\n            list(ds.data_vars), item=item, aux_items=aux_items\n        )\n        name = name or sel_items.values\n        ds = rename_coords_xr(ds)\n\n        self.data: xr.Dataset = ds[sel_items.all]\n        self.name = name\n        self.sel_items = sel_items\n\n        # use long_name and units from data if not provided\n        if quantity is None:\n            da = self.data[sel_items.values]\n            quantity = Quantity.from_cf_attrs(da.attrs)\n\n        self.quantity = quantity\n\n    def __repr__(self) -> str:\n        res = []\n        res.append(f\"<{self.__class__.__name__}>: {self.name}\")\n        res.append(f\"Time: {self.time[0]} - {self.time[-1]}\")\n        res.append(f\"Quantity: {self.quantity}\")\n        if len(self.sel_items.aux) > 0:\n            res.append(f\"Auxiliary variables: {', '.join(self.sel_items.aux)}\")\n        return \"\\n\".join(res)\n\n    @property\n    def time(self) -> pd.DatetimeIndex:\n        return pd.DatetimeIndex(self.data.time)\n\n    def _in_domain(self, x: float, y: float) -> bool:\n        assert hasattr(self.data, \"x\") and hasattr(\n            self.data, \"y\"\n        ), \"Data has no x and/or y coordinates.\"\n        xmin = float(self.data.x.values.min())\n        xmax = float(self.data.x.values.max())\n        ymin = float(self.data.y.values.min())\n        ymax = float(self.data.y.values.max())\n        return (x >= xmin) & (x <= xmax) & (y >= ymin) & (y <= ymax)\n\n    def extract(\n        self,\n        observation: PointObservation | TrackObservation,\n        
spatial_method: Optional[str] = None,\n    ) -> PointModelResult | TrackModelResult:\n        \"\"\"Extract ModelResult at observation positions\n\n        Note: this method is typically not called directly, but through the match() method.\n\n        Parameters\n        ----------\n        observation : <PointObservation> or <TrackObservation>\n            positions (and times) at which modelresult should be extracted\n        spatial_method : Optional[str], optional\n            method in xarray.Dataset.interp, typically either \"nearest\" or\n            \"linear\", by default None = 'linear'\n\n        Returns\n        -------\n        PointModelResult or TrackModelResult\n            extracted modelresult\n        \"\"\"\n        _validate_overlap_in_time(self.time, observation)\n        if isinstance(observation, PointObservation):\n            return self._extract_point(observation, spatial_method)\n        elif isinstance(observation, TrackObservation):\n            return self._extract_track(observation, spatial_method)\n        else:\n            raise NotImplementedError(\n                f\"Extraction from {type(self.data)} to {type(observation)} is not implemented.\"\n            )\n\n    def _extract_point(\n        self, observation: PointObservation, spatial_method: Optional[str] = None\n    ) -> PointModelResult:\n        \"\"\"Spatially extract a PointModelResult from a GridModelResult (when data is a xarray.Dataset),\n        given a PointObservation. No time interpolation is done!\"\"\"\n        method: str = spatial_method or \"linear\"\n\n        x, y, z = observation.x, observation.y, observation.z\n        if (x is None) or (y is None):\n            raise ValueError(\n                f\"PointObservation '{observation.name}' cannot be used for extraction \"\n                + f\"because it has None position x={x}, y={y}. 
Please provide position \"\n                + \"when creating PointObservation.\"\n            )\n        if not self._in_domain(x, y):\n            raise ValueError(\n                f\"PointObservation '{observation.name}' ({x}, {y}) is outside model domain!\"\n            )\n\n        assert isinstance(self.data, xr.Dataset)\n\n        # TODO: avoid runtrip to pandas if possible (potential loss of metadata)\n        if \"z\" in self.data.dims and z is not None:\n            ds = self.data.interp(\n                coords=dict(x=float(x), y=float(y), z=float(z)),\n                method=method,  # type: ignore\n            )\n        else:\n            ds = self.data.interp(coords=dict(x=float(x), y=float(y)), method=method)  # type: ignore\n        # TODO: exclude aux cols in dropna\n        df = ds.to_dataframe().drop(columns=[\"x\", \"y\"]).dropna()\n        if len(df) == 0:\n            raise ValueError(\n                f\"Spatial point extraction failed for PointObservation '{observation.name}' in GridModelResult '{self.name}'! (is point outside model domain? 
Consider spatial_method='nearest')\"\n            )\n        df = df.rename(columns={self.sel_items.values: self.name})\n\n        return PointModelResult(\n            data=df,\n            x=ds.x.item(),\n            y=ds.y.item(),\n            item=self.name,\n            name=self.name,\n            quantity=self.quantity,\n            aux_items=self.sel_items.aux,\n        )\n\n    def _extract_track(\n        self, observation: TrackObservation, spatial_method: Optional[str] = None\n    ) -> TrackModelResult:\n        \"\"\"Extract a TrackModelResult from a GridModelResult (when data is a xarray.Dataset),\n        given a TrackObservation.\"\"\"\n        method: str = spatial_method or \"linear\"\n\n        obs_df = observation.data.to_dataframe()\n\n        renamed_obs_data = rename_coords_pd(obs_df)\n        t = xr.DataArray(renamed_obs_data.index, dims=\"track\")\n        x = xr.DataArray(renamed_obs_data.x, dims=\"track\")\n        y = xr.DataArray(renamed_obs_data.y, dims=\"track\")\n\n        assert isinstance(self.data, xr.Dataset)\n        ds = self.data.interp(\n            coords=dict(time=t, x=x, y=y),\n            method=method,  # type: ignore\n        )\n        df = ds.to_dataframe().drop(columns=[\"time\"])\n        df = df.rename(columns={self.sel_items.values: self.name})\n\n        return TrackModelResult(\n            data=df.dropna(),  # TODO: exclude aux cols in dropna\n            item=self.name,\n            x_item=\"x\",\n            y_item=\"y\",\n            name=self.name,\n            quantity=self.quantity,\n            aux_items=self.sel_items.aux,\n        )\n
"},{"location":"api/model/grid/#modelskill.GridModelResult.extract","title":"extract","text":"
extract(observation, spatial_method=None)\n

Extract ModelResult at observation positions

Note: this method is typically not called directly, but through the match() method.

Parameters:

Name Type Description Default observation <PointObservation> or <TrackObservation>

positions (and times) at which modelresult should be extracted

required spatial_method Optional[str]

method in xarray.Dataset.interp, typically either \"nearest\" or \"linear\", by default None = 'linear'

None

Returns:

Type Description PointModelResult or TrackModelResult

extracted modelresult

Source code in modelskill/model/grid.py
def extract(\n    self,\n    observation: PointObservation | TrackObservation,\n    spatial_method: Optional[str] = None,\n) -> PointModelResult | TrackModelResult:\n    \"\"\"Extract ModelResult at observation positions\n\n    Note: this method is typically not called directly, but through the match() method.\n\n    Parameters\n    ----------\n    observation : <PointObservation> or <TrackObservation>\n        positions (and times) at which modelresult should be extracted\n    spatial_method : Optional[str], optional\n        method in xarray.Dataset.interp, typically either \"nearest\" or\n        \"linear\", by default None = 'linear'\n\n    Returns\n    -------\n    PointModelResult or TrackModelResult\n        extracted modelresult\n    \"\"\"\n    _validate_overlap_in_time(self.time, observation)\n    if isinstance(observation, PointObservation):\n        return self._extract_point(observation, spatial_method)\n    elif isinstance(observation, TrackObservation):\n        return self._extract_track(observation, spatial_method)\n    else:\n        raise NotImplementedError(\n            f\"Extraction from {type(self.data)} to {type(observation)} is not implemented.\"\n        )\n
"},{"location":"api/model/model_result/","title":"model_result()","text":""},{"location":"api/model/model_result/#modelskill.model_result","title":"modelskill.model_result","text":"
model_result(data, *, aux_items=None, gtype=None, **kwargs)\n

A factory function for creating an appropriate object based on the data input.

Parameters:

Name Type Description Default data DataInputType

The data to be used for creating the ModelResult object.

required aux_items Optional[list[int | str]]

Auxiliary items, by default None

None gtype Optional[Literal['point', 'track', 'unstructured', 'grid']]

The geometry type of the data. If not specified, it will be guessed from the data.

None **kwargs Any

Additional keyword arguments to be passed to the ModelResult constructor.

{}

Examples:

>>> import modelskill as ms\n>>> ms.model_result(\"Oresund2D.dfsu\", item=0)\n<DfsuModelResult> 'Oresund2D'\n>>> ms.model_result(\"ERA5_DutchCoast.nc\", item=\"swh\", name=\"ERA5\")\n<GridModelResult> 'ERA5'\n
Source code in modelskill/model/factory.py
def model_result(\n    data: DataInputType,\n    *,\n    aux_items: Optional[list[int | str]] = None,\n    gtype: Optional[Literal[\"point\", \"track\", \"unstructured\", \"grid\"]] = None,\n    **kwargs: Any,\n) -> Any:\n    \"\"\"A factory function for creating an appropriate object based on the data input.\n\n    Parameters\n    ----------\n    data : DataInputType\n        The data to be used for creating the ModelResult object.\n    aux_items : Optional[list[int | str]]\n        Auxiliary items, by default None\n    gtype : Optional[Literal[\"point\", \"track\", \"unstructured\", \"grid\"]]\n        The geometry type of the data. If not specified, it will be guessed from the data.\n    **kwargs\n        Additional keyword arguments to be passed to the ModelResult constructor.\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> ms.model_result(\"Oresund2D.dfsu\", item=0)\n    <DfsuModelResult> 'Oresund2D'\n    >>> ms.model_result(\"ERA5_DutchCoast.nc\", item=\"swh\", name=\"ERA5\")\n    <GridModelResult> 'ERA5'\n    \"\"\"\n    if gtype is None:\n        geometry = _guess_gtype(data)\n    else:\n        geometry = GeometryType.from_string(gtype)\n\n    return _modelresult_lookup[geometry](\n        data=data,\n        aux_items=aux_items,\n        **kwargs,\n    )\n
"},{"location":"api/model/point/","title":"PointModelResult","text":""},{"location":"api/model/point/#modelskill.PointModelResult","title":"modelskill.PointModelResult","text":"

Bases: TimeSeries, Alignable

Construct a PointModelResult from a 0d data source: dfs0 file, mikeio.Dataset/DataArray, pandas.DataFrame/Series or xarray.Dataset/DataArray

Parameters:

Name Type Description Default data (str, Path, Dataset, DataArray, DataFrame, Series, Dataset or DataArray)

filename (.dfs0 or .nc) or object with the data

required name Optional[str]

The name of the model result, by default None (will be set to file name or item name)

None x float

first coordinate of point position, inferred from data if not given, else None

None y float

second coordinate of point position, inferred from data if not given, else None

None z float

third coordinate of point position, inferred from data if not given, else None

None item str | int | None

If multiple items/arrays are present in the input an item must be given (as either an index or a string), by default None

None quantity Quantity

Model quantity, for MIKE files this is inferred from the EUM information

None aux_items Optional[list[int | str]]

Auxiliary items, by default None

None Source code in modelskill/model/point.py
class PointModelResult(TimeSeries, Alignable):\n    \"\"\"Construct a PointModelResult from a 0d data source:\n    dfs0 file, mikeio.Dataset/DataArray, pandas.DataFrame/Series\n    or xarray.Dataset/DataArray\n\n    Parameters\n    ----------\n    data : str, Path, mikeio.Dataset, mikeio.DataArray, pd.DataFrame, pd.Series, xr.Dataset or xr.DataArray\n        filename (.dfs0 or .nc) or object with the data\n    name : Optional[str], optional\n        The name of the model result,\n        by default None (will be set to file name or item name)\n    x : float, optional\n        first coordinate of point position, inferred from data if not given, else None\n    y : float, optional\n        second coordinate of point position, inferred from data if not given, else None\n    z : float, optional\n        third coordinate of point position, inferred from data if not given, else None\n    item : str | int | None, optional\n        If multiple items/arrays are present in the input an item\n        must be given (as either an index or a string), by default None\n    quantity : Quantity, optional\n        Model quantity, for MIKE files this is inferred from the EUM information\n    aux_items : Optional[list[int | str]], optional\n        Auxiliary items, by default None\n    \"\"\"\n\n    def __init__(\n        self,\n        data: PointType,\n        *,\n        name: Optional[str] = None,\n        x: Optional[float] = None,\n        y: Optional[float] = None,\n        z: Optional[float] = None,\n        item: str | int | None = None,\n        quantity: Optional[Quantity] = None,\n        aux_items: Optional[Sequence[int | str]] = None,\n    ) -> None:\n        if not self._is_input_validated(data):\n            data = _parse_point_input(\n                data,\n                name=name,\n                item=item,\n                quantity=quantity,\n                aux_items=aux_items,\n                x=x,\n                y=y,\n                z=z,\n            )\n\n    
    assert isinstance(data, xr.Dataset)\n\n        data_var = str(list(data.data_vars)[0])\n        data[data_var].attrs[\"kind\"] = \"model\"\n        super().__init__(data=data)\n\n    def extract(\n        self, obs: PointObservation, spatial_method: Optional[str] = None\n    ) -> PointModelResult:\n        if not isinstance(obs, PointObservation):\n            raise ValueError(f\"obs must be a PointObservation not {type(obs)}\")\n        if spatial_method is not None:\n            raise NotImplementedError(\n                \"spatial interpolation not possible when matching point model results with point observations\"\n            )\n        return self\n\n    def interp_time(self, observation: Observation, **kwargs: Any) -> PointModelResult:\n        \"\"\"\n        Interpolate model result to the time of the observation\n\n        wrapper around xarray.Dataset.interp()\n\n        Parameters\n        ----------\n        observation : Observation\n            The observation to interpolate to\n        **kwargs\n\n            Additional keyword arguments passed to xarray.interp\n\n        Returns\n        -------\n        PointModelResult\n            Interpolated model result\n        \"\"\"\n        ds = self.align(observation, **kwargs)\n        return PointModelResult(ds)\n\n    def align(\n        self,\n        observation: Observation,\n        *,\n        max_gap: float | None = None,\n        **kwargs: Any,\n    ) -> xr.Dataset:\n        new_time = observation.time\n\n        dati = self.data.dropna(\"time\").interp(\n            time=new_time, assume_sorted=True, **kwargs\n        )\n\n        pmr = PointModelResult(dati)\n        if max_gap is not None:\n            pmr = pmr._remove_model_gaps(mod_index=self.time, max_gap=max_gap)\n        return pmr.data\n\n    def _remove_model_gaps(\n        self,\n        mod_index: pd.DatetimeIndex,\n        max_gap: float | None = None,\n    ) -> PointModelResult:\n        \"\"\"Remove model gaps longer than 
max_gap from TimeSeries\"\"\"\n        max_gap_delta = pd.Timedelta(max_gap, \"s\")\n        valid_times = self._get_valid_times(mod_index, max_gap_delta)\n        ds = self.data.sel(time=valid_times)\n        return PointModelResult(ds)\n\n    def _get_valid_times(\n        self, mod_index: pd.DatetimeIndex, max_gap: pd.Timedelta\n    ) -> pd.DatetimeIndex:\n        \"\"\"Used only by _remove_model_gaps\"\"\"\n        obs_index = self.time\n        # init dataframe of available timesteps and their index\n        df = pd.DataFrame(index=mod_index)\n        df[\"idx\"] = range(len(df))\n\n        # for query times get available left and right index of source times\n        df = (\n            df.reindex(df.index.union(obs_index))\n            .interpolate(method=\"time\", limit_area=\"inside\")\n            .reindex(obs_index)\n            .dropna()\n        )\n        df[\"idxa\"] = np.floor(df.idx).astype(int)\n        df[\"idxb\"] = np.ceil(df.idx).astype(int)\n\n        # time of left and right source times and time delta\n        df[\"ta\"] = mod_index[df.idxa]\n        df[\"tb\"] = mod_index[df.idxb]\n        df[\"dt\"] = df.tb - df.ta\n\n        # valid query times where time delta is less than max_gap\n        valid_idx = df.dt <= max_gap\n        return df[valid_idx].index\n
"},{"location":"api/model/point/#modelskill.PointModelResult.gtype","title":"gtype property","text":"
gtype\n

Geometry type

"},{"location":"api/model/point/#modelskill.PointModelResult.n_points","title":"n_points property","text":"
n_points\n

Number of data points

"},{"location":"api/model/point/#modelskill.PointModelResult.name","title":"name property writable","text":"
name\n

Name of time series (value item name)

"},{"location":"api/model/point/#modelskill.PointModelResult.plot","title":"plot instance-attribute","text":"
plot = plotter(self)\n

Plot using the ComparerPlotter

Examples:

>>> obj.plot.timeseries()\n>>> obj.plot.hist()\n
"},{"location":"api/model/point/#modelskill.PointModelResult.quantity","title":"quantity property writable","text":"
quantity\n

Quantity of time series

"},{"location":"api/model/point/#modelskill.PointModelResult.time","title":"time property","text":"
time\n

Time index

"},{"location":"api/model/point/#modelskill.PointModelResult.values","title":"values property","text":"
values\n

Values as numpy array

"},{"location":"api/model/point/#modelskill.PointModelResult.x","title":"x property writable","text":"
x\n

x-coordinate

"},{"location":"api/model/point/#modelskill.PointModelResult.y","title":"y property writable","text":"
y\n

y-coordinate

"},{"location":"api/model/point/#modelskill.PointModelResult.equals","title":"equals","text":"
equals(other)\n

Check if two TimeSeries are equal

Source code in modelskill/timeseries/_timeseries.py
def equals(self, other: TimeSeries) -> bool:\n    \"\"\"Check if two TimeSeries are equal\"\"\"\n    return self.data.equals(other.data)\n
"},{"location":"api/model/point/#modelskill.PointModelResult.interp_time","title":"interp_time","text":"
interp_time(observation, **kwargs)\n

Interpolate model result to the time of the observation

wrapper around xarray.Dataset.interp()

Parameters:

Name Type Description Default observation Observation

The observation to interpolate to

required **kwargs Any

Additional keyword arguments passed to xarray.interp

{}

Returns:

Type Description PointModelResult

Interpolated model result

Source code in modelskill/model/point.py
def interp_time(self, observation: Observation, **kwargs: Any) -> PointModelResult:\n    \"\"\"\n    Interpolate model result to the time of the observation\n\n    wrapper around xarray.Dataset.interp()\n\n    Parameters\n    ----------\n    observation : Observation\n        The observation to interpolate to\n    **kwargs\n\n        Additional keyword arguments passed to xarray.interp\n\n    Returns\n    -------\n    PointModelResult\n        Interpolated model result\n    \"\"\"\n    ds = self.align(observation, **kwargs)\n    return PointModelResult(ds)\n
"},{"location":"api/model/point/#modelskill.PointModelResult.sel","title":"sel","text":"
sel(**kwargs)\n

Select data by label

Source code in modelskill/timeseries/_timeseries.py
def sel(self: T, **kwargs: Any) -> T:\n    \"\"\"Select data by label\"\"\"\n    return self.__class__(self.data.sel(**kwargs))\n
"},{"location":"api/model/point/#modelskill.PointModelResult.to_dataframe","title":"to_dataframe","text":"
to_dataframe()\n

Convert matched data to pandas DataFrame

Include x, y coordinates only if gtype=track

Returns:

Type Description DataFrame

data as a pandas DataFrame

Source code in modelskill/timeseries/_timeseries.py
def to_dataframe(self) -> pd.DataFrame:\n    \"\"\"Convert matched data to pandas DataFrame\n\n    Include x, y coordinates only if gtype=track\n\n    Returns\n    -------\n    pd.DataFrame\n        data as a pandas DataFrame\n    \"\"\"\n    if self.gtype == str(GeometryType.POINT):\n        # we remove the scalar coordinate variables as they\n        # will otherwise be columns in the dataframe\n        return self.data.drop_vars([\"x\", \"y\", \"z\"]).to_dataframe()\n    elif self.gtype == str(GeometryType.TRACK):\n        df = self.data.drop_vars([\"z\"]).to_dataframe()\n        # make sure that x, y cols are first\n        cols = [\"x\", \"y\"] + [c for c in df.columns if c not in [\"x\", \"y\"]]\n        return df[cols]\n    else:\n        raise NotImplementedError(f\"Unknown gtype: {self.gtype}\")\n
"},{"location":"api/model/point/#modelskill.PointModelResult.trim","title":"trim","text":"
trim(start_time=None, end_time=None, buffer='1s')\n

Trim observation data to a given time interval

Parameters:

Name Type Description Default start_time Timestamp

start time

None end_time Timestamp

end time

None buffer str

buffer time around start and end time, by default \"1s\"

'1s' Source code in modelskill/timeseries/_timeseries.py
def trim(\n    self: T,\n    start_time: Optional[pd.Timestamp] = None,\n    end_time: Optional[pd.Timestamp] = None,\n    buffer: str = \"1s\",\n) -> T:\n    \"\"\"Trim observation data to a given time interval\n\n    Parameters\n    ----------\n    start_time : pd.Timestamp\n        start time\n    end_time : pd.Timestamp\n        end time\n    buffer : str, optional\n        buffer time around start and end time, by default \"1s\"\n    \"\"\"\n    # Expand time interval with buffer\n    start_time = pd.Timestamp(start_time) - pd.Timedelta(buffer)\n    end_time = pd.Timestamp(end_time) + pd.Timedelta(buffer)\n\n    data = self.data.sel(time=slice(start_time, end_time))\n    if len(data.time) == 0:\n        raise ValueError(\n            f\"No data left after trimming to {start_time} - {end_time}\"\n        )\n    return self.__class__(data)\n
"},{"location":"api/model/point/#modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter","title":"modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter","text":"

Bases: TimeSeriesPlotter

Source code in modelskill/timeseries/_plotter.py
class MatplotlibTimeSeriesPlotter(TimeSeriesPlotter):\n    def __init__(self, ts) -> None:\n        self._ts = ts\n\n    def __call__(self, **kwargs):\n        # default to timeseries plot\n        self.timeseries(**kwargs)\n\n    def timeseries(\n        self, title=None, color=None, marker=\".\", linestyle=\"None\", **kwargs\n    ):\n        \"\"\"Plot timeseries\n\n        Wraps pandas.DataFrame plot() method.\n\n        Parameters\n        ----------\n        title : str, optional\n            plot title, default: [name]\n        color : str, optional\n            plot color, by default '#d62728'\n        marker : str, optional\n            plot marker, by default '.'\n        linestyle : str, optional\n            line style, by default None\n        **kwargs\n            other keyword arguments to df.plot()\n        \"\"\"\n        kwargs[\"color\"] = self._ts._color if color is None else color\n        ax = self._ts._values_as_series.plot(\n            marker=marker, linestyle=linestyle, **kwargs\n        )\n\n        title = self._ts.name if title is None else title\n        ax.set_title(title)\n\n        ax.set_ylabel(str(self._ts.quantity))\n        return ax\n\n    def hist(self, bins=100, title=None, color=None, **kwargs):\n        \"\"\"Plot histogram of timeseries values\n\n        Wraps pandas.DataFrame hist() method.\n\n        Parameters\n        ----------\n        bins : int, optional\n            specification of bins, by default 100\n        title : str, optional\n            plot title, default: observation name\n        color : str, optional\n            plot color, by default \"#d62728\"\n        **kwargs\n            other keyword arguments to df.hist()\n\n        Returns\n        -------\n        matplotlib axes\n        \"\"\"\n        title = self._ts.name if title is None else title\n\n        kwargs[\"color\"] = self._ts._color if color is None else color\n\n        ax = self._ts._values_as_series.hist(bins=bins, **kwargs)\n        
ax.set_title(title)\n        ax.set_xlabel(str(self._ts.quantity))\n        return ax\n
"},{"location":"api/model/point/#modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter.hist","title":"hist","text":"
hist(bins=100, title=None, color=None, **kwargs)\n

Plot histogram of timeseries values

Wraps pandas.DataFrame hist() method.

Parameters:

Name Type Description Default bins int

specification of bins, by default 100

100 title str

plot title, default: observation name

None color str

plot color, by default \"#d62728\"

None **kwargs

other keyword arguments to df.hist()

{}

Returns:

Type Description matplotlib axes Source code in modelskill/timeseries/_plotter.py
def hist(self, bins=100, title=None, color=None, **kwargs):\n    \"\"\"Plot histogram of timeseries values\n\n    Wraps pandas.DataFrame hist() method.\n\n    Parameters\n    ----------\n    bins : int, optional\n        specification of bins, by default 100\n    title : str, optional\n        plot title, default: observation name\n    color : str, optional\n        plot color, by default \"#d62728\"\n    **kwargs\n        other keyword arguments to df.hist()\n\n    Returns\n    -------\n    matplotlib axes\n    \"\"\"\n    title = self._ts.name if title is None else title\n\n    kwargs[\"color\"] = self._ts._color if color is None else color\n\n    ax = self._ts._values_as_series.hist(bins=bins, **kwargs)\n    ax.set_title(title)\n    ax.set_xlabel(str(self._ts.quantity))\n    return ax\n
"},{"location":"api/model/point/#modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter.timeseries","title":"timeseries","text":"
timeseries(title=None, color=None, marker='.', linestyle='None', **kwargs)\n

Plot timeseries

Wraps pandas.DataFrame plot() method.

Parameters:

Name Type Description Default title str

plot title, default: [name]

None color str

plot color, by default '#d62728'

None marker str

plot marker, by default '.'

'.' linestyle str

line style, by default None

'None' **kwargs

other keyword arguments to df.plot()

{} Source code in modelskill/timeseries/_plotter.py
def timeseries(\n    self, title=None, color=None, marker=\".\", linestyle=\"None\", **kwargs\n):\n    \"\"\"Plot timeseries\n\n    Wraps pandas.DataFrame plot() method.\n\n    Parameters\n    ----------\n    title : str, optional\n        plot title, default: [name]\n    color : str, optional\n        plot color, by default '#d62728'\n    marker : str, optional\n        plot marker, by default '.'\n    linestyle : str, optional\n        line style, by default None\n    **kwargs\n        other keyword arguments to df.plot()\n    \"\"\"\n    kwargs[\"color\"] = self._ts._color if color is None else color\n    ax = self._ts._values_as_series.plot(\n        marker=marker, linestyle=linestyle, **kwargs\n    )\n\n    title = self._ts.name if title is None else title\n    ax.set_title(title)\n\n    ax.set_ylabel(str(self._ts.quantity))\n    return ax\n
"},{"location":"api/model/track/","title":"TrackModelResult","text":""},{"location":"api/model/track/#modelskill.TrackModelResult","title":"modelskill.TrackModelResult","text":"

Bases: TimeSeries, Alignable

Construct a TrackModelResult from a dfs0 file, mikeio.Dataset, pandas.DataFrame or a xarray.Datasets

Parameters:

Name Type Description Default data TrackType

The input data or file path

required name Optional[str]

The name of the model result, by default None (will be set to file name or item name)

None item str | int | None

If multiple items/arrays are present in the input an item must be given (as either an index or a string), by default None

None x_item str | int | None

Item of the first coordinate of positions, by default None

0 y_item str | int | None

Item of the second coordinate of positions, by default None

1 quantity Quantity

Model quantity, for MIKE files this is inferred from the EUM information

None keep_duplicates (str, bool)

Strategy for handling duplicate timestamps (wraps xarray.Dataset.drop_duplicates) \"first\" to keep first occurrence, \"last\" to keep last occurrence, False to drop all duplicates, \"offset\" to add milliseconds to consecutive duplicates, by default \"first\"

'first' aux_items Optional[list[int | str]]

Auxiliary items, by default None

None Source code in modelskill/model/track.py
class TrackModelResult(TimeSeries, Alignable):\n    \"\"\"Construct a TrackModelResult from a dfs0 file,\n    mikeio.Dataset, pandas.DataFrame or a xarray.Datasets\n\n    Parameters\n    ----------\n    data : types.TrackType\n        The input data or file path\n    name : Optional[str], optional\n        The name of the model result,\n        by default None (will be set to file name or item name)\n    item : str | int | None, optional\n        If multiple items/arrays are present in the input an item\n        must be given (as either an index or a string), by default None\n    x_item : str | int | None, optional\n        Item of the first coordinate of positions, by default None\n    y_item : str | int | None, optional\n        Item of the second coordinate of positions, by default None\n    quantity : Quantity, optional\n        Model quantity, for MIKE files this is inferred from the EUM information\n    keep_duplicates : (str, bool), optional\n        Strategy for handling duplicate timestamps (wraps xarray.Dataset.drop_duplicates)\n        \"first\" to keep first occurrence, \"last\" to keep last occurrence,\n        False to drop all duplicates, \"offset\" to add milliseconds to\n        consecutive duplicates, by default \"first\"\n    aux_items : Optional[list[int | str]], optional\n        Auxiliary items, by default None\n    \"\"\"\n\n    def __init__(\n        self,\n        data: TrackType,\n        *,\n        name: Optional[str] = None,\n        item: str | int | None = None,\n        quantity: Optional[Quantity] = None,\n        x_item: str | int = 0,\n        y_item: str | int = 1,\n        keep_duplicates: str | bool = \"first\",\n        aux_items: Optional[Sequence[int | str]] = None,\n    ) -> None:\n        if not self._is_input_validated(data):\n            data = _parse_track_input(\n                data=data,\n                name=name,\n                item=item,\n                quantity=quantity,\n                x_item=x_item,\n       
         y_item=y_item,\n                keep_duplicates=keep_duplicates,\n                aux_items=aux_items,\n            )\n\n        assert isinstance(data, xr.Dataset)\n        data_var = str(list(data.data_vars)[0])\n        data[data_var].attrs[\"kind\"] = \"model\"\n        super().__init__(data=data)\n\n    def extract(\n        self, obs: TrackObservation, spatial_method: Optional[str] = None\n    ) -> TrackModelResult:\n        if not isinstance(obs, TrackObservation):\n            raise ValueError(f\"obs must be a TrackObservation not {type(obs)}\")\n        if spatial_method is not None:\n            raise NotImplementedError(\n                \"spatial interpolation not possible when matching track model results with track observations\"\n            )\n        return self\n\n    def align(self, observation: Observation, **kwargs: Any) -> xr.Dataset:\n        spatial_tolerance = 1e-3\n\n        mri = self\n        mod_df = mri.data.to_dataframe()\n        obs_df = observation.data.to_dataframe()\n\n        # 1. inner join on time\n        df = mod_df.join(obs_df, how=\"inner\", lsuffix=\"_mod\", rsuffix=\"_obs\")\n\n        # 2. remove model points outside observation track\n        n_points = len(df)\n        keep_x = np.abs((df.x_mod - df.x_obs)) < spatial_tolerance\n        keep_y = np.abs((df.y_mod - df.y_obs)) < spatial_tolerance\n        df = df[keep_x & keep_y]\n        if n_points_removed := n_points - len(df):\n            warnings.warn(\n                f\"Removed {n_points_removed} model points outside observation track (spatial_tolerance={spatial_tolerance})\"\n            )\n        return mri.data.sel(time=df.index)\n
"},{"location":"api/model/track/#modelskill.TrackModelResult.gtype","title":"gtype property","text":"
gtype\n

Geometry type

"},{"location":"api/model/track/#modelskill.TrackModelResult.n_points","title":"n_points property","text":"
n_points\n

Number of data points

"},{"location":"api/model/track/#modelskill.TrackModelResult.name","title":"name property writable","text":"
name\n

Name of time series (value item name)

"},{"location":"api/model/track/#modelskill.TrackModelResult.plot","title":"plot instance-attribute","text":"
plot = plotter(self)\n

Plot using the ComparerPlotter

Examples:

>>> obj.plot.timeseries()\n>>> obj.plot.hist()\n
"},{"location":"api/model/track/#modelskill.TrackModelResult.quantity","title":"quantity property writable","text":"
quantity\n

Quantity of time series

"},{"location":"api/model/track/#modelskill.TrackModelResult.time","title":"time property","text":"
time\n

Time index

"},{"location":"api/model/track/#modelskill.TrackModelResult.values","title":"values property","text":"
values\n

Values as numpy array

"},{"location":"api/model/track/#modelskill.TrackModelResult.x","title":"x property writable","text":"
x\n

x-coordinate

"},{"location":"api/model/track/#modelskill.TrackModelResult.y","title":"y property writable","text":"
y\n

y-coordinate

"},{"location":"api/model/track/#modelskill.TrackModelResult.equals","title":"equals","text":"
equals(other)\n

Check if two TimeSeries are equal

Source code in modelskill/timeseries/_timeseries.py
def equals(self, other: TimeSeries) -> bool:\n    \"\"\"Check if two TimeSeries are equal\"\"\"\n    return self.data.equals(other.data)\n
"},{"location":"api/model/track/#modelskill.TrackModelResult.sel","title":"sel","text":"
sel(**kwargs)\n

Select data by label

Source code in modelskill/timeseries/_timeseries.py
def sel(self: T, **kwargs: Any) -> T:\n    \"\"\"Select data by label\"\"\"\n    return self.__class__(self.data.sel(**kwargs))\n
"},{"location":"api/model/track/#modelskill.TrackModelResult.to_dataframe","title":"to_dataframe","text":"
to_dataframe()\n

Convert matched data to pandas DataFrame

Include x, y coordinates only if gtype=track

Returns:

Type Description DataFrame

data as a pandas DataFrame

Source code in modelskill/timeseries/_timeseries.py
def to_dataframe(self) -> pd.DataFrame:\n    \"\"\"Convert matched data to pandas DataFrame\n\n    Include x, y coordinates only if gtype=track\n\n    Returns\n    -------\n    pd.DataFrame\n        data as a pandas DataFrame\n    \"\"\"\n    if self.gtype == str(GeometryType.POINT):\n        # we remove the scalar coordinate variables as they\n        # will otherwise be columns in the dataframe\n        return self.data.drop_vars([\"x\", \"y\", \"z\"]).to_dataframe()\n    elif self.gtype == str(GeometryType.TRACK):\n        df = self.data.drop_vars([\"z\"]).to_dataframe()\n        # make sure that x, y cols are first\n        cols = [\"x\", \"y\"] + [c for c in df.columns if c not in [\"x\", \"y\"]]\n        return df[cols]\n    else:\n        raise NotImplementedError(f\"Unknown gtype: {self.gtype}\")\n
"},{"location":"api/model/track/#modelskill.TrackModelResult.trim","title":"trim","text":"
trim(start_time=None, end_time=None, buffer='1s')\n

Trim observation data to a given time interval

Parameters:

Name Type Description Default start_time Timestamp

start time

None end_time Timestamp

end time

None buffer str

buffer time around start and end time, by default \"1s\"

'1s' Source code in modelskill/timeseries/_timeseries.py
def trim(\n    self: T,\n    start_time: Optional[pd.Timestamp] = None,\n    end_time: Optional[pd.Timestamp] = None,\n    buffer: str = \"1s\",\n) -> T:\n    \"\"\"Trim observation data to a given time interval\n\n    Parameters\n    ----------\n    start_time : pd.Timestamp\n        start time\n    end_time : pd.Timestamp\n        end time\n    buffer : str, optional\n        buffer time around start and end time, by default \"1s\"\n    \"\"\"\n    # Expand time interval with buffer\n    start_time = pd.Timestamp(start_time) - pd.Timedelta(buffer)\n    end_time = pd.Timestamp(end_time) + pd.Timedelta(buffer)\n\n    data = self.data.sel(time=slice(start_time, end_time))\n    if len(data.time) == 0:\n        raise ValueError(\n            f\"No data left after trimming to {start_time} - {end_time}\"\n        )\n    return self.__class__(data)\n
"},{"location":"api/model/track/#modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter","title":"modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter","text":"

Bases: TimeSeriesPlotter

Source code in modelskill/timeseries/_plotter.py
class MatplotlibTimeSeriesPlotter(TimeSeriesPlotter):\n    def __init__(self, ts) -> None:\n        self._ts = ts\n\n    def __call__(self, **kwargs):\n        # default to timeseries plot\n        self.timeseries(**kwargs)\n\n    def timeseries(\n        self, title=None, color=None, marker=\".\", linestyle=\"None\", **kwargs\n    ):\n        \"\"\"Plot timeseries\n\n        Wraps pandas.DataFrame plot() method.\n\n        Parameters\n        ----------\n        title : str, optional\n            plot title, default: [name]\n        color : str, optional\n            plot color, by default '#d62728'\n        marker : str, optional\n            plot marker, by default '.'\n        linestyle : str, optional\n            line style, by default None\n        **kwargs\n            other keyword arguments to df.plot()\n        \"\"\"\n        kwargs[\"color\"] = self._ts._color if color is None else color\n        ax = self._ts._values_as_series.plot(\n            marker=marker, linestyle=linestyle, **kwargs\n        )\n\n        title = self._ts.name if title is None else title\n        ax.set_title(title)\n\n        ax.set_ylabel(str(self._ts.quantity))\n        return ax\n\n    def hist(self, bins=100, title=None, color=None, **kwargs):\n        \"\"\"Plot histogram of timeseries values\n\n        Wraps pandas.DataFrame hist() method.\n\n        Parameters\n        ----------\n        bins : int, optional\n            specification of bins, by default 100\n        title : str, optional\n            plot title, default: observation name\n        color : str, optional\n            plot color, by default \"#d62728\"\n        **kwargs\n            other keyword arguments to df.hist()\n\n        Returns\n        -------\n        matplotlib axes\n        \"\"\"\n        title = self._ts.name if title is None else title\n\n        kwargs[\"color\"] = self._ts._color if color is None else color\n\n        ax = self._ts._values_as_series.hist(bins=bins, **kwargs)\n        
ax.set_title(title)\n        ax.set_xlabel(str(self._ts.quantity))\n        return ax\n
"},{"location":"api/model/track/#modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter.hist","title":"hist","text":"
hist(bins=100, title=None, color=None, **kwargs)\n

Plot histogram of timeseries values

Wraps pandas.DataFrame hist() method.

Parameters:

Name Type Description Default bins int

specification of bins, by default 100

100 title str

plot title, default: observation name

None color str

plot color, by default \"#d62728\"

None **kwargs

other keyword arguments to df.hist()

{}

Returns:

Type Description matplotlib axes Source code in modelskill/timeseries/_plotter.py
def hist(self, bins=100, title=None, color=None, **kwargs):\n    \"\"\"Plot histogram of timeseries values\n\n    Wraps pandas.DataFrame hist() method.\n\n    Parameters\n    ----------\n    bins : int, optional\n        specification of bins, by default 100\n    title : str, optional\n        plot title, default: observation name\n    color : str, optional\n        plot color, by default \"#d62728\"\n    **kwargs\n        other keyword arguments to df.hist()\n\n    Returns\n    -------\n    matplotlib axes\n    \"\"\"\n    title = self._ts.name if title is None else title\n\n    kwargs[\"color\"] = self._ts._color if color is None else color\n\n    ax = self._ts._values_as_series.hist(bins=bins, **kwargs)\n    ax.set_title(title)\n    ax.set_xlabel(str(self._ts.quantity))\n    return ax\n
"},{"location":"api/model/track/#modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter.timeseries","title":"timeseries","text":"
timeseries(title=None, color=None, marker='.', linestyle='None', **kwargs)\n

Plot timeseries

Wraps pandas.DataFrame plot() method.

Parameters:

Name Type Description Default title str

plot title, default: [name]

None color str

plot color, by default '#d62728'

None marker str

plot marker, by default '.'

'.' linestyle str

line style, by default None

'None' **kwargs

other keyword arguments to df.plot()

{} Source code in modelskill/timeseries/_plotter.py
def timeseries(\n    self, title=None, color=None, marker=\".\", linestyle=\"None\", **kwargs\n):\n    \"\"\"Plot timeseries\n\n    Wraps pandas.DataFrame plot() method.\n\n    Parameters\n    ----------\n    title : str, optional\n        plot title, default: [name]\n    color : str, optional\n        plot color, by default '#d62728'\n    marker : str, optional\n        plot marker, by default '.'\n    linestyle : str, optional\n        line style, by default None\n    **kwargs\n        other keyword arguments to df.plot()\n    \"\"\"\n    kwargs[\"color\"] = self._ts._color if color is None else color\n    ax = self._ts._values_as_series.plot(\n        marker=marker, linestyle=linestyle, **kwargs\n    )\n\n    title = self._ts.name if title is None else title\n    ax.set_title(title)\n\n    ax.set_ylabel(str(self._ts.quantity))\n    return ax\n
"},{"location":"api/observation/","title":"Observations","text":"

ModelSkill supports two types of observations:

  • PointObservation - a point timeseries from a dfs0/nc file or a DataFrame
  • TrackObservation - a track (moving point) timeseries from a dfs0/nc file or a DataFrame

An observation can be created by explicitly invoking one of the above classes or using the observation() function which will return the appropriate type based on the input data (if possible).

"},{"location":"api/observation/observation/","title":"observation()","text":""},{"location":"api/observation/observation/#modelskill.observation","title":"modelskill.observation","text":"
observation(data, *, gtype=None, **kwargs)\n

A factory function for creating an appropriate observation object based on the data and args.

If 'x' or 'y' is given, a PointObservation is created. If 'x_item' or 'y_item' is given, a TrackObservation is created.

Parameters:

Name Type Description Default data DataInputType

The data to be used for creating the Observation object.

required gtype Optional[Literal['point', 'track']]

The geometry type of the data. If not specified, it will be guessed from the data.

None **kwargs

Additional keyword arguments to be passed to the Observation constructor.

{}

Examples:

>>> import modelskill as ms\n>>> o_pt = ms.observation(df, item=0, x=366844, y=6154291, name=\"Klagshamn\")\n>>> o_tr = ms.observation(\"lon_after_lat.dfs0\", item=\"wl\", x_item=1, y_item=0)\n
Source code in modelskill/obs.py
def observation(\n    data: DataInputType,\n    *,\n    gtype: Optional[Literal[\"point\", \"track\"]] = None,\n    **kwargs,\n):\n    \"\"\"A factory function for creating an appropriate observation object\n    based on the data and args.\n\n    If 'x' or 'y' is given, a PointObservation is created.\n    If 'x_item' or 'y_item' is given, a TrackObservation is created.\n\n    Parameters\n    ----------\n    data : DataInputType\n        The data to be used for creating the Observation object.\n    gtype : Optional[Literal[\"point\", \"track\"]]\n        The geometry type of the data. If not specified, it will be guessed from the data.\n    **kwargs\n        Additional keyword arguments to be passed to the Observation constructor.\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> o_pt = ms.observation(df, item=0, x=366844, y=6154291, name=\"Klagshamn\")\n    >>> o_tr = ms.observation(\"lon_after_lat.dfs0\", item=\"wl\", x_item=1, y_item=0)\n    \"\"\"\n    if gtype is None:\n        geometry = _guess_gtype(**kwargs)\n    else:\n        geometry = GeometryType.from_string(gtype)\n\n    return _obs_class_lookup[geometry](\n        data=data,\n        **kwargs,\n    )\n
"},{"location":"api/observation/point/","title":"PointObservation","text":""},{"location":"api/observation/point/#modelskill.PointObservation","title":"modelskill.PointObservation","text":"

Bases: Observation

Class for observations of fixed locations

Create a PointObservation from a dfs0 file or a pd.DataFrame.

Parameters:

Name Type Description Default data (str, Path, Dataset, DataArray, DataFrame, Series, Dataset or DataArray)

filename (.dfs0 or .nc) or object with the data

required item (int, str)

index or name of the wanted item/column, by default None if data contains more than one item, item must be given

None x float

x-coordinate of the observation point, inferred from data if not given, else None

None y float

y-coordinate of the observation point, inferred from data if not given, else None

None z float

z-coordinate of the observation point, inferred from data if not given, else None

None name str

user-defined name for easy identification in plots etc, by default file basename

None quantity Quantity

The quantity of the observation, for validation with model results For MIKE dfs files this is inferred from the EUM information

None aux_items list

list of names or indices of auxiliary items, by default None

None attrs dict

additional attributes to be added to the data, by default None

None weight float

weighting factor for skill scores, by default 1.0

1.0

Examples:

>>> import modelskill as ms\n>>> o1 = ms.PointObservation(\"klagshamn.dfs0\", item=0, x=366844, y=6154291, name=\"Klagshamn\")\n>>> o2 = ms.PointObservation(\"klagshamn.dfs0\", item=\"Water Level\", x=366844, y=6154291)\n>>> o3 = ms.PointObservation(df, item=0, x=366844, y=6154291, name=\"Klagshamn\")\n>>> o4 = ms.PointObservation(df[\"Water Level\"], x=366844, y=6154291)\n
Source code in modelskill/obs.py
class PointObservation(Observation):\n    \"\"\"Class for observations of fixed locations\n\n    Create a PointObservation from a dfs0 file or a pd.DataFrame.\n\n    Parameters\n    ----------\n    data : str, Path, mikeio.Dataset, mikeio.DataArray, pd.DataFrame, pd.Series, xr.Dataset or xr.DataArray\n        filename (.dfs0 or .nc) or object with the data\n    item : (int, str), optional\n        index or name of the wanted item/column, by default None\n        if data contains more than one item, item must be given\n    x : float, optional\n        x-coordinate of the observation point, inferred from data if not given, else None\n    y : float, optional\n        y-coordinate of the observation point, inferred from data if not given, else None\n    z : float, optional\n        z-coordinate of the observation point, inferred from data if not given, else None\n    name : str, optional\n        user-defined name for easy identification in plots etc, by default file basename\n    quantity : Quantity, optional\n        The quantity of the observation, for validation with model results\n        For MIKE dfs files this is inferred from the EUM information\n    aux_items : list, optional\n        list of names or indices of auxiliary items, by default None\n    attrs : dict, optional\n        additional attributes to be added to the data, by default None\n    weight : float, optional\n        weighting factor for skill scores, by default 1.0\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> o1 = ms.PointObservation(\"klagshamn.dfs0\", item=0, x=366844, y=6154291, name=\"Klagshamn\")\n    >>> o2 = ms.PointObservation(\"klagshamn.dfs0\", item=\"Water Level\", x=366844, y=6154291)\n    >>> o3 = ms.PointObservation(df, item=0, x=366844, y=6154291, name=\"Klagshamn\")\n    >>> o4 = ms.PointObservation(df[\"Water Level\"], x=366844, y=6154291)\n    \"\"\"\n\n    def __init__(\n        self,\n        data: PointType,\n        *,\n        item: Optional[int 
| str] = None,\n        x: Optional[float] = None,\n        y: Optional[float] = None,\n        z: Optional[float] = None,\n        name: Optional[str] = None,\n        weight: float = 1.0,\n        quantity: Optional[Quantity] = None,\n        aux_items: Optional[list[int | str]] = None,\n        attrs: Optional[dict] = None,\n    ) -> None:\n        if not self._is_input_validated(data):\n            data = _parse_point_input(\n                data,\n                name=name,\n                item=item,\n                quantity=quantity,\n                aux_items=aux_items,\n                x=x,\n                y=y,\n                z=z,\n            )\n\n        assert isinstance(data, xr.Dataset)\n        super().__init__(data=data, weight=weight, attrs=attrs)\n\n    @property\n    def z(self):\n        \"\"\"z-coordinate of observation point\"\"\"\n        return self._coordinate_values(\"z\")\n\n    @z.setter\n    def z(self, value):\n        self.data[\"z\"] = value\n
"},{"location":"api/observation/point/#modelskill.PointObservation.attrs","title":"attrs property writable","text":"
attrs\n

Attributes of the observation

"},{"location":"api/observation/point/#modelskill.PointObservation.gtype","title":"gtype property","text":"
gtype\n

Geometry type

"},{"location":"api/observation/point/#modelskill.PointObservation.n_points","title":"n_points property","text":"
n_points\n

Number of data points

"},{"location":"api/observation/point/#modelskill.PointObservation.name","title":"name property writable","text":"
name\n

Name of time series (value item name)

"},{"location":"api/observation/point/#modelskill.PointObservation.plot","title":"plot instance-attribute","text":"
plot = plotter(self)\n

Plot using the ComparerPlotter

Examples:

>>> obj.plot.timeseries()\n>>> obj.plot.hist()\n
"},{"location":"api/observation/point/#modelskill.PointObservation.quantity","title":"quantity property writable","text":"
quantity\n

Quantity of time series

"},{"location":"api/observation/point/#modelskill.PointObservation.time","title":"time property","text":"
time\n

Time index

"},{"location":"api/observation/point/#modelskill.PointObservation.values","title":"values property","text":"
values\n

Values as numpy array

"},{"location":"api/observation/point/#modelskill.PointObservation.weight","title":"weight property writable","text":"
weight\n

Weighting factor for skill scores

"},{"location":"api/observation/point/#modelskill.PointObservation.x","title":"x property writable","text":"
x\n

x-coordinate

"},{"location":"api/observation/point/#modelskill.PointObservation.y","title":"y property writable","text":"
y\n

y-coordinate

"},{"location":"api/observation/point/#modelskill.PointObservation.z","title":"z property writable","text":"
z\n

z-coordinate of observation point

"},{"location":"api/observation/point/#modelskill.PointObservation.equals","title":"equals","text":"
equals(other)\n

Check if two TimeSeries are equal

Source code in modelskill/timeseries/_timeseries.py
def equals(self, other: TimeSeries) -> bool:\n    \"\"\"Check if two TimeSeries are equal\"\"\"\n    return self.data.equals(other.data)\n
"},{"location":"api/observation/point/#modelskill.PointObservation.sel","title":"sel","text":"
sel(**kwargs)\n

Select data by label

Source code in modelskill/timeseries/_timeseries.py
def sel(self: T, **kwargs: Any) -> T:\n    \"\"\"Select data by label\"\"\"\n    return self.__class__(self.data.sel(**kwargs))\n
"},{"location":"api/observation/point/#modelskill.PointObservation.to_dataframe","title":"to_dataframe","text":"
to_dataframe()\n

Convert matched data to pandas DataFrame

Include x, y coordinates only if gtype=track

Returns:

Type Description DataFrame

data as a pandas DataFrame

Source code in modelskill/timeseries/_timeseries.py
def to_dataframe(self) -> pd.DataFrame:\n    \"\"\"Convert matched data to pandas DataFrame\n\n    Include x, y coordinates only if gtype=track\n\n    Returns\n    -------\n    pd.DataFrame\n        data as a pandas DataFrame\n    \"\"\"\n    if self.gtype == str(GeometryType.POINT):\n        # we remove the scalar coordinate variables as they\n        # will otherwise be columns in the dataframe\n        return self.data.drop_vars([\"x\", \"y\", \"z\"]).to_dataframe()\n    elif self.gtype == str(GeometryType.TRACK):\n        df = self.data.drop_vars([\"z\"]).to_dataframe()\n        # make sure that x, y cols are first\n        cols = [\"x\", \"y\"] + [c for c in df.columns if c not in [\"x\", \"y\"]]\n        return df[cols]\n    else:\n        raise NotImplementedError(f\"Unknown gtype: {self.gtype}\")\n
"},{"location":"api/observation/point/#modelskill.PointObservation.trim","title":"trim","text":"
trim(start_time=None, end_time=None, buffer='1s')\n

Trim observation data to a given time interval

Parameters:

Name Type Description Default start_time Timestamp

start time

None end_time Timestamp

end time

None buffer str

buffer time around start and end time, by default \"1s\"

'1s' Source code in modelskill/timeseries/_timeseries.py
def trim(\n    self: T,\n    start_time: Optional[pd.Timestamp] = None,\n    end_time: Optional[pd.Timestamp] = None,\n    buffer: str = \"1s\",\n) -> T:\n    \"\"\"Trim observation data to a given time interval\n\n    Parameters\n    ----------\n    start_time : pd.Timestamp\n        start time\n    end_time : pd.Timestamp\n        end time\n    buffer : str, optional\n        buffer time around start and end time, by default \"1s\"\n    \"\"\"\n    # Expand time interval with buffer\n    start_time = pd.Timestamp(start_time) - pd.Timedelta(buffer)\n    end_time = pd.Timestamp(end_time) + pd.Timedelta(buffer)\n\n    data = self.data.sel(time=slice(start_time, end_time))\n    if len(data.time) == 0:\n        raise ValueError(\n            f\"No data left after trimming to {start_time} - {end_time}\"\n        )\n    return self.__class__(data)\n
"},{"location":"api/observation/point/#modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter","title":"modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter","text":"

Bases: TimeSeriesPlotter

Source code in modelskill/timeseries/_plotter.py
class MatplotlibTimeSeriesPlotter(TimeSeriesPlotter):\n    def __init__(self, ts) -> None:\n        self._ts = ts\n\n    def __call__(self, **kwargs):\n        # default to timeseries plot\n        self.timeseries(**kwargs)\n\n    def timeseries(\n        self, title=None, color=None, marker=\".\", linestyle=\"None\", **kwargs\n    ):\n        \"\"\"Plot timeseries\n\n        Wraps pandas.DataFrame plot() method.\n\n        Parameters\n        ----------\n        title : str, optional\n            plot title, default: [name]\n        color : str, optional\n            plot color, by default '#d62728'\n        marker : str, optional\n            plot marker, by default '.'\n        linestyle : str, optional\n            line style, by default None\n        **kwargs\n            other keyword arguments to df.plot()\n        \"\"\"\n        kwargs[\"color\"] = self._ts._color if color is None else color\n        ax = self._ts._values_as_series.plot(\n            marker=marker, linestyle=linestyle, **kwargs\n        )\n\n        title = self._ts.name if title is None else title\n        ax.set_title(title)\n\n        ax.set_ylabel(str(self._ts.quantity))\n        return ax\n\n    def hist(self, bins=100, title=None, color=None, **kwargs):\n        \"\"\"Plot histogram of timeseries values\n\n        Wraps pandas.DataFrame hist() method.\n\n        Parameters\n        ----------\n        bins : int, optional\n            specification of bins, by default 100\n        title : str, optional\n            plot title, default: observation name\n        color : str, optional\n            plot color, by default \"#d62728\"\n        **kwargs\n            other keyword arguments to df.hist()\n\n        Returns\n        -------\n        matplotlib axes\n        \"\"\"\n        title = self._ts.name if title is None else title\n\n        kwargs[\"color\"] = self._ts._color if color is None else color\n\n        ax = self._ts._values_as_series.hist(bins=bins, **kwargs)\n        
ax.set_title(title)\n        ax.set_xlabel(str(self._ts.quantity))\n        return ax\n
"},{"location":"api/observation/point/#modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter.hist","title":"hist","text":"
hist(bins=100, title=None, color=None, **kwargs)\n

Plot histogram of timeseries values

Wraps pandas.DataFrame hist() method.

Parameters:

Name Type Description Default bins int

specification of bins, by default 100

100 title str

plot title, default: observation name

None color str

plot color, by default \"#d62728\"

None **kwargs

other keyword arguments to df.hist()

{}

Returns:

Type Description matplotlib axes Source code in modelskill/timeseries/_plotter.py
def hist(self, bins=100, title=None, color=None, **kwargs):\n    \"\"\"Plot histogram of timeseries values\n\n    Wraps pandas.DataFrame hist() method.\n\n    Parameters\n    ----------\n    bins : int, optional\n        specification of bins, by default 100\n    title : str, optional\n        plot title, default: observation name\n    color : str, optional\n        plot color, by default \"#d62728\"\n    **kwargs\n        other keyword arguments to df.hist()\n\n    Returns\n    -------\n    matplotlib axes\n    \"\"\"\n    title = self._ts.name if title is None else title\n\n    kwargs[\"color\"] = self._ts._color if color is None else color\n\n    ax = self._ts._values_as_series.hist(bins=bins, **kwargs)\n    ax.set_title(title)\n    ax.set_xlabel(str(self._ts.quantity))\n    return ax\n
"},{"location":"api/observation/point/#modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter.timeseries","title":"timeseries","text":"
timeseries(title=None, color=None, marker='.', linestyle='None', **kwargs)\n

Plot timeseries

Wraps pandas.DataFrame plot() method.

Parameters:

Name Type Description Default title str

plot title, default: [name]

None color str

plot color, by default '#d62728'

None marker str

plot marker, by default '.'

'.' linestyle str

line style, by default None

'None' **kwargs

other keyword arguments to df.plot()

{} Source code in modelskill/timeseries/_plotter.py
def timeseries(\n    self, title=None, color=None, marker=\".\", linestyle=\"None\", **kwargs\n):\n    \"\"\"Plot timeseries\n\n    Wraps pandas.DataFrame plot() method.\n\n    Parameters\n    ----------\n    title : str, optional\n        plot title, default: [name]\n    color : str, optional\n        plot color, by default '#d62728'\n    marker : str, optional\n        plot marker, by default '.'\n    linestyle : str, optional\n        line style, by default None\n    **kwargs\n        other keyword arguments to df.plot()\n    \"\"\"\n    kwargs[\"color\"] = self._ts._color if color is None else color\n    ax = self._ts._values_as_series.plot(\n        marker=marker, linestyle=linestyle, **kwargs\n    )\n\n    title = self._ts.name if title is None else title\n    ax.set_title(title)\n\n    ax.set_ylabel(str(self._ts.quantity))\n    return ax\n
"},{"location":"api/observation/track/","title":"TrackObservation","text":""},{"location":"api/observation/track/#modelskill.TrackObservation","title":"modelskill.TrackObservation","text":"

Bases: Observation

Class for observation with locations moving in space, e.g. satellite altimetry

The data needs in addition to the datetime of each single observation point also, x and y coordinates.

Create TrackObservation from dfs0 or DataFrame

Parameters:

Name Type Description Default data (str, Path, Dataset, DataFrame, Dataset)

path to dfs0 file or object with track data

required item (str, int)

item name or index of values, by default None if data contains more than one item, item must be given

None name str

user-defined name for easy identification in plots etc, by default file basename

None x_item (str, int)

item name or index of x-coordinate, by default 0

0 y_item (str, int)

item name or index of y-coordinate, by default 1

1 keep_duplicates (str, bool)

strategy for handling duplicate timestamps (xarray.Dataset.drop_duplicates): \"first\" to keep first occurrence, \"last\" to keep last occurrence, False to drop all duplicates, \"offset\" to add milliseconds to consecutive duplicates, by default \"first\"

'first' quantity Quantity

The quantity of the observation, for validation with model results For MIKE dfs files this is inferred from the EUM information

None aux_items list

list of names or indices of auxiliary items, by default None

None attrs dict

additional attributes to be added to the data, by default None

None weight float

weighting factor for skill scores, by default 1.0

1.0

Examples:

>>> import modelskill as ms\n>>> o1 = ms.TrackObservation(\"track.dfs0\", item=2, name=\"c2\")\n
>>> o1 = ms.TrackObservation(\"track.dfs0\", item=\"wind_speed\", name=\"c2\")\n
>>> o1 = ms.TrackObservation(\"lon_after_lat.dfs0\", item=\"wl\", x_item=1, y_item=0)\n
>>> o1 = ms.TrackObservation(\"track_wl.dfs0\", item=\"wl\", x_item=\"lon\", y_item=\"lat\")\n
>>> df = pd.DataFrame(\n...         {\n...             \"t\": pd.date_range(\"2010-01-01\", freq=\"10s\", periods=n),\n...             \"x\": np.linspace(0, 10, n),\n...             \"y\": np.linspace(45000, 45100, n),\n...             \"swh\": [0.1, 0.3, 0.4, 0.5, 0.3],\n...         }\n... )\n>>> df = df.set_index(\"t\")\n>>> df\n                    x        y  swh\nt\n2010-01-01 00:00:00   0.0  45000.0  0.1\n2010-01-01 00:00:10   2.5  45025.0  0.3\n2010-01-01 00:00:20   5.0  45050.0  0.4\n2010-01-01 00:00:30   7.5  45075.0  0.5\n2010-01-01 00:00:40  10.0  45100.0  0.3\n>>> t1 = TrackObservation(df, name=\"fake\")\n>>> t1.n_points\n5\n>>> t1.values\narray([0.1, 0.3, 0.4, 0.5, 0.3])\n>>> t1.time\nDatetimeIndex(['2010-01-01 00:00:00', '2010-01-01 00:00:10',\n           '2010-01-01 00:00:20', '2010-01-01 00:00:30',\n           '2010-01-01 00:00:40'],\n          dtype='datetime64[ns]', name='t', freq=None)\n>>> t1.x\narray([ 0. ,  2.5,  5. ,  7.5, 10. ])\n>>> t1.y\narray([45000., 45025., 45050., 45075., 45100.])\n
Source code in modelskill/obs.py
class TrackObservation(Observation):\n    \"\"\"Class for observation with locations moving in space, e.g. satellite altimetry\n\n    The data needs in addition to the datetime of each single observation point also, x and y coordinates.\n\n    Create TrackObservation from dfs0 or DataFrame\n\n    Parameters\n    ----------\n    data : (str, Path, mikeio.Dataset, pd.DataFrame, xr.Dataset)\n        path to dfs0 file or object with track data\n    item : (str, int), optional\n        item name or index of values, by default None\n        if data contains more than one item, item must be given\n    name : str, optional\n        user-defined name for easy identification in plots etc, by default file basename\n    x_item : (str, int), optional\n        item name or index of x-coordinate, by default 0\n    y_item : (str, int), optional\n        item name or index of y-coordinate, by default 1\n    keep_duplicates : (str, bool), optional\n        strategy for handling duplicate timestamps (xarray.Dataset.drop_duplicates):\n        \"first\" to keep first occurrence, \"last\" to keep last occurrence,\n        False to drop all duplicates, \"offset\" to add milliseconds to\n        consecutive duplicates, by default \"first\"\n    quantity : Quantity, optional\n        The quantity of the observation, for validation with model results\n        For MIKE dfs files this is inferred from the EUM information\n    aux_items : list, optional\n        list of names or indices of auxiliary items, by default None\n    attrs : dict, optional\n        additional attributes to be added to the data, by default None\n    weight : float, optional\n        weighting factor for skill scores, by default 1.0\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> o1 = ms.TrackObservation(\"track.dfs0\", item=2, name=\"c2\")\n\n    >>> o1 = ms.TrackObservation(\"track.dfs0\", item=\"wind_speed\", name=\"c2\")\n\n    >>> o1 = ms.TrackObservation(\"lon_after_lat.dfs0\", 
item=\"wl\", x_item=1, y_item=0)\n\n    >>> o1 = ms.TrackObservation(\"track_wl.dfs0\", item=\"wl\", x_item=\"lon\", y_item=\"lat\")\n\n    >>> df = pd.DataFrame(\n    ...         {\n    ...             \"t\": pd.date_range(\"2010-01-01\", freq=\"10s\", periods=n),\n    ...             \"x\": np.linspace(0, 10, n),\n    ...             \"y\": np.linspace(45000, 45100, n),\n    ...             \"swh\": [0.1, 0.3, 0.4, 0.5, 0.3],\n    ...         }\n    ... )\n    >>> df = df.set_index(\"t\")\n    >>> df\n                        x        y  swh\n    t\n    2010-01-01 00:00:00   0.0  45000.0  0.1\n    2010-01-01 00:00:10   2.5  45025.0  0.3\n    2010-01-01 00:00:20   5.0  45050.0  0.4\n    2010-01-01 00:00:30   7.5  45075.0  0.5\n    2010-01-01 00:00:40  10.0  45100.0  0.3\n    >>> t1 = TrackObservation(df, name=\"fake\")\n    >>> t1.n_points\n    5\n    >>> t1.values\n    array([0.1, 0.3, 0.4, 0.5, 0.3])\n    >>> t1.time\n    DatetimeIndex(['2010-01-01 00:00:00', '2010-01-01 00:00:10',\n               '2010-01-01 00:00:20', '2010-01-01 00:00:30',\n               '2010-01-01 00:00:40'],\n              dtype='datetime64[ns]', name='t', freq=None)\n    >>> t1.x\n    array([ 0. ,  2.5,  5. ,  7.5, 10. 
])\n    >>> t1.y\n    array([45000., 45025., 45050., 45075., 45100.])\n\n    \"\"\"\n\n    def __init__(\n        self,\n        data: TrackType,\n        *,\n        item: Optional[int | str] = None,\n        name: Optional[str] = None,\n        weight: float = 1.0,\n        x_item: Optional[int | str] = 0,\n        y_item: Optional[int | str] = 1,\n        keep_duplicates: bool | str = \"first\",\n        offset_duplicates: float = 0.001,\n        quantity: Optional[Quantity] = None,\n        aux_items: Optional[list[int | str]] = None,\n        attrs: Optional[dict] = None,\n    ) -> None:\n        if not self._is_input_validated(data):\n            if offset_duplicates != 0.001:\n                warnings.warn(\n                    \"The 'offset_duplicates' argument is deprecated, use 'keep_duplicates' argument.\",\n                    FutureWarning,\n                )\n            data = _parse_track_input(\n                data=data,\n                name=name,\n                item=item,\n                quantity=quantity,\n                x_item=x_item,\n                y_item=y_item,\n                keep_duplicates=keep_duplicates,\n                offset_duplicates=offset_duplicates,\n                aux_items=aux_items,\n            )\n        assert isinstance(data, xr.Dataset)\n        super().__init__(data=data, weight=weight, attrs=attrs)\n
"},{"location":"api/observation/track/#modelskill.TrackObservation.attrs","title":"attrs property writable","text":"
attrs\n

Attributes of the observation

"},{"location":"api/observation/track/#modelskill.TrackObservation.gtype","title":"gtype property","text":"
gtype\n

Geometry type

"},{"location":"api/observation/track/#modelskill.TrackObservation.n_points","title":"n_points property","text":"
n_points\n

Number of data points

"},{"location":"api/observation/track/#modelskill.TrackObservation.name","title":"name property writable","text":"
name\n

Name of time series (value item name)

"},{"location":"api/observation/track/#modelskill.TrackObservation.plot","title":"plot instance-attribute","text":"
plot = plotter(self)\n

Plot using the ComparerPlotter

Examples:

>>> obj.plot.timeseries()\n>>> obj.plot.hist()\n
"},{"location":"api/observation/track/#modelskill.TrackObservation.quantity","title":"quantity property writable","text":"
quantity\n

Quantity of time series

"},{"location":"api/observation/track/#modelskill.TrackObservation.time","title":"time property","text":"
time\n

Time index

"},{"location":"api/observation/track/#modelskill.TrackObservation.values","title":"values property","text":"
values\n

Values as numpy array

"},{"location":"api/observation/track/#modelskill.TrackObservation.weight","title":"weight property writable","text":"
weight\n

Weighting factor for skill scores

"},{"location":"api/observation/track/#modelskill.TrackObservation.x","title":"x property writable","text":"
x\n

x-coordinate

"},{"location":"api/observation/track/#modelskill.TrackObservation.y","title":"y property writable","text":"
y\n

y-coordinate

"},{"location":"api/observation/track/#modelskill.TrackObservation.equals","title":"equals","text":"
equals(other)\n

Check if two TimeSeries are equal

Source code in modelskill/timeseries/_timeseries.py
def equals(self, other: TimeSeries) -> bool:\n    \"\"\"Check if two TimeSeries are equal\"\"\"\n    return self.data.equals(other.data)\n
"},{"location":"api/observation/track/#modelskill.TrackObservation.sel","title":"sel","text":"
sel(**kwargs)\n

Select data by label

Source code in modelskill/timeseries/_timeseries.py
def sel(self: T, **kwargs: Any) -> T:\n    \"\"\"Select data by label\"\"\"\n    return self.__class__(self.data.sel(**kwargs))\n
"},{"location":"api/observation/track/#modelskill.TrackObservation.to_dataframe","title":"to_dataframe","text":"
to_dataframe()\n

Convert matched data to pandas DataFrame

Include x, y coordinates only if gtype=track

Returns:

Type Description DataFrame

data as a pandas DataFrame

Source code in modelskill/timeseries/_timeseries.py
def to_dataframe(self) -> pd.DataFrame:\n    \"\"\"Convert matched data to pandas DataFrame\n\n    Include x, y coordinates only if gtype=track\n\n    Returns\n    -------\n    pd.DataFrame\n        data as a pandas DataFrame\n    \"\"\"\n    if self.gtype == str(GeometryType.POINT):\n        # we remove the scalar coordinate variables as they\n        # will otherwise be columns in the dataframe\n        return self.data.drop_vars([\"x\", \"y\", \"z\"]).to_dataframe()\n    elif self.gtype == str(GeometryType.TRACK):\n        df = self.data.drop_vars([\"z\"]).to_dataframe()\n        # make sure that x, y cols are first\n        cols = [\"x\", \"y\"] + [c for c in df.columns if c not in [\"x\", \"y\"]]\n        return df[cols]\n    else:\n        raise NotImplementedError(f\"Unknown gtype: {self.gtype}\")\n
"},{"location":"api/observation/track/#modelskill.TrackObservation.trim","title":"trim","text":"
trim(start_time=None, end_time=None, buffer='1s')\n

Trim observation data to a given time interval

Parameters:

Name Type Description Default start_time Timestamp

start time

None end_time Timestamp

end time

None buffer str

buffer time around start and end time, by default \"1s\"

'1s' Source code in modelskill/timeseries/_timeseries.py
def trim(\n    self: T,\n    start_time: Optional[pd.Timestamp] = None,\n    end_time: Optional[pd.Timestamp] = None,\n    buffer: str = \"1s\",\n) -> T:\n    \"\"\"Trim observation data to a given time interval\n\n    Parameters\n    ----------\n    start_time : pd.Timestamp\n        start time\n    end_time : pd.Timestamp\n        end time\n    buffer : str, optional\n        buffer time around start and end time, by default \"1s\"\n    \"\"\"\n    # Expand time interval with buffer\n    start_time = pd.Timestamp(start_time) - pd.Timedelta(buffer)\n    end_time = pd.Timestamp(end_time) + pd.Timedelta(buffer)\n\n    data = self.data.sel(time=slice(start_time, end_time))\n    if len(data.time) == 0:\n        raise ValueError(\n            f\"No data left after trimming to {start_time} - {end_time}\"\n        )\n    return self.__class__(data)\n
"},{"location":"api/observation/track/#modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter","title":"modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter","text":"

Bases: TimeSeriesPlotter

Source code in modelskill/timeseries/_plotter.py
class MatplotlibTimeSeriesPlotter(TimeSeriesPlotter):\n    def __init__(self, ts) -> None:\n        self._ts = ts\n\n    def __call__(self, **kwargs):\n        # default to timeseries plot\n        self.timeseries(**kwargs)\n\n    def timeseries(\n        self, title=None, color=None, marker=\".\", linestyle=\"None\", **kwargs\n    ):\n        \"\"\"Plot timeseries\n\n        Wraps pandas.DataFrame plot() method.\n\n        Parameters\n        ----------\n        title : str, optional\n            plot title, default: [name]\n        color : str, optional\n            plot color, by default '#d62728'\n        marker : str, optional\n            plot marker, by default '.'\n        linestyle : str, optional\n            line style, by default None\n        **kwargs\n            other keyword arguments to df.plot()\n        \"\"\"\n        kwargs[\"color\"] = self._ts._color if color is None else color\n        ax = self._ts._values_as_series.plot(\n            marker=marker, linestyle=linestyle, **kwargs\n        )\n\n        title = self._ts.name if title is None else title\n        ax.set_title(title)\n\n        ax.set_ylabel(str(self._ts.quantity))\n        return ax\n\n    def hist(self, bins=100, title=None, color=None, **kwargs):\n        \"\"\"Plot histogram of timeseries values\n\n        Wraps pandas.DataFrame hist() method.\n\n        Parameters\n        ----------\n        bins : int, optional\n            specification of bins, by default 100\n        title : str, optional\n            plot title, default: observation name\n        color : str, optional\n            plot color, by default \"#d62728\"\n        **kwargs\n            other keyword arguments to df.hist()\n\n        Returns\n        -------\n        matplotlib axes\n        \"\"\"\n        title = self._ts.name if title is None else title\n\n        kwargs[\"color\"] = self._ts._color if color is None else color\n\n        ax = self._ts._values_as_series.hist(bins=bins, **kwargs)\n        
ax.set_title(title)\n        ax.set_xlabel(str(self._ts.quantity))\n        return ax\n
"},{"location":"api/observation/track/#modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter.hist","title":"hist","text":"
hist(bins=100, title=None, color=None, **kwargs)\n

Plot histogram of timeseries values

Wraps pandas.DataFrame hist() method.

Parameters:

Name Type Description Default bins int

specification of bins, by default 100

100 title str

plot title, default: observation name

None color str

plot color, by default \"#d62728\"

None **kwargs

other keyword arguments to df.hist()

{}

Returns:

Type Description matplotlib axes Source code in modelskill/timeseries/_plotter.py
def hist(self, bins=100, title=None, color=None, **kwargs):\n    \"\"\"Plot histogram of timeseries values\n\n    Wraps pandas.DataFrame hist() method.\n\n    Parameters\n    ----------\n    bins : int, optional\n        specification of bins, by default 100\n    title : str, optional\n        plot title, default: observation name\n    color : str, optional\n        plot color, by default \"#d62728\"\n    **kwargs\n        other keyword arguments to df.hist()\n\n    Returns\n    -------\n    matplotlib axes\n    \"\"\"\n    title = self._ts.name if title is None else title\n\n    kwargs[\"color\"] = self._ts._color if color is None else color\n\n    ax = self._ts._values_as_series.hist(bins=bins, **kwargs)\n    ax.set_title(title)\n    ax.set_xlabel(str(self._ts.quantity))\n    return ax\n
"},{"location":"api/observation/track/#modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter.timeseries","title":"timeseries","text":"
timeseries(title=None, color=None, marker='.', linestyle='None', **kwargs)\n

Plot timeseries

Wraps pandas.DataFrame plot() method.

Parameters:

Name Type Description Default title str

plot title, default: [name]

None color str

plot color, by default '#d62728'

None marker str

plot marker, by default '.'

'.' linestyle str

line style, by default None

'None' **kwargs

other keyword arguments to df.plot()

{} Source code in modelskill/timeseries/_plotter.py
def timeseries(\n    self, title=None, color=None, marker=\".\", linestyle=\"None\", **kwargs\n):\n    \"\"\"Plot timeseries\n\n    Wraps pandas.DataFrame plot() method.\n\n    Parameters\n    ----------\n    title : str, optional\n        plot title, default: [name]\n    color : str, optional\n        plot color, by default '#d62728'\n    marker : str, optional\n        plot marker, by default '.'\n    linestyle : str, optional\n        line style, by default None\n    **kwargs\n        other keyword arguments to df.plot()\n    \"\"\"\n    kwargs[\"color\"] = self._ts._color if color is None else color\n    ax = self._ts._values_as_series.plot(\n        marker=marker, linestyle=linestyle, **kwargs\n    )\n\n    title = self._ts.name if title is None else title\n    ax.set_title(title)\n\n    ax.set_ylabel(str(self._ts.quantity))\n    return ax\n
"},{"location":"user-guide/","title":"User Guide","text":"

ModelSkill compares model results with observations. The workflow can be split in two phases:

  1. Matching - making sure that observations and model results are in the same space and time
  2. Analysis - plots and statistics of the matched data

If the observations and model results are already matched (i.e. are stored in the same data source), the from_matched() function can be used to go directly to the analysis phase. If not, the match() function can be used to match the observations and model results in space and time.

"},{"location":"user-guide/data-structures/","title":"Data Structures","text":"

The main data structures in ModelSkill can be grouped into three categories:

  • Primary data (observations and model results)
  • Comparer objects
  • Skill objects

All objects share some common principles:

  • The data container is accessible via the data attribute.
  • The data container is an xarray object (except for the SkillTable object, which is a pandas object).
  • The main data selection method is sel, which is a wrapper around xarray.Dataset.sel.
  • All plotting are accessible via the plot accessor of the object.
"},{"location":"user-guide/data-structures/#observations-and-model-results","title":"Observations and model results","text":"

The primary data of ModelSkill are the data that needs to be compared: observations and model results. The underlying data structures are very similar and can be grouped according to the spatial dimensionality (gtype) of the data:

  • point: 0D time series data
  • track: 0D time series data at moving locations (trajectories)
  • grid: gridded 2D data
  • dfsu: flexible mesh 2D data

Point and track data are both TimeSeries objects, while grid and dfsu data are both SpatialField objects. TimeSeries objects are ready to be compared whereas data from SpatialField object needs to be extracted first (the extracted object will be of the TimeSeries type).

TimeSeries objects contain their data in an xarray.Dataset with the actual data in the first DataArray and optional auxiliary data in the following DataArrays. The DataArrays have a kind attribute with either observation or model.

"},{"location":"user-guide/data-structures/#comparer-objects","title":"Comparer objects","text":"

Comparer objects are results of a matching procedure (between observations and model results) or constructed directly from already matched data. A comparison of a single observation and one or more model results are stored in a Comparer object. A comparison of multiple observations and one or more model results are stored in a ComparerCollection object which is a collection of Comparer objects.

The matched data in a Comparer is stored in an xarray.Dataset which can be accessed via the data attribute. The Dataset has an attribute gtype which is a string describing the type of data (e.g. point, track). The first DataArray in the Dataset is the observation data, the next DataArrays are model result data and optionally additional DataArrays are auxiliary data. Each of the DataArrays has a kind attribute with either observation, model or aux.

Both Comparer and ComparerCollection have a plot accessor for plotting the data (e.g. cmp.plot.timeseries() or cmp.plot.scatter()).

"},{"location":"user-guide/data-structures/#skill-objects","title":"Skill objects","text":"

Calling a skill method on a comparer object will return a skill object with skill scores (statistics) from comparing observation and model result data using different metrics (e.g. root mean square error). Two skill objects are currently implemented: SkillTable and SkillGrid. The first is relevant for all ModelSkill users while the latter is relevant for users of the track data (e.g. MetOcean studies using satellite altimetry data).

If c is a comparer object, then the following skill methods are available:

  • c.skill() -> SkillTable
  • c.mean_skill() -> SkillTable
  • c.gridded_skill() -> SkillGrid
"},{"location":"user-guide/data-structures/#skilltable","title":"SkillTable","text":""},{"location":"user-guide/getting-started/","title":"Getting started","text":"

This page describes the typical ModelSkill workflow for comparing model results and observations.

"},{"location":"user-guide/getting-started/#workflow","title":"Workflow","text":"

The typical ModelSkill workflow consists of these four steps:

  1. Define Observations
  2. Define ModelResults
  3. Match observations and ModelResults in space and time
  4. Do analysis, plotting, etc with a Comparer
"},{"location":"user-guide/getting-started/#define-observations","title":"Define Observations","text":"

The first step is to define the measurements to be used for the skill assessment. Two types of observation are available:

  • PointObservation
  • TrackObservation

Let's assume that we have one PointObservation and one TrackObservation (name is used to identify the observation, similar to the name of the model above).

hkna = ms.PointObservation(\"HKNA_Hm0.dfs0\", item=0,\n                            x=4.2420, y=52.6887,\n                            name=\"HKNA\")\nc2 = ms.TrackObservation(\"Alti_c2_Dutch.dfs0\", item=3,\n                          name=\"c2\")\n

In this case both observations are provided as .dfs0 files but pandas dataframes are also supported in case data are stored in another file format.

Both PointObservation and TrackObservation need the path of the data file, the item number (or item name) and a name. A PointObservation further needs to be initialized with its x- and y-position.

"},{"location":"user-guide/getting-started/#define-modelresults","title":"Define ModelResults","text":"

The result of a simulation is stored in one or more result files, e.g. dfsu, dfs0, nc, csv.

The name is used to identify the model result in the plots and tables.

import modelskill as ms\nmr = ms.DfsuModelResult(\"SW/HKZN_local_2017_DutchCoast.dfsu\", \n                         item=\"Sign. Wave Height\",\n                         name='HKZN_local')\n
"},{"location":"user-guide/getting-started/#match-observations-and-modelresults","title":"Match observations and ModelResults","text":"

This match() method returns a Comparer (a single observation) or a ComparerCollection (multiple observations) for further analysis and plotting.

cc = ms.match([hkna, c2], mr)\n
"},{"location":"user-guide/getting-started/#do-analysis-plotting-etc-with-a-comparer","title":"Do analysis, plotting, etc with a Comparer","text":"

The object returned by the match() method is a Comparer/ComparerCollection. It holds the matched observation and model data and has methods for plotting and skill assessment.

The primary comparer methods are:

  • skill() which returns a SkillTable with the skill scores
  • various plot methods of the comparer objects (e.g. plot.scatter(), plot.timeseries())
  • sel() method for selecting data
"},{"location":"user-guide/getting-started/#save-load-the-comparercollection","title":"Save / load the ComparerCollection","text":"

It can be useful to save the comparer collection for later use. This can be done using the save() method:

cc.save(\"my_comparer_collection.msk\")\n

The comparer collection can be loaded again from disk, using the load() method:

cc = ms.load(\"my_comparer_collection.msk\")\n
"},{"location":"user-guide/getting-started/#filtering","title":"Filtering","text":"

In order to select only a subset of the data for analysis, the comparer has a sel() method which returns a new comparer with the selected data.

This method allows filtering of the data in several ways:

  • on observation by specifying name or index of one or more observations
  • on model (if more than one is compared) by giving name or index
  • temporal using the time (or start and end) arguments
  • spatial using the area argument given as a bounding box or a polygon
"},{"location":"user-guide/matching/","title":"Matching","text":"

Once observations and model results have been defined, the next step is to match them. This is done using the match() function which handles the alignment of the observation and model result data in space and time. Note that if the data is already matched, the from_matched() function can be used to create a Comparer directly from the matched data and the matching described here is not needed.

The observation is considered the truth and the model result data is therefore interpolated to the observation data positions.

The matching process will be different depending on the geometry of observation and model result:

  • Geometries are the same (e.g. both are point time series): only temporal matching is needed
  • Geometries are different (e.g. observation is a point time series and model result is a grid): data is first spatially extracted from the model result and then matched in time.
"},{"location":"user-guide/matching/#temporal-matching","title":"Temporal matching","text":"

Temporal matching is done by interpolating the model result data to the observation data time points; it is carried out after spatial matching when applicable. The interpolation is linear in time and done inside the match() function.

"},{"location":"user-guide/matching/#matching-of-time-series","title":"Matching of time series","text":"

If observation and model result are of the same geometry, the matching is done one observation at a time. Several model results can be matched to the same observation. The result of the matching process is a Comparer object which contains the matched data.

In the most simple cases, one observation to one model result, the match() function can be used directly, without creating Observation and ModelResult objects first:

>>> cmp = ms.match('obs.dfs0', 'model.dfs0', obs_item='obs_WL', mod_item='WL')\n

In all other cases, the observations and model results needs to be defined first.

>>> o = ms.observation('obs.dfs0', item='waterlevel')\n>>> mr1 = ms.model_result('model1.dfs0', item='WL1')\n>>> mr2 = ms.model_result('model2.dfs0', item='WL2')\n>>> cmp = ms.match(o, [mr1, mr2])\n

In most cases, several observations needs to matched with several model results. This can be done by constructing a list of Comparer objects and then combining them into a ComparerCollection:

>>> cmps = []\n>>> for o in observations:\n>>>     mr1 = ...\n>>>     mr2 = ...\n>>>     cmps.append(ms.match(o, [mr1, mr2]))\n>>> cc = ms.ComparerCollection(cmps)\n
"},{"location":"user-guide/matching/#matching-with-dfsu-or-grid-model-result","title":"Matching with dfsu or grid model result","text":"

If the model result is a SpatialField, i.e., either a GridModelResult or a DfsuModelResult, and the observation is of lower dimension (e.g. point), then the model result needs to be extracted before matching can be done. This can be done \"offline\" before using ModelSkill, e.g., using MIKE tools or MIKE IO, or as part of the matching process using ModelSkill. We will here focus on the latter.

In this situation, multiple observations can be matched to the same model result, in which case the match function returns a ComparerCollection instead of a Comparer which is the returned object for single observation matching.

>>> o1 = ms.observation('obs1.dfs0', item='waterlevel')\n>>> o2 = ms.observation('obs2.dfs0', item='waterlevel')\n>>> mr = ms.model_result('model.dfsu', item='WaterLevel')\n>>> cc = ms.match([o1, o2], mr)   # returns a ComparerCollection\n

Matching PointObservation with SpatialField model results consists of two steps:

  1. Extracting data from the model result at the spatial position of the observation, which returns a PointModelResult
  2. Matching the extracted data with the observation data in time

Matching TrackObservation with SpatialField model results is for technical reasons handled in one step, i.e., the data is extracted in both space and time.

The spatial matching method (selection or interpolation) can be specified using the spatial_method argument of the match() function. The default method depends on the type of observation and model result as specified in the sections below.

"},{"location":"user-guide/matching/#extracting-data-from-a-dfsumodelresult","title":"Extracting data from a DfsuModelResult","text":"

Extracting data for a specific point position from the flexible mesh dfsu files can be done in several ways (specified by the spatial_method argument of the match() function):

  • Selection of the \"contained\" element
  • Selection of the \"nearest\" element (often the same as the contained element, but not always)
  • Interpolation with \"inverse_distance\" weighting (IDW) using the five nearest elements (default)

The default (inverse_distance) is not necessarily the best method in all cases. When the extracted position is close to the model boundary, \"contained\" may be a better choice.

>>> cc = ms.match([o1, o2], mr_dfsu, spatial_method='contained')   \n

Note that extraction of track data does not currently support the \"contained\" method.

Note that the extraction of point data from 3D dfsu files is not yet fully supported. It is recommended to extract the data \"offline\" prior to using ModelSkill.

"},{"location":"user-guide/matching/#extracting-data-from-a-gridmodelresult","title":"Extracting data from a GridModelResult","text":"

Extracting data from a GridModelResult is done through xarray's interp() function. The spatial_method argument of the match() function is passed on to the interp() function as the method argument. The default method is \"linear\" which is the recommended method for most cases. Close to land where the grid model result data is often missing, \"nearest\" may be a better choice.

>>> cc = ms.match([o1, o2], mr_netcdf, spatial_method='nearest')   \n
"},{"location":"user-guide/matching/#event-based-matching-and-handling-of-gaps","title":"Event-based matching and handling of gaps","text":"

If the model result data contains gaps either because only events are stored or because of missing data, the max_model_gap argument of the match() function can be used to specify the maximum allowed gap (in seconds) in the model result data. This will avoid interpolating model data over long gaps in the model result data!

"},{"location":"user-guide/matching/#multiple-model-results-with-different-temporal-coverage","title":"Multiple model results with different temporal coverage","text":"

If the model results have different temporal coverage, the match() function will only match the overlapping time period to ensure that the model results are comparable. The Comparer object will contain the matched data for the overlapping period only.

"},{"location":"user-guide/overview/","title":"Overview","text":"

ModelSkill compares model results with observations. The workflow can be split in two phases:

  1. Matching - making sure that observations and model results are in the same space and time
  2. Analysis - plots and statistics of the matched data

If the observations and model results are already matched (i.e. are stored in the same data source), the from_matched() function can be used to go directly to the analysis phase. If not, the match() function can be used to match the observations and model results in space and time.

"},{"location":"user-guide/overview/#matching","title":"Matching","text":"

If the observations and model results are not in the same data source (e.g. dfs0 file), they will need to be defined and then matched in space and time with the match() function. In simple cases, observations and model results can be defined directly in the match() function:

import modelskill as ms\ncmp = ms.match(\"obs.dfs0\", \"model.dfs0\", obs_item=\"obs_WL\", mod_item=\"WL\")\n

But in most cases, the observations and model results will need to be defined separately first.

"},{"location":"user-guide/overview/#define-observations","title":"Define observations","text":"

The observations can be defined as either a PointObservation or a TrackObservation (a moving point).

o1 = ms.PointObservation(\"stn1.dfs0\", item=\"obs_WL\")\no2 = ms.PointObservation(\"stn2.dfs0\", item=\"obs_WL\")\n

The item needs to be specified as either the item number or the item name if the input file contains multiple items. Several other parameters can be specified, such as the name of the observation, the x- and y-position, and the quantity type and unit of the observation.

"},{"location":"user-guide/overview/#define-model-results","title":"Define model results","text":"

A model result will either be a simple point/track like the observations, or spatial field (e.g. 2d dfsu file) from which the model results will be extracted at the observation positions. The following types are available:

  • PointModelResult - a point result from a dfs0/nc file or a DataFrame
  • TrackModelResult - a track result from a dfs0/nc file or a DataFrame
  • GridModelResult - a spatial field from a dfs2/nc file or a Xarray Dataset
  • DfsuModelResult - a spatial field from a dfsu file
mr1 = ms.PointModelResult(\"model.dfs0\", item=\"WL_stn1\")\nmr2 = ms.PointModelResult(\"model.dfs0\", item=\"WL_stn2\")\n
"},{"location":"user-guide/overview/#match-observations-and-model-results","title":"Match observations and model results","text":"

The match() function will interpolate the model results to the time (and space) of the observations and return a collection of Comparer objects that can be used for analysis.

cc1 = ms.match(o1, mr1)\ncc2 = ms.match(o2, mr2)\ncc = cc1 + cc2\n
"},{"location":"user-guide/overview/#analysis","title":"Analysis","text":"

Once the observations and model results are matched, the Comparer object can be used for analysis and plotting.

"},{"location":"user-guide/plotting/","title":"Plotting","text":""},{"location":"user-guide/plotting/#plotting-observations-and-model-results","title":"Plotting observations and model results","text":"

PointObservations and PointModelResults can be plotted using their plot accessor:

>>> o.plot.timeseries()\n>>> mr.plot.timeseries()\n>>> mr.plot.hist()\n

Only the observation time series is shown here:

"},{"location":"user-guide/plotting/#plotting-temporal-coverage","title":"Plotting temporal coverage","text":"

The temporal coverage of observations and model results can be plotted using the temporal_coverage function in the plotting module:

>>> o1 = ms.PointObservation('HKNA.dfs0', item=0, x=4.2420, y=52.6887)\n>>> o2 = ms.PointObservation('EPL.dfs0', item=0, x=3.2760, y=51.9990)\n>>> o3 = ms.TrackObservation(\"Alti_c2.dfs0\", item=3)\n>>> mr = ms.DfsuModelResult('HKZN_local.dfsu', item=0)\n>>> ms.plotting.temporal_coverage(obs=[o1, o2, o3], mod=mr)\n

"},{"location":"user-guide/plotting/#plotting-spatial-overview","title":"Plotting spatial overview","text":"

The spatial coverage of observations and model results can be plotted using the spatial_overview function in the plotting module:

>>> ms.plotting.spatial_overview([o1, o2, o3], mr)\n

"},{"location":"user-guide/plotting/#plotting-compared-data","title":"Plotting compared data","text":"

The plot accessor on a Comparer or ComparerCollection object can be used to plot the compared data:

>>> cmp.plot.timeseries()\n>>> cc.plot.timeseries()\n>>> cc.plot.scatter()\n
"},{"location":"user-guide/plotting/#plotting-taylor-diagrams","title":"Plotting Taylor diagrams","text":"

A Taylor diagram shows how well a model result matches an observation in terms of correlation, standard deviation and root mean square error. The taylor plot can be accessed through the Comparer plot accessor or the ComparerCollection plot accessor:

>>> cc = ms.match([o1, o2, o3], [mr_CMEMS, mr_ERA5, mr_MIKE21SW])\n>>> cc.plot.taylor()\n

The radial distance from the point to the observation point is the standard deviation ratio, the angle is the correlation coefficient and the distance from the observation point to the model point is the root mean square error ratio. The closer the model point is to the observation point, the better the model result matches the observation. The closer the model point is to the origin, the better the model result matches the observation in terms of standard deviation and root mean square error. The closer the model point is to the horizontal axis, the better the model result matches the observation in terms of correlation.

"},{"location":"user-guide/plotting/#plotting-directional-data-eg-wind-or-currents","title":"Plotting directional data (e.g. wind or currents)","text":"

Directional data can be plotted using the wind_rose function in the plotting module. The function takes an array-like structure with speed and direction as columns (from one or two sources) and plots a wind rose:

>>> df = pd.read_csv('wind.csv', index_col=0, parse_dates=True)\n>>> ms.plotting.wind_rose(df)\n

"},{"location":"user-guide/selecting-data/","title":"Selecting/filtering data","text":"

The primary data filtering method of ModelSkill is the sel() method which is accessible on most ModelSkill data structures. The sel() method is a wrapper around xarray.Dataset.sel() and can be used to select data based on time, location and/or variable. The sel() method returns a new data structure of the same type with the selected data.

"},{"location":"user-guide/selecting-data/#timeseries-data","title":"TimeSeries data","text":"

Point and track timeseries data of both observation and model result kinds are stored in TimeSeries objects which uses xarray.Dataset as data container. The sel() method can be used to select data based on time and returns a new TimeSeries object with the selected data.

>>> o = ms.observation('obs.nc', item='waterlevel')\n>>> o_1month = o.sel(time=slice('2018-01-01', '2018-02-01'))\n
"},{"location":"user-guide/selecting-data/#comparer-objects","title":"Comparer objects","text":"

Comparer and ComparerCollection contain matched data from observations and model results. The sel() method can be used to select data based on time, model, quantity or other criteria and returns a new comparer object with the selected data.

>>> cmp = ms.match(o, [m1, m2])\n>>> cmp_1month = cmp.sel(time=slice('2018-01-01', '2018-02-01'))\n>>> cmp_m1 = cmp.sel(model='m1')\n
"},{"location":"user-guide/selecting-data/#skill-objects","title":"Skill objects","text":"

The skill() and mean_skill() methods return a SkillTable object with skill scores from comparing observation and model result data using different metrics (e.g. root mean square error). The data of the SkillTable object is stored in a (MultiIndex) pandas.DataFrame which can be accessed via the data attribute. The sel() method can be used to select specific rows and returns a new SkillTable object with the selected data.

>>> sk = cmp.skill()\n>>> sk_m1 = sk.sel(model='m1')\n
"},{"location":"user-guide/skill/","title":"Skill","text":"

Matched data can be analysed statistically using the skill() function. The function returns a Skill object which contains the statistical results. The Skill object can be printed to the console or saved to a file using the save() function.

```python

"},{"location":"user-guide/terminology/","title":"Terminology","text":"

ModelSkill is a library for assessing the skill of numerical models. It provides tools for comparing model results with observations, plotting the results and calculating validation metrics. This page defines some of the key terms used in the documentation.

"},{"location":"user-guide/terminology/#skill","title":"Skill","text":"

Skill refers to the ability of a numerical model to accurately represent the real-world phenomenon it aims to simulate. It is a measure of how well the model performs in reproducing the observed system. Skill can be assessed using various metrics, such as accuracy, precision, and reliability, depending on the specific goals of the model and the nature of the data. In ModelSkill, skill is also a specific method on Comparer objects that returns a SkillTable with aggregated skill scores per observation and model for a list of selected metrics.

"},{"location":"user-guide/terminology/#validation","title":"Validation","text":"

Validation is the process of assessing the model's performance by comparing its output to real-world observations or data collected from the system being modeled. It helps ensure that the model accurately represents the system it simulates. Validation is typically performed before the model is used for prediction or decision-making.

"},{"location":"user-guide/terminology/#calibration","title":"Calibration","text":"

Calibration is the process of adjusting the model's parameters or settings to improve its performance. It involves fine-tuning the model to better match observed data. Calibration aims to reduce discrepancies between model predictions and actual measurements. At the end of the calibration process, the calibrated model should be validated with independent data.

"},{"location":"user-guide/terminology/#performance","title":"Performance","text":"

Performance is a measure of how well a numerical model operates in reproducing the observed system. It can be assessed using various metrics, such as accuracy, precision, and reliability, depending on the specific goals of the model and the nature of the data. In this context, performance is synonymous with skill.

"},{"location":"user-guide/terminology/#timeseries","title":"Timeseries","text":"

A timeseries is a sequence of data points in time. In ModelSkill, the data can either be from observations or model results. Timeseries can be univariate or multivariate; ModelSkill primarily supports univariate timeseries. Multivariate timeseries can be assessed one variable at a time. Timeseries can also have different spatial dimensions, such as point, track, line, or area.

"},{"location":"user-guide/terminology/#observation","title":"Observation","text":"

An observation refers to real-world data or measurements collected from the system you are modeling. Observations serve as a reference for assessing the model's performance. These data points are used to compare with the model's predictions during validation and calibration. Observations are usually based on field measurements or laboratory experiments, but for the purposes of model validation, they can also be derived from other models (e.g. a reference model). ModelSkill supports point and track observation types.

"},{"location":"user-guide/terminology/#measurement","title":"Measurement","text":"

A measurement is called an observation in ModelSkill.

"},{"location":"user-guide/terminology/#model-result","title":"Model result","text":"

A model result is the output of any type of numerical model. It is the data generated by the model during a simulation. Model results can be compared with observations to assess the model's performance. In the context of validation, the term \"model result\" is often used interchangeably with \"model output\" or \"model prediction\". ModelSkill supports point, track, dfsu and grid model result types.

"},{"location":"user-guide/terminology/#metric","title":"Metric","text":"

A metric is a quantitative measure (a mathematical expression) used to evaluate the performance of a numerical model. Metrics provide a standardized way to assess the model's accuracy, precision, and other attributes. A metric aggregates the skill of a model into a single number. See list of metrics supported by ModelSkill.

"},{"location":"user-guide/terminology/#score","title":"Score","text":"

A score is a numerical value that summarizes the model's performance based on chosen metrics. Scores can be used to rank or compare different models or model configurations. In the context of validation, the \"skill score\" or \"validation score\" often quantifies the model's overall performance. The score of a model is a single number, calculated as a weighted average for all time-steps, observations and variables. If you want to perform automated calibration, you can use the score as the objective function. In ModelSkill, score is also a specific method on Comparer objects that returns a single number aggregated score using a specific metric.

"},{"location":"user-guide/terminology/#matched-data","title":"Matched data","text":"

In ModelSkill, observations and model results are matched when they refer to the same positions in space and time. If the observations and model results are already matched, the from_matched function can be used to create a Comparer directly. Otherwise, the match function can be used to match the observations and model results in space and time.

"},{"location":"user-guide/terminology/#match","title":"match()","text":"

The function match is used to match a model result with observations. It returns a Comparer object or a ComparerCollection object.

"},{"location":"user-guide/terminology/#comparer","title":"Comparer","text":"

A Comparer is an object that stores the matched observation and model result data for a single observation. It is used to calculate validation metrics and generate plots. A Comparer can be created using the match function.

"},{"location":"user-guide/terminology/#comparercollection","title":"ComparerCollection","text":"

A ComparerCollection is a collection of Comparers. It is used to compare multiple observations with one or more model results. A ComparerCollection can be created using the match function or by passing a list of Comparers to the ComparerCollection constructor.

"},{"location":"user-guide/terminology/#connector","title":"Connector","text":"

In past versions of FMSkill/ModelSkill, the Connector class was used to connect observations and model results. This class has been deprecated and is no longer in use.

"},{"location":"user-guide/terminology/#abbreviations","title":"Abbreviations","text":"Abbreviation Meaning ms ModelSkill o or obs Observation mr or mod Model result cmp Comparer cc ComparerCollection sk SkillTable mtr Metric q Quantity"},{"location":"user-guide/vision/","title":"Vision","text":"

ModelSkill would like to be your modelling companion. It should be indispensably good, such that you want to use it every time you do a MIKE simulation.

"},{"location":"user-guide/vision/#objective","title":"Objective","text":"

We want ModelSkill to make it easy to

  • assess the skill of a model by comparing with measurements
  • assess model skill also when result is split on several files (2d, 3d, yearly, ...)
  • compare the skill of different calibration runs
  • compare your model with other models
  • use a wide range of common evaluation metrics
  • create common plots such as time series, scatter and taylor diagrams
  • do aggregations - assess for all observations, geographic areas, monthly, ...
  • do filtering - assess for a subset of observations, geographic areas, ...
  • make fast comparisons (optimized code)

And it should be

  • Difficult to make mistakes by verifying input
  • Trustworthy by having >95% test coverage
  • Easy to install ($ pip install modelskill)
  • Easy to get started by providing many notebook examples and documentation
"},{"location":"user-guide/vision/#scope","title":"Scope","text":"

ModelSkill wants to balance general and specific needs:

  • It should be general enough to cover >90% of MIKE simulations

  • It should be general enough to cover generic modelling irrespective of software.

  • But specific enough to be useful

    • Support dfs files (using mikeio)
    • Handle circular variables such as wave direction
"},{"location":"user-guide/vision/#limitations","title":"Limitations","text":"

ModelSkill does not wish to cover

  • Extreme value analysis
  • Deterministic wave analysis such as crossing analysis
  • Rare alternative file types
  • Rarely used model result types
  • Rare observation types
  • Anything project specific
"},{"location":"user-guide/vision/#future","title":"Future","text":""},{"location":"user-guide/vision/#forecast-skill","title":"Forecast skill","text":"

It should be possible to compare forecasts with observations using forecast lead time as a dimension. Planned 2024.

"},{"location":"user-guide/vision/#better-support-for-3d-data","title":"Better support for 3D data","text":"

Currently 3D data is supported only as point data and only if data has already been extracted from model result files. It should be possible to extract data from 3D files directly. Furthermore, vertical column data should be supported as an observation type with z as a dimension. Planned 2024.

"},{"location":"user-guide/vision/#web-app","title":"Web app","text":"

Create a web app that wraps this library.

"},{"location":"user-guide/vision/#automatic-reports","title":"Automatic reports","text":"

Both static as markdown, docx, pptx and interactive as html.

"}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Home","text":""},{"location":"#modelskill-assess-the-skill-of-your-mike-model","title":"ModelSkill: Assess the skill of your MIKE model","text":"

Compare results from MIKE simulations with observations. ModelSkill would like to be your companion during the different phases of a MIKE modelling workflow.

Useful links: Terminology | Overview | Plotting | Issues | Discussion

  • Set up in 5 minutes

    Install ModelSkill with pip and get up and running in minutes

    Getting started

  • It's just Python

    Focus on your modelling and less on generating a validation report

    API Reference

  • Made to measure

    Choose between different skill metrics and customizable tables and charts

    Metrics

  • Open Source, MIT

    ModelSkill is licensed under MIT and available on GitHub

    License

"},{"location":"license/","title":"License","text":"

MIT License

Copyright (c) 2024 DHI

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the \"Software\"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

"},{"location":"api/","title":"API Documentation","text":"

Obtain a comparer object in one of the following ways:

  • From matched data with from_matched()
  • After defining observations and model results using the match() function.
  • From a config file with from_config()

Do analysis and plotting with the returned Comparer (a single observation) or ComparerCollection (multiple observations):

  • skill() - returns a SkillTable with the skill scores
  • plot using the various plot methods of the comparer objects
    • plot.scatter()
    • plot.timeseries()
    • plot.kde()
    • plot.qq()
    • plot.hist()
"},{"location":"api/comparer/","title":"Comparer","text":"

The Comparer class is the main class of the ModelSkill package. It is returned by match(), from_matched() or as an element in a ComparerCollection. It holds the matched observation and model data for a single observation and has methods for plotting and skill assessment.

Main functionality:

  • selecting/filtering data
    • sel()
    • query()
  • skill assessment
    • skill()
    • gridded_skill() (for track observations)
  • plotting
    • plot.timeseries()
    • plot.scatter()
    • plot.kde()
    • plot.qq()
    • plot.hist()
    • plot.box()
  • load/save/export data
    • load()
    • save()
    • to_dataframe()
"},{"location":"api/comparer/#modelskill.Comparer","title":"modelskill.Comparer","text":"

Bases: Scoreable

Comparer class for comparing model and observation data.

Typically, the Comparer is part of a ComparerCollection, created with the match function.

Parameters:

Name Type Description Default matched_data Dataset

Matched data

required raw_mod_data dict of modelskill.TimeSeries

Raw model data. If None, observation and modeldata must be provided.

None

Examples:

>>> import modelskill as ms\n>>> cmp1 = ms.match(observation, modeldata)\n>>> cmp2 = ms.from_matched(matched_data)\n
See Also

modelskill.match, modelskill.from_matched

Source code in modelskill/comparison/_comparison.py
class Comparer(Scoreable):\n    \"\"\"\n    Comparer class for comparing model and observation data.\n\n    Typically, the Comparer is part of a ComparerCollection,\n    created with the `match` function.\n\n    Parameters\n    ----------\n    matched_data : xr.Dataset\n        Matched data\n    raw_mod_data : dict of modelskill.TimeSeries, optional\n        Raw model data. If None, observation and modeldata must be provided.\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> cmp1 = ms.match(observation, modeldata)\n    >>> cmp2 = ms.from_matched(matched_data)\n\n    See Also\n    --------\n    modelskill.match, modelskill.from_matched\n    \"\"\"\n\n    data: xr.Dataset\n    raw_mod_data: Dict[str, TimeSeries]\n    _obs_str = \"Observation\"\n    plotter = ComparerPlotter\n\n    def __init__(\n        self,\n        matched_data: xr.Dataset,\n        raw_mod_data: Optional[Dict[str, TimeSeries]] = None,\n    ) -> None:\n        self.data = _parse_dataset(matched_data)\n        self.raw_mod_data = (\n            raw_mod_data\n            if raw_mod_data is not None\n            else {\n                # key: ModelResult(value, gtype=self.data.gtype, name=key, x=self.x, y=self.y)\n                key: TimeSeries(self.data[[key]])\n                for key, value in matched_data.data_vars.items()\n                if value.attrs[\"kind\"] == \"model\"\n            }\n        )\n        # TODO: validate that the names in raw_mod_data are the same as in matched_data\n        assert isinstance(self.raw_mod_data, dict)\n        for k in self.raw_mod_data.keys():\n            v = self.raw_mod_data[k]\n            if not isinstance(v, TimeSeries):\n                try:\n                    self.raw_mod_data[k] = TimeSeries(v)\n                except Exception:\n                    raise ValueError(\n                        f\"raw_mod_data[{k}] could not be converted to a TimeSeries object\"\n                    )\n            else:\n                
assert isinstance(\n                    v, TimeSeries\n                ), f\"raw_mod_data[{k}] must be a TimeSeries object\"\n\n        self.plot = Comparer.plotter(self)\n        \"\"\"Plot using the ComparerPlotter\n\n        Examples\n        --------\n        >>> cmp.plot.timeseries()\n        >>> cmp.plot.scatter()\n        >>> cmp.plot.qq()\n        >>> cmp.plot.hist()\n        >>> cmp.plot.kde()\n        >>> cmp.plot.box()\n        >>> cmp.plot.residual_hist()\n        >>> cmp.plot.taylor()        \n        \"\"\"\n\n    @staticmethod\n    def from_matched_data(\n        data: xr.Dataset | pd.DataFrame,\n        raw_mod_data: Optional[Dict[str, TimeSeries]] = None,\n        obs_item: str | int | None = None,\n        mod_items: Optional[Iterable[str | int]] = None,\n        aux_items: Optional[Iterable[str | int]] = None,\n        name: Optional[str] = None,\n        weight: float = 1.0,\n        x: Optional[float] = None,\n        y: Optional[float] = None,\n        z: Optional[float] = None,\n        x_item: str | int | None = None,\n        y_item: str | int | None = None,\n        quantity: Optional[Quantity] = None,\n    ) -> \"Comparer\":\n        \"\"\"Initialize from compared data\"\"\"\n        if not isinstance(data, xr.Dataset):\n            # TODO: handle raw_mod_data by accessing data.attrs[\"kind\"] and only remove nan after\n            data = _matched_data_to_xarray(\n                data,\n                obs_item=obs_item,\n                mod_items=mod_items,\n                aux_items=aux_items,\n                name=name,\n                x=x,\n                y=y,\n                z=z,\n                x_item=x_item,\n                y_item=y_item,\n                quantity=quantity,\n            )\n            data.attrs[\"weight\"] = weight\n        return Comparer(matched_data=data, raw_mod_data=raw_mod_data)\n\n    def __repr__(self):\n        out = [\n            \"<Comparer>\",\n            f\"Quantity: {self.quantity}\",\n        
    f\"Observation: {self.name}, n_points={self.n_points}\",\n            \"Model(s):\",\n        ]\n        for index, model in enumerate(self.mod_names):\n            out.append(f\"{index}: {model}\")\n\n        for var in self.aux_names:\n            out.append(f\" Auxiliary: {var}\")\n        return str.join(\"\\n\", out)\n\n    @property\n    def name(self) -> str:\n        \"\"\"Name of comparer (=name of observation)\"\"\"\n        return str(self.data.attrs[\"name\"])\n\n    @name.setter\n    def name(self, name: str) -> None:\n        if name in _RESERVED_NAMES:\n            raise ValueError(\n                f\"Cannot rename to any of {_RESERVED_NAMES}, these are reserved names!\"\n            )\n        self.data.attrs[\"name\"] = name\n\n    @property\n    def gtype(self) -> str:\n        \"\"\"Geometry type\"\"\"\n        return str(self.data.attrs[\"gtype\"])\n\n    @property\n    def quantity(self) -> Quantity:\n        \"\"\"Quantity object\"\"\"\n        return Quantity(\n            name=self.data[self._obs_str].attrs[\"long_name\"],\n            unit=self.data[self._obs_str].attrs[\"units\"],\n            is_directional=bool(\n                self.data[self._obs_str].attrs.get(\"is_directional\", False)\n            ),\n        )\n\n    @quantity.setter\n    def quantity(self, quantity: Quantity) -> None:\n        assert isinstance(quantity, Quantity), \"value must be a Quantity object\"\n        self.data[self._obs_str].attrs[\"long_name\"] = quantity.name\n        self.data[self._obs_str].attrs[\"units\"] = quantity.unit\n        self.data[self._obs_str].attrs[\"is_directional\"] = int(quantity.is_directional)\n\n    @property\n    def n_points(self) -> int:\n        \"\"\"number of compared points\"\"\"\n        return len(self.data[self._obs_str]) if self.data else 0\n\n    @property\n    def time(self) -> pd.DatetimeIndex:\n        \"\"\"time of compared data as pandas DatetimeIndex\"\"\"\n        return self.data.time.to_index()\n\n    # 
TODO: Should we keep these? (renamed to start_time and end_time)\n    # @property\n    # def start(self) -> pd.Timestamp:\n    #     \"\"\"start pd.Timestamp of compared data\"\"\"\n    #     return self.time[0]\n\n    # @property\n    # def end(self) -> pd.Timestamp:\n    #     \"\"\"end pd.Timestamp of compared data\"\"\"\n    #     return self.time[-1]\n\n    @property\n    def x(self) -> Any:\n        \"\"\"x-coordinate\"\"\"\n        return self._coordinate_values(\"x\")\n\n    @property\n    def y(self) -> Any:\n        \"\"\"y-coordinate\"\"\"\n        return self._coordinate_values(\"y\")\n\n    @property\n    def z(self) -> Any:\n        \"\"\"z-coordinate\"\"\"\n        return self._coordinate_values(\"z\")\n\n    def _coordinate_values(self, coord: str) -> Any:\n        vals = self.data[coord].values\n        return np.atleast_1d(vals)[0] if vals.ndim == 0 else vals\n\n    @property\n    def n_models(self) -> int:\n        \"\"\"Number of model results\"\"\"\n        return len(self.mod_names)\n\n    @property\n    def mod_names(self) -> List[str]:\n        \"\"\"List of model result names\"\"\"\n        return list(self.raw_mod_data.keys())\n\n    def __contains__(self, key: str) -> bool:\n        return key in self.data.data_vars\n\n    @property\n    def aux_names(self) -> List[str]:\n        \"\"\"List of auxiliary data names\"\"\"\n        # we don't require the kind attribute to be \"auxiliary\"\n        return list(\n            [\n                k\n                for k, v in self.data.data_vars.items()\n                if v.attrs[\"kind\"] not in [\"observation\", \"model\"]\n            ]\n        )\n\n    # TODO: always \"Observation\", necessary to have this property?\n    @property\n    def _obs_name(self) -> str:\n        return self._obs_str\n\n    @property\n    def weight(self) -> float:\n        \"\"\"Weight of observation (used in ComparerCollection score() and mean_skill())\"\"\"\n        return float(self.data.attrs[\"weight\"])\n\n 
   @weight.setter\n    def weight(self, value: float) -> None:\n        self.data.attrs[\"weight\"] = float(value)\n\n    @property\n    def _unit_text(self) -> str:\n        # Quantity name and unit as text suitable for plot labels\n        return f\"{self.quantity.name} [{self.quantity.unit}]\"\n\n    @property\n    def attrs(self) -> dict[str, Any]:\n        \"\"\"Attributes of the observation\"\"\"\n        return self.data.attrs\n\n    @attrs.setter\n    def attrs(self, value: dict[str, Serializable]) -> None:\n        self.data.attrs = value\n\n    # TODO: is this the best way to copy (self.data.copy.. )\n    def __copy__(self) -> \"Comparer\":\n        return deepcopy(self)\n\n    def copy(self) -> \"Comparer\":\n        return self.__copy__()\n\n    def rename(\n        self, mapping: Mapping[str, str], errors: Literal[\"raise\", \"ignore\"] = \"raise\"\n    ) -> \"Comparer\":\n        \"\"\"Rename observation, model or auxiliary data variables\n\n        Parameters\n        ----------\n        mapping : dict\n            mapping of old names to new names\n        errors : {'raise', 'ignore'}, optional\n            If 'raise', raise a KeyError if any of the old names\n            do not exist in the data. 
By default 'raise'.\n\n        Returns\n        -------\n        Comparer\n\n        Examples\n        --------\n        >>> cmp = ms.match(observation, modeldata)\n        >>> cmp.mod_names\n        ['model1']\n        >>> cmp2 = cmp.rename({'model1': 'model2'})\n        >>> cmp2.mod_names\n        ['model2']\n        \"\"\"\n        if errors not in [\"raise\", \"ignore\"]:\n            raise ValueError(\"errors must be 'raise' or 'ignore'\")\n\n        allowed_keys = [self.name] + self.mod_names + self.aux_names\n        if errors == \"raise\":\n            for k in mapping.keys():\n                if k not in allowed_keys:\n                    raise KeyError(f\"Unknown key: {k}; must be one of {allowed_keys}\")\n        else:\n            # \"ignore\": silently remove keys that are not in allowed_keys\n            mapping = {k: v for k, v in mapping.items() if k in allowed_keys}\n\n        if any([k in _RESERVED_NAMES for k in mapping.values()]):\n            # TODO: also check for duplicates\n            raise ValueError(\n                f\"Cannot rename to any of {_RESERVED_NAMES}, these are reserved names!\"\n            )\n\n        # rename observation\n        obs_name = mapping.get(self.name, self.name)\n        ma_mapping = {k: v for k, v in mapping.items() if k != self.name}\n\n        data = self.data.rename(ma_mapping)\n        data.attrs[\"name\"] = obs_name\n        raw_mod_data = dict()\n        for k, v in self.raw_mod_data.items():\n            if k in ma_mapping:\n                # copy is needed here as the same raw data could be\n                # used for multiple Comparers!\n                v2 = v.copy()\n                v2.data = v2.data.rename({k: ma_mapping[k]})\n                raw_mod_data[ma_mapping[k]] = v2\n            else:\n                raw_mod_data[k] = v\n\n        return Comparer(matched_data=data, raw_mod_data=raw_mod_data)\n\n    def _to_observation(self) -> PointObservation | TrackObservation:\n        \"\"\"Convert to 
Observation\"\"\"\n        if self.gtype == \"point\":\n            df = self.data.drop_vars([\"x\", \"y\", \"z\"])[self._obs_str].to_dataframe()\n            return PointObservation(\n                data=df,\n                name=self.name,\n                x=self.x,\n                y=self.y,\n                z=self.z,\n                quantity=self.quantity,\n                # TODO: add attrs\n            )\n        elif self.gtype == \"track\":\n            df = self.data.drop_vars([\"z\"])[[self._obs_str]].to_dataframe()\n            return TrackObservation(\n                data=df,\n                item=0,\n                x_item=1,\n                y_item=2,\n                name=self.name,\n                quantity=self.quantity,\n                # TODO: add attrs\n            )\n        else:\n            raise NotImplementedError(f\"Unknown gtype: {self.gtype}\")\n\n    def __iadd__(self, other: Comparer):  # type: ignore\n        from ..matching import match_space_time\n\n        missing_models = set(self.mod_names) - set(other.mod_names)\n        if len(missing_models) == 0:\n            # same obs name and same model names\n            self.data = xr.concat([self.data, other.data], dim=\"time\").drop_duplicates(\n                \"time\"\n            )\n        else:\n            self.raw_mod_data.update(other.raw_mod_data)\n            matched = match_space_time(\n                observation=self._to_observation(),\n                raw_mod_data=self.raw_mod_data,  # type: ignore\n            )\n            self.data = matched\n\n        return self\n\n    def __add__(\n        self, other: Union[\"Comparer\", \"ComparerCollection\"]\n    ) -> \"ComparerCollection\" | \"Comparer\":\n        from ._collection import ComparerCollection\n        from ..matching import match_space_time\n\n        if not isinstance(other, (Comparer, ComparerCollection)):\n            raise TypeError(f\"Cannot add {type(other)} to {type(self)}\")\n\n        if 
isinstance(other, Comparer) and (self.name == other.name):\n            missing_models = set(self.mod_names) - set(other.mod_names)\n            if len(missing_models) == 0:\n                # same obs name and same model names\n                cmp = self.copy()\n                cmp.data = xr.concat(\n                    [cmp.data, other.data], dim=\"time\"\n                ).drop_duplicates(\"time\")\n\n            else:\n                raw_mod_data = self.raw_mod_data.copy()\n                raw_mod_data.update(other.raw_mod_data)  # TODO!\n                matched = match_space_time(\n                    observation=self._to_observation(),\n                    raw_mod_data=raw_mod_data,  # type: ignore\n                )\n                cmp = Comparer(matched_data=matched, raw_mod_data=raw_mod_data)\n\n            return cmp\n        else:\n            if isinstance(other, Comparer):\n                return ComparerCollection([self, other])\n            elif isinstance(other, ComparerCollection):\n                return ComparerCollection([self, *other])\n\n    def sel(\n        self,\n        model: Optional[IdxOrNameTypes] = None,\n        start: Optional[TimeTypes] = None,\n        end: Optional[TimeTypes] = None,\n        time: Optional[TimeTypes] = None,\n        area: Optional[List[float]] = None,\n    ) -> \"Comparer\":\n        \"\"\"Select data based on model, time and/or area.\n\n        Parameters\n        ----------\n        model : str or int or list of str or list of int, optional\n            Model name or index. If None, all models are selected.\n        start : str or datetime, optional\n            Start time. If None, all times are selected.\n        end : str or datetime, optional\n            End time. If None, all times are selected.\n        time : str or datetime, optional\n            Time. If None, all times are selected.\n        area : list of float, optional\n            bbox: [x0, y0, x1, y1] or Polygon. 
If None, all areas are selected.\n\n        Returns\n        -------\n        Comparer\n            New Comparer with selected data.\n        \"\"\"\n        if (time is not None) and ((start is not None) or (end is not None)):\n            raise ValueError(\"Cannot use both time and start/end\")\n\n        d = self.data\n        raw_mod_data = self.raw_mod_data\n        if model is not None:\n            if isinstance(model, (str, int)):\n                models = [model]\n            else:\n                models = list(model)\n            mod_names: List[str] = [_get_name(m, self.mod_names) for m in models]\n            dropped_models = [m for m in self.mod_names if m not in mod_names]\n            d = d.drop_vars(dropped_models)\n            raw_mod_data = {m: raw_mod_data[m] for m in mod_names}\n        if (start is not None) or (end is not None):\n            # TODO: can this be done without to_index? (simplify)\n            d = d.sel(time=d.time.to_index().to_frame().loc[start:end].index)  # type: ignore\n\n            # Note: if user asks for a specific time, we also filter raw\n            raw_mod_data = {\n                k: v.sel(time=slice(start, end)) for k, v in raw_mod_data.items()\n            }  # type: ignore\n        if time is not None:\n            d = d.sel(time=time)\n\n            # Note: if user asks for a specific time, we also filter raw\n            raw_mod_data = {k: v.sel(time=time) for k, v in raw_mod_data.items()}\n        if area is not None:\n            if _area_is_bbox(area):\n                x0, y0, x1, y1 = area\n                mask = (d.x > x0) & (d.x < x1) & (d.y > y0) & (d.y < y1)\n            elif _area_is_polygon(area):\n                polygon = np.array(area)\n                xy = np.column_stack((d.x, d.y))\n                mask = _inside_polygon(polygon, xy)\n            else:\n                raise ValueError(\"area supports bbox [x0,y0,x1,y1] and closed polygon\")\n            if self.gtype == \"point\":\n            
    # if False, return empty data\n                d = d if mask else d.isel(time=slice(None, 0))\n            else:\n                d = d.isel(time=mask)\n        return Comparer.from_matched_data(data=d, raw_mod_data=raw_mod_data)\n\n    def where(\n        self,\n        cond: Union[bool, np.ndarray, xr.DataArray],\n    ) -> \"Comparer\":\n        \"\"\"Return a new Comparer with values where cond is True\n\n        Parameters\n        ----------\n        cond : bool, np.ndarray, xr.DataArray\n            This selects the values to return.\n\n        Returns\n        -------\n        Comparer\n            New Comparer with values where cond is True and other otherwise.\n\n        Examples\n        --------\n        >>> c2 = c.where(c.data.Observation > 0)\n        \"\"\"\n        d = self.data.where(cond, other=np.nan)\n        d = d.dropna(dim=\"time\", how=\"all\")\n        return Comparer.from_matched_data(d, self.raw_mod_data)\n\n    def query(self, query: str) -> \"Comparer\":\n        \"\"\"Return a new Comparer with values where query cond is True\n\n        Parameters\n        ----------\n        query : str\n            Query string, see pandas.DataFrame.query\n\n        Returns\n        -------\n        Comparer\n            New Comparer with values where cond is True and other otherwise.\n\n        Examples\n        --------\n        >>> c2 = c.query(\"Observation > 0\")\n        \"\"\"\n        d = self.data.query({\"time\": query})\n        d = d.dropna(dim=\"time\", how=\"all\")\n        return Comparer.from_matched_data(d, self.raw_mod_data)\n\n    def _to_long_dataframe(\n        self, attrs_keys: Iterable[str] | None = None\n    ) -> pd.DataFrame:\n        \"\"\"Return a copy of the data as a long-format pandas DataFrame (for groupby operations)\"\"\"\n\n        data = self.data.drop_vars(\"z\", errors=\"ignore\")\n\n        # this step is necessary since we keep arbitrary derived data in the dataset, but not z\n        # i.e. 
using a hardcoded whitelist of variables to keep is less flexible\n        id_vars = [v for v in data.variables if v not in self.mod_names]\n\n        attrs = (\n            {key: data.attrs.get(key, False) for key in attrs_keys}\n            if attrs_keys\n            else {}\n        )\n\n        df = (\n            data.to_dataframe()\n            .reset_index()\n            .melt(\n                value_vars=self.mod_names,\n                var_name=\"model\",\n                value_name=\"mod_val\",\n                id_vars=id_vars,\n            )\n            .rename(columns={self._obs_str: \"obs_val\"})\n            .assign(observation=self.name)\n            .assign(**attrs)\n            .astype({\"model\": \"category\", \"observation\": \"category\"})\n        )\n\n        return df\n\n    def skill(\n        self,\n        by: str | Iterable[str] | None = None,\n        metrics: Iterable[str] | Iterable[Callable] | str | Callable | None = None,\n        **kwargs: Any,\n    ) -> SkillTable:\n        \"\"\"Skill assessment of model(s)\n\n        Parameters\n        ----------\n        by : str or List[str], optional\n            group by, by default [\"model\"]\n\n            - by column name\n            - by temporal bin of the DateTimeIndex via the freq-argument\n            (using pandas pd.Grouper(freq)), e.g.: 'freq:M' = monthly; 'freq:D' daily\n            - by the dt accessor of the DateTimeIndex (e.g. 'dt.month') using the\n            syntax 'dt:month'. 
The dt-argument is different from the freq-argument\n            in that it gives month-of-year rather than month-of-data.\n        metrics : list, optional\n            list of modelskill.metrics, by default modelskill.options.metrics.list\n\n        Returns\n        -------\n        SkillTable\n            skill assessment object\n\n        See also\n        --------\n        sel\n            a method for filtering/selecting data\n\n        Examples\n        --------\n        >>> import modelskill as ms\n        >>> cc = ms.match(c2, mod)\n        >>> cc['c2'].skill().round(2)\n                       n  bias  rmse  urmse   mae    cc    si    r2\n        observation\n        c2           113 -0.00  0.35   0.35  0.29  0.97  0.12  0.99\n\n        >>> cc['c2'].skill(by='freq:D').round(2)\n                     n  bias  rmse  urmse   mae    cc    si    r2\n        2017-10-27  72 -0.19  0.31   0.25  0.26  0.48  0.12  0.98\n        2017-10-28   0   NaN   NaN    NaN   NaN   NaN   NaN   NaN\n        2017-10-29  41  0.33  0.41   0.25  0.36  0.96  0.06  0.99\n        \"\"\"\n        metrics = _parse_metric(metrics, directional=self.quantity.is_directional)\n\n        # TODO remove in v1.1\n        model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore\n        if kwargs != {}:\n            raise AttributeError(f\"Unknown keyword arguments: {kwargs}\")\n\n        cmp = self.sel(\n            model=model,\n            start=start,\n            end=end,\n            area=area,\n        )\n        if cmp.n_points == 0:\n            raise ValueError(\"No data selected for skill assessment\")\n\n        by = _parse_groupby(by, n_mod=cmp.n_models, n_qnt=1)\n\n        df = cmp._to_long_dataframe()\n        res = _groupby_df(df, by=by, metrics=metrics)\n        res[\"x\"] = np.nan if self.gtype == \"track\" else cmp.x\n        res[\"y\"] = np.nan if self.gtype == \"track\" else cmp.y\n        res = self._add_as_col_if_not_in_index(df, skilldf=res)\n        return 
SkillTable(res)\n\n    def _add_as_col_if_not_in_index(\n        self, df: pd.DataFrame, skilldf: pd.DataFrame\n    ) -> pd.DataFrame:\n        \"\"\"Add a field to skilldf if unique in df\"\"\"\n        FIELDS = (\"observation\", \"model\")\n\n        for field in FIELDS:\n            if (field == \"model\") and (self.n_models <= 1):\n                continue\n            if field not in skilldf.index.names:\n                unames = df[field].unique()\n                if len(unames) == 1:\n                    skilldf.insert(loc=0, column=field, value=unames[0])\n        return skilldf\n\n    def score(\n        self,\n        metric: str | Callable = mtr.rmse,\n        **kwargs: Any,\n    ) -> Dict[str, float]:\n        \"\"\"Model skill score\n\n        Parameters\n        ----------\n        metric : list, optional\n            a single metric from modelskill.metrics, by default rmse\n\n        Returns\n        -------\n        dict[str, float]\n            skill score as a single number (for each model)\n\n        See also\n        --------\n        skill\n            a method for skill assessment returning a pd.DataFrame\n\n        Examples\n        --------\n        >>> import modelskill as ms\n        >>> cmp = ms.match(c2, mod)\n        >>> cmp.score()\n        {'mod': 0.3517964910888918}\n\n        >>> cmp.score(metric=\"mape\")\n        {'mod': 11.567399646108198}\n        \"\"\"\n        metric = _parse_metric(metric)[0]\n        if not (callable(metric) or isinstance(metric, str)):\n            raise ValueError(\"metric must be a string or a function\")\n\n        # TODO remove in v1.1\n        model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore\n        assert kwargs == {}, f\"Unknown keyword arguments: {kwargs}\"\n\n        sk = self.skill(\n            by=[\"model\", \"observation\"],\n            metrics=[metric],\n            model=model,  # deprecated\n            start=start,  # deprecated\n            end=end,  # deprecated\n 
           area=area,  # deprecated\n        )\n        df = sk.to_dataframe()\n\n        metric_name = metric if isinstance(metric, str) else metric.__name__\n        ser = df.reset_index().groupby(\"model\", observed=True)[metric_name].mean()\n        score = {str(k): float(v) for k, v in ser.items()}\n        return score\n\n    def gridded_skill(\n        self,\n        bins: int = 5,\n        binsize: float | None = None,\n        by: str | Iterable[str] | None = None,\n        metrics: Iterable[str] | Iterable[Callable] | str | Callable | None = None,\n        n_min: int | None = None,\n        **kwargs: Any,\n    ):\n        \"\"\"Aggregated spatial skill assessment of model(s) on a regular spatial grid.\n\n        Parameters\n        ----------\n        bins: int, list of scalars, or IntervalIndex, or tuple of, optional\n            criteria to bin x and y by, argument bins to pd.cut(), default 5\n            define different bins for x and y a tuple\n            e.g.: bins = 5, bins = (5,[2,3,5])\n        binsize : float, optional\n            bin size for x and y dimension, overwrites bins\n            creates bins with reference to round(mean(x)), round(mean(y))\n        by : (str, List[str]), optional\n            group by column name or by temporal bin via the freq-argument\n            (using pandas pd.Grouper(freq)),\n            e.g.: 'freq:M' = monthly; 'freq:D' daily\n            by default [\"model\",\"observation\"]\n        metrics : list, optional\n            list of modelskill.metrics, by default modelskill.options.metrics.list\n        n_min : int, optional\n            minimum number of observations in a grid cell;\n            cells with fewer observations get a score of `np.nan`\n\n        Returns\n        -------\n        SkillGrid\n            skill assessment as a SkillGrid object\n\n        See also\n        --------\n        skill\n            a method for aggregated skill assessment\n\n        Examples\n        --------\n        
>>> import modelskill as ms\n        >>> cmp = ms.match(c2, mod)   # satellite altimeter vs. model\n        >>> cmp.gridded_skill(metrics='bias')\n        <xarray.Dataset>\n        Dimensions:      (x: 5, y: 5)\n        Coordinates:\n            observation   'alti'\n        * x            (x) float64 -0.436 1.543 3.517 5.492 7.466\n        * y            (y) float64 50.6 51.66 52.7 53.75 54.8\n        Data variables:\n            n            (x, y) int32 3 0 0 14 37 17 50 36 72 ... 0 0 15 20 0 0 0 28 76\n            bias         (x, y) float64 -0.02626 nan nan ... nan 0.06785 -0.1143\n\n        >>> gs = cc.gridded_skill(binsize=0.5)\n        >>> gs.data.coords\n        Coordinates:\n            observation   'alti'\n        * x            (x) float64 -1.5 -0.5 0.5 1.5 2.5 3.5 4.5 5.5 6.5 7.5\n        * y            (y) float64 51.5 52.5 53.5 54.5 55.5 56.5\n        \"\"\"\n\n        # TODO remove in v1.1\n        model, start, end, area = _get_deprecated_args(kwargs)\n        assert kwargs == {}, f\"Unknown keyword arguments: {kwargs}\"\n\n        cmp = self.sel(\n            model=model,\n            start=start,\n            end=end,\n            area=area,\n        )\n\n        metrics = _parse_metric(metrics)\n        if cmp.n_points == 0:\n            raise ValueError(\"No data to compare\")\n\n        df = cmp._to_long_dataframe()\n        df = _add_spatial_grid_to_df(df=df, bins=bins, binsize=binsize)\n\n        agg_cols = _parse_groupby(by=by, n_mod=cmp.n_models, n_qnt=1)\n        if \"x\" not in agg_cols:\n            agg_cols.insert(0, \"x\")\n        if \"y\" not in agg_cols:\n            agg_cols.insert(0, \"y\")\n\n        df = df.drop(columns=[\"x\", \"y\"]).rename(columns=dict(xBin=\"x\", yBin=\"y\"))\n        res = _groupby_df(df, by=agg_cols, metrics=metrics, n_min=n_min)\n        ds = res.to_xarray().squeeze()\n\n        # change categorial index to coordinates\n        for dim in (\"x\", \"y\"):\n            ds[dim] = ds[dim].astype(float)\n\n  
      return SkillGrid(ds)\n\n    @property\n    def _residual(self) -> np.ndarray:\n        df = self.data.drop_vars([\"x\", \"y\", \"z\"]).to_dataframe()\n        obs = df[self._obs_str].values\n        mod = df[self.mod_names].values\n        return mod - np.vstack(obs)\n\n    def remove_bias(\n        self, correct: Literal[\"Model\", \"Observation\"] = \"Model\"\n    ) -> Comparer:\n        cmp = self.copy()\n\n        bias = cmp._residual.mean(axis=0)\n        if correct == \"Model\":\n            for j in range(cmp.n_models):\n                mod_name = cmp.mod_names[j]\n                mod_ts = cmp.raw_mod_data[mod_name]\n                with xr.set_options(keep_attrs=True):  # type: ignore\n                    mod_ts.data[mod_name].values = mod_ts.values - bias[j]\n                    cmp.data[mod_name].values = cmp.data[mod_name].values - bias[j]\n        elif correct == \"Observation\":\n            # what if multiple models?\n            with xr.set_options(keep_attrs=True):  # type: ignore\n                cmp.data[cmp._obs_str].values = cmp.data[cmp._obs_str].values + bias\n        else:\n            raise ValueError(\n                f\"Unknown correct={correct}. 
Only know 'Model' and 'Observation'\"\n            )\n        return cmp\n\n    def to_dataframe(self) -> pd.DataFrame:\n        \"\"\"Convert matched data to pandas DataFrame\n\n        Include x, y coordinates only if gtype=track\n\n        Returns\n        -------\n        pd.DataFrame\n            data as a pandas DataFrame\n        \"\"\"\n        if self.gtype == str(GeometryType.POINT):\n            # we remove the scalar coordinate variables as they\n            # will otherwise be columns in the dataframe\n            return self.data.drop_vars([\"x\", \"y\", \"z\"]).to_dataframe()\n        elif self.gtype == str(GeometryType.TRACK):\n            df = self.data.drop_vars([\"z\"]).to_dataframe()\n            # make sure that x, y cols are first\n            cols = [\"x\", \"y\"] + [c for c in df.columns if c not in [\"x\", \"y\"]]\n            return df[cols]\n        else:\n            raise NotImplementedError(f\"Unknown gtype: {self.gtype}\")\n\n    def save(self, filename: Union[str, Path]) -> None:\n        \"\"\"Save to netcdf file\n\n        Parameters\n        ----------\n        filename : str or Path\n            filename\n        \"\"\"\n        ds = self.data\n\n        # add self.raw_mod_data to ds with prefix 'raw_' to avoid name conflicts\n        # an alternative strategy would be to use NetCDF groups\n        # https://docs.xarray.dev/en/stable/user-guide/io.html#groups\n\n        # There is no need to save raw data for track data, since it is identical to the matched data\n        if self.gtype == \"point\":\n            ds = self.data.copy()  # copy needed to avoid modifying self.data\n\n            for key, ts_mod in self.raw_mod_data.items():\n                ts_mod = ts_mod.copy()\n                #  rename time to unique name\n                ts_mod.data = ts_mod.data.rename({\"time\": \"_time_raw_\" + key})\n                # da = ds_mod.to_xarray()[key]\n                ds[\"_raw_\" + key] = ts_mod.data[key]\n\n        
ds.to_netcdf(filename)\n\n    @staticmethod\n    def load(filename: Union[str, Path]) -> \"Comparer\":\n        \"\"\"Load from netcdf file\n\n        Parameters\n        ----------\n        filename : str or Path\n            filename\n\n        Returns\n        -------\n        Comparer\n        \"\"\"\n        with xr.open_dataset(filename) as ds:\n            data = ds.load()\n\n        if data.gtype == \"track\":\n            return Comparer(matched_data=data)\n\n        if data.gtype == \"point\":\n            raw_mod_data: Dict[str, TimeSeries] = {}\n\n            for var in data.data_vars:\n                var_name = str(var)\n                if var_name[:5] == \"_raw_\":\n                    new_key = var_name[5:]  # remove prefix '_raw_'\n                    ds = data[[var_name]].rename(\n                        {\"_time_raw_\" + new_key: \"time\", var_name: new_key}\n                    )\n                    ts = PointObservation(data=ds, name=new_key)\n                    # TODO: name of time?\n                    # ts.name = new_key\n                    # df = (\n                    #     data[var_name]\n                    #     .to_dataframe()\n                    #     .rename(\n                    #         columns={\"_time_raw_\" + new_key: \"time\", var_name: new_key}\n                    #     )\n                    # )\n                    raw_mod_data[new_key] = ts\n\n                    # data = data.drop(var_name).drop(\"_time_raw_\" + new_key)\n\n            # filter variables, only keep the ones with a 'time' dimension\n            data = data[[v for v in data.data_vars if \"time\" in data[v].dims]]\n\n            return Comparer(matched_data=data, raw_mod_data=raw_mod_data)\n\n        else:\n            raise NotImplementedError(f\"Unknown gtype: {data.gtype}\")\n\n    # =============== Deprecated methods ===============\n\n    def spatial_skill(\n        self,\n        bins=5,\n        binsize=None,\n        by=None,\n        
metrics=None,\n        n_min=None,\n        **kwargs,\n    ):\n        # deprecated\n        warnings.warn(\n            \"spatial_skill is deprecated, use gridded_skill instead\", FutureWarning\n        )\n        return self.gridded_skill(\n            bins=bins,\n            binsize=binsize,\n            by=by,\n            metrics=metrics,\n            n_min=n_min,\n            **kwargs,\n        )\n\n    # TODO remove plotting methods in v1.1\n    def scatter(\n        self,\n        *,\n        bins=120,\n        quantiles=None,\n        fit_to_quantiles=False,\n        show_points=None,\n        show_hist=None,\n        show_density=None,\n        norm=None,\n        backend=\"matplotlib\",\n        figsize=(8, 8),\n        xlim=None,\n        ylim=None,\n        reg_method=\"ols\",\n        title=None,\n        xlabel=None,\n        ylabel=None,\n        skill_table=None,\n        **kwargs,\n    ):\n        warnings.warn(\n            \"This method is deprecated, use plot.scatter instead\", FutureWarning\n        )\n\n        # TODO remove in v1.1\n        model, start, end, area = _get_deprecated_args(kwargs)\n\n        # self.plot.scatter(\n        self.sel(\n            model=model,\n            start=start,\n            end=end,\n            area=area,\n        ).plot.scatter(\n            bins=bins,\n            quantiles=quantiles,\n            fit_to_quantiles=fit_to_quantiles,\n            show_points=show_points,\n            show_hist=show_hist,\n            show_density=show_density,\n            norm=norm,\n            backend=backend,\n            figsize=figsize,\n            xlim=xlim,\n            ylim=ylim,\n            reg_method=reg_method,\n            title=title,\n            xlabel=xlabel,\n            ylabel=ylabel,\n            **kwargs,\n        )\n\n    def taylor(\n        self,\n        normalize_std=False,\n        figsize=(7, 7),\n        marker=\"o\",\n        marker_size=6.0,\n        title=\"Taylor diagram\",\n        
**kwargs,\n    ):\n        warnings.warn(\"taylor is deprecated, use plot.taylor instead\", FutureWarning)\n\n        self.plot.taylor(\n            normalize_std=normalize_std,\n            figsize=figsize,\n            marker=marker,\n            marker_size=marker_size,\n            title=title,\n            **kwargs,\n        )\n\n    def hist(\n        self, *, model=None, bins=100, title=None, density=True, alpha=0.5, **kwargs\n    ):\n        warnings.warn(\"hist is deprecated. Use plot.hist instead.\", FutureWarning)\n        return self.plot.hist(\n            model=model, bins=bins, title=title, density=density, alpha=alpha, **kwargs\n        )\n\n    def kde(self, ax=None, **kwargs) -> Axes:\n        warnings.warn(\"kde is deprecated. Use plot.kde instead.\", FutureWarning)\n\n        return self.plot.kde(ax=ax, **kwargs)\n\n    def plot_timeseries(\n        self, title=None, *, ylim=None, figsize=None, backend=\"matplotlib\", **kwargs\n    ):\n        warnings.warn(\n            \"plot_timeseries is deprecated. Use plot.timeseries instead.\", FutureWarning\n        )\n\n        return self.plot.timeseries(\n            title=title, ylim=ylim, figsize=figsize, backend=backend, **kwargs\n        )\n\n    def residual_hist(self, bins=100, title=None, color=None, **kwargs):\n        warnings.warn(\n            \"residual_hist is deprecated. Use plot.residual_hist instead.\",\n            FutureWarning,\n        )\n\n        return self.plot.residual_hist(bins=bins, title=title, color=color, **kwargs)\n
"},{"location":"api/comparer/#modelskill.Comparer.attrs","title":"attrs property writable","text":"
attrs\n

Attributes of the observation

"},{"location":"api/comparer/#modelskill.Comparer.aux_names","title":"aux_names property","text":"
aux_names\n

List of auxiliary data names

"},{"location":"api/comparer/#modelskill.Comparer.gtype","title":"gtype property","text":"
gtype\n

Geometry type

"},{"location":"api/comparer/#modelskill.Comparer.mod_names","title":"mod_names property","text":"
mod_names\n

List of model result names

"},{"location":"api/comparer/#modelskill.Comparer.n_models","title":"n_models property","text":"
n_models\n

Number of model results

"},{"location":"api/comparer/#modelskill.Comparer.n_points","title":"n_points property","text":"
n_points\n

number of compared points

"},{"location":"api/comparer/#modelskill.Comparer.name","title":"name property writable","text":"
name\n

Name of comparer (=name of observation)

"},{"location":"api/comparer/#modelskill.Comparer.plot","title":"plot instance-attribute","text":"
plot = plotter(self)\n

Plot using the ComparerPlotter

Examples:

>>> cmp.plot.timeseries()\n>>> cmp.plot.scatter()\n>>> cmp.plot.qq()\n>>> cmp.plot.hist()\n>>> cmp.plot.kde()\n>>> cmp.plot.box()\n>>> cmp.plot.residual_hist()\n>>> cmp.plot.taylor()\n
"},{"location":"api/comparer/#modelskill.Comparer.quantity","title":"quantity property writable","text":"
quantity\n

Quantity object

"},{"location":"api/comparer/#modelskill.Comparer.time","title":"time property","text":"
time\n

time of compared data as pandas DatetimeIndex

"},{"location":"api/comparer/#modelskill.Comparer.weight","title":"weight property writable","text":"
weight\n

Weight of observation (used in ComparerCollection score() and mean_skill())

"},{"location":"api/comparer/#modelskill.Comparer.x","title":"x property","text":"
x\n

x-coordinate

"},{"location":"api/comparer/#modelskill.Comparer.y","title":"y property","text":"
y\n

y-coordinate

"},{"location":"api/comparer/#modelskill.Comparer.z","title":"z property","text":"
z\n

z-coordinate

"},{"location":"api/comparer/#modelskill.Comparer.from_matched_data","title":"from_matched_data staticmethod","text":"
from_matched_data(data, raw_mod_data=None, obs_item=None, mod_items=None, aux_items=None, name=None, weight=1.0, x=None, y=None, z=None, x_item=None, y_item=None, quantity=None)\n

Initialize from compared data

Source code in modelskill/comparison/_comparison.py
@staticmethod\ndef from_matched_data(\n    data: xr.Dataset | pd.DataFrame,\n    raw_mod_data: Optional[Dict[str, TimeSeries]] = None,\n    obs_item: str | int | None = None,\n    mod_items: Optional[Iterable[str | int]] = None,\n    aux_items: Optional[Iterable[str | int]] = None,\n    name: Optional[str] = None,\n    weight: float = 1.0,\n    x: Optional[float] = None,\n    y: Optional[float] = None,\n    z: Optional[float] = None,\n    x_item: str | int | None = None,\n    y_item: str | int | None = None,\n    quantity: Optional[Quantity] = None,\n) -> \"Comparer\":\n    \"\"\"Initialize from compared data\"\"\"\n    if not isinstance(data, xr.Dataset):\n        # TODO: handle raw_mod_data by accessing data.attrs[\"kind\"] and only remove nan after\n        data = _matched_data_to_xarray(\n            data,\n            obs_item=obs_item,\n            mod_items=mod_items,\n            aux_items=aux_items,\n            name=name,\n            x=x,\n            y=y,\n            z=z,\n            x_item=x_item,\n            y_item=y_item,\n            quantity=quantity,\n        )\n        data.attrs[\"weight\"] = weight\n    return Comparer(matched_data=data, raw_mod_data=raw_mod_data)\n
"},{"location":"api/comparer/#modelskill.Comparer.gridded_skill","title":"gridded_skill","text":"
gridded_skill(bins=5, binsize=None, by=None, metrics=None, n_min=None, **kwargs)\n

Aggregated spatial skill assessment of model(s) on a regular spatial grid.

Parameters:

Name Type Description Default bins int

criteria to bin x and y by, argument bins to pd.cut(), default 5 define different bins for x and y a tuple e.g.: bins = 5, bins = (5,[2,3,5])

5 binsize float

bin size for x and y dimension, overwrites bins creates bins with reference to round(mean(x)), round(mean(y))

None by (str, List[str])

group by column name or by temporal bin via the freq-argument (using pandas pd.Grouper(freq)), e.g.: 'freq:M' = monthly; 'freq:D' daily by default [\"model\",\"observation\"]

None metrics list

list of modelskill.metrics, by default modelskill.options.metrics.list

None n_min int

minimum number of observations in a grid cell; cells with fewer observations get a score of np.nan

None

Returns:

Type Description SkillGrid

skill assessment as a SkillGrid object

See also

skill a method for aggregated skill assessment

Examples:

>>> import modelskill as ms\n>>> cmp = ms.match(c2, mod)   # satellite altimeter vs. model\n>>> cmp.gridded_skill(metrics='bias')\n<xarray.Dataset>\nDimensions:      (x: 5, y: 5)\nCoordinates:\n    observation   'alti'\n* x            (x) float64 -0.436 1.543 3.517 5.492 7.466\n* y            (y) float64 50.6 51.66 52.7 53.75 54.8\nData variables:\n    n            (x, y) int32 3 0 0 14 37 17 50 36 72 ... 0 0 15 20 0 0 0 28 76\n    bias         (x, y) float64 -0.02626 nan nan ... nan 0.06785 -0.1143\n
>>> gs = cc.gridded_skill(binsize=0.5)\n>>> gs.data.coords\nCoordinates:\n    observation   'alti'\n* x            (x) float64 -1.5 -0.5 0.5 1.5 2.5 3.5 4.5 5.5 6.5 7.5\n* y            (y) float64 51.5 52.5 53.5 54.5 55.5 56.5\n
Source code in modelskill/comparison/_comparison.py
def gridded_skill(\n    self,\n    bins: int = 5,\n    binsize: float | None = None,\n    by: str | Iterable[str] | None = None,\n    metrics: Iterable[str] | Iterable[Callable] | str | Callable | None = None,\n    n_min: int | None = None,\n    **kwargs: Any,\n):\n    \"\"\"Aggregated spatial skill assessment of model(s) on a regular spatial grid.\n\n    Parameters\n    ----------\n    bins: int, list of scalars, or IntervalIndex, or tuple of, optional\n        criteria to bin x and y by, argument bins to pd.cut(), default 5\n        define different bins for x and y a tuple\n        e.g.: bins = 5, bins = (5,[2,3,5])\n    binsize : float, optional\n        bin size for x and y dimension, overwrites bins\n        creates bins with reference to round(mean(x)), round(mean(y))\n    by : (str, List[str]), optional\n        group by column name or by temporal bin via the freq-argument\n        (using pandas pd.Grouper(freq)),\n        e.g.: 'freq:M' = monthly; 'freq:D' daily\n        by default [\"model\",\"observation\"]\n    metrics : list, optional\n        list of modelskill.metrics, by default modelskill.options.metrics.list\n    n_min : int, optional\n        minimum number of observations in a grid cell;\n        cells with fewer observations get a score of `np.nan`\n\n    Returns\n    -------\n    SkillGrid\n        skill assessment as a SkillGrid object\n\n    See also\n    --------\n    skill\n        a method for aggregated skill assessment\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> cmp = ms.match(c2, mod)   # satellite altimeter vs. model\n    >>> cmp.gridded_skill(metrics='bias')\n    <xarray.Dataset>\n    Dimensions:      (x: 5, y: 5)\n    Coordinates:\n        observation   'alti'\n    * x            (x) float64 -0.436 1.543 3.517 5.492 7.466\n    * y            (y) float64 50.6 51.66 52.7 53.75 54.8\n    Data variables:\n        n            (x, y) int32 3 0 0 14 37 17 50 36 72 ... 
0 0 15 20 0 0 0 28 76\n        bias         (x, y) float64 -0.02626 nan nan ... nan 0.06785 -0.1143\n\n    >>> gs = cc.gridded_skill(binsize=0.5)\n    >>> gs.data.coords\n    Coordinates:\n        observation   'alti'\n    * x            (x) float64 -1.5 -0.5 0.5 1.5 2.5 3.5 4.5 5.5 6.5 7.5\n    * y            (y) float64 51.5 52.5 53.5 54.5 55.5 56.5\n    \"\"\"\n\n    # TODO remove in v1.1\n    model, start, end, area = _get_deprecated_args(kwargs)\n    assert kwargs == {}, f\"Unknown keyword arguments: {kwargs}\"\n\n    cmp = self.sel(\n        model=model,\n        start=start,\n        end=end,\n        area=area,\n    )\n\n    metrics = _parse_metric(metrics)\n    if cmp.n_points == 0:\n        raise ValueError(\"No data to compare\")\n\n    df = cmp._to_long_dataframe()\n    df = _add_spatial_grid_to_df(df=df, bins=bins, binsize=binsize)\n\n    agg_cols = _parse_groupby(by=by, n_mod=cmp.n_models, n_qnt=1)\n    if \"x\" not in agg_cols:\n        agg_cols.insert(0, \"x\")\n    if \"y\" not in agg_cols:\n        agg_cols.insert(0, \"y\")\n\n    df = df.drop(columns=[\"x\", \"y\"]).rename(columns=dict(xBin=\"x\", yBin=\"y\"))\n    res = _groupby_df(df, by=agg_cols, metrics=metrics, n_min=n_min)\n    ds = res.to_xarray().squeeze()\n\n    # change categorial index to coordinates\n    for dim in (\"x\", \"y\"):\n        ds[dim] = ds[dim].astype(float)\n\n    return SkillGrid(ds)\n
"},{"location":"api/comparer/#modelskill.Comparer.load","title":"load staticmethod","text":"
load(filename)\n

Load from netcdf file

Parameters:

Name Type Description Default filename str or Path

filename

required

Returns:

Type Description Comparer Source code in modelskill/comparison/_comparison.py
@staticmethod\ndef load(filename: Union[str, Path]) -> \"Comparer\":\n    \"\"\"Load from netcdf file\n\n    Parameters\n    ----------\n    filename : str or Path\n        filename\n\n    Returns\n    -------\n    Comparer\n    \"\"\"\n    with xr.open_dataset(filename) as ds:\n        data = ds.load()\n\n    if data.gtype == \"track\":\n        return Comparer(matched_data=data)\n\n    if data.gtype == \"point\":\n        raw_mod_data: Dict[str, TimeSeries] = {}\n\n        for var in data.data_vars:\n            var_name = str(var)\n            if var_name[:5] == \"_raw_\":\n                new_key = var_name[5:]  # remove prefix '_raw_'\n                ds = data[[var_name]].rename(\n                    {\"_time_raw_\" + new_key: \"time\", var_name: new_key}\n                )\n                ts = PointObservation(data=ds, name=new_key)\n                # TODO: name of time?\n                # ts.name = new_key\n                # df = (\n                #     data[var_name]\n                #     .to_dataframe()\n                #     .rename(\n                #         columns={\"_time_raw_\" + new_key: \"time\", var_name: new_key}\n                #     )\n                # )\n                raw_mod_data[new_key] = ts\n\n                # data = data.drop(var_name).drop(\"_time_raw_\" + new_key)\n\n        # filter variables, only keep the ones with a 'time' dimension\n        data = data[[v for v in data.data_vars if \"time\" in data[v].dims]]\n\n        return Comparer(matched_data=data, raw_mod_data=raw_mod_data)\n\n    else:\n        raise NotImplementedError(f\"Unknown gtype: {data.gtype}\")\n
"},{"location":"api/comparer/#modelskill.Comparer.query","title":"query","text":"
query(query)\n

Return a new Comparer with values where query cond is True

Parameters:

Name Type Description Default query str

Query string, see pandas.DataFrame.query

required

Returns:

Type Description Comparer

New Comparer with values where cond is True and other otherwise.

Examples:

>>> c2 = c.query(\"Observation > 0\")\n
Source code in modelskill/comparison/_comparison.py
def query(self, query: str) -> \"Comparer\":\n    \"\"\"Return a new Comparer with values where query cond is True\n\n    Parameters\n    ----------\n    query : str\n        Query string, see pandas.DataFrame.query\n\n    Returns\n    -------\n    Comparer\n        New Comparer with values where cond is True and other otherwise.\n\n    Examples\n    --------\n    >>> c2 = c.query(\"Observation > 0\")\n    \"\"\"\n    d = self.data.query({\"time\": query})\n    d = d.dropna(dim=\"time\", how=\"all\")\n    return Comparer.from_matched_data(d, self.raw_mod_data)\n
"},{"location":"api/comparer/#modelskill.Comparer.rename","title":"rename","text":"
rename(mapping, errors='raise')\n

Rename observation, model or auxiliary data variables

Parameters:

Name Type Description Default mapping dict

mapping of old names to new names

required errors ('raise', 'ignore')

If 'raise', raise a KeyError if any of the old names do not exist in the data. By default 'raise'.

'raise'

Returns:

Type Description Comparer

Examples:

>>> cmp = ms.match(observation, modeldata)\n>>> cmp.mod_names\n['model1']\n>>> cmp2 = cmp.rename({'model1': 'model2'})\n>>> cmp2.mod_names\n['model2']\n
Source code in modelskill/comparison/_comparison.py
def rename(\n    self, mapping: Mapping[str, str], errors: Literal[\"raise\", \"ignore\"] = \"raise\"\n) -> \"Comparer\":\n    \"\"\"Rename observation, model or auxiliary data variables\n\n    Parameters\n    ----------\n    mapping : dict\n        mapping of old names to new names\n    errors : {'raise', 'ignore'}, optional\n        If 'raise', raise a KeyError if any of the old names\n        do not exist in the data. By default 'raise'.\n\n    Returns\n    -------\n    Comparer\n\n    Examples\n    --------\n    >>> cmp = ms.match(observation, modeldata)\n    >>> cmp.mod_names\n    ['model1']\n    >>> cmp2 = cmp.rename({'model1': 'model2'})\n    >>> cmp2.mod_names\n    ['model2']\n    \"\"\"\n    if errors not in [\"raise\", \"ignore\"]:\n        raise ValueError(\"errors must be 'raise' or 'ignore'\")\n\n    allowed_keys = [self.name] + self.mod_names + self.aux_names\n    if errors == \"raise\":\n        for k in mapping.keys():\n            if k not in allowed_keys:\n                raise KeyError(f\"Unknown key: {k}; must be one of {allowed_keys}\")\n    else:\n        # \"ignore\": silently remove keys that are not in allowed_keys\n        mapping = {k: v for k, v in mapping.items() if k in allowed_keys}\n\n    if any([k in _RESERVED_NAMES for k in mapping.values()]):\n        # TODO: also check for duplicates\n        raise ValueError(\n            f\"Cannot rename to any of {_RESERVED_NAMES}, these are reserved names!\"\n        )\n\n    # rename observation\n    obs_name = mapping.get(self.name, self.name)\n    ma_mapping = {k: v for k, v in mapping.items() if k != self.name}\n\n    data = self.data.rename(ma_mapping)\n    data.attrs[\"name\"] = obs_name\n    raw_mod_data = dict()\n    for k, v in self.raw_mod_data.items():\n        if k in ma_mapping:\n            # copy is needed here as the same raw data could be\n            # used for multiple Comparers!\n            v2 = v.copy()\n            v2.data = v2.data.rename({k: ma_mapping[k]})\n          
  raw_mod_data[ma_mapping[k]] = v2\n        else:\n            raw_mod_data[k] = v\n\n    return Comparer(matched_data=data, raw_mod_data=raw_mod_data)\n
"},{"location":"api/comparer/#modelskill.Comparer.save","title":"save","text":"
save(filename)\n

Save to netcdf file

Parameters:

Name Type Description Default filename str or Path

filename

required Source code in modelskill/comparison/_comparison.py
def save(self, filename: Union[str, Path]) -> None:\n    \"\"\"Save to netcdf file\n\n    Parameters\n    ----------\n    filename : str or Path\n        filename\n    \"\"\"\n    ds = self.data\n\n    # add self.raw_mod_data to ds with prefix 'raw_' to avoid name conflicts\n    # an alternative strategy would be to use NetCDF groups\n    # https://docs.xarray.dev/en/stable/user-guide/io.html#groups\n\n    # There is no need to save raw data for track data, since it is identical to the matched data\n    if self.gtype == \"point\":\n        ds = self.data.copy()  # copy needed to avoid modifying self.data\n\n        for key, ts_mod in self.raw_mod_data.items():\n            ts_mod = ts_mod.copy()\n            #  rename time to unique name\n            ts_mod.data = ts_mod.data.rename({\"time\": \"_time_raw_\" + key})\n            # da = ds_mod.to_xarray()[key]\n            ds[\"_raw_\" + key] = ts_mod.data[key]\n\n    ds.to_netcdf(filename)\n
"},{"location":"api/comparer/#modelskill.Comparer.score","title":"score","text":"
score(metric=mtr.rmse, **kwargs)\n

Model skill score

Parameters:

Name Type Description Default metric list

a single metric from modelskill.metrics, by default rmse

rmse

Returns:

Type Description dict[str, float]

skill score as a single number (for each model)

See also

skill a method for skill assessment returning a pd.DataFrame

Examples:

>>> import modelskill as ms\n>>> cmp = ms.match(c2, mod)\n>>> cmp.score()\n{'mod': 0.3517964910888918}\n
>>> cmp.score(metric=\"mape\")\n{'mod': 11.567399646108198}\n
Source code in modelskill/comparison/_comparison.py
def score(\n    self,\n    metric: str | Callable = mtr.rmse,\n    **kwargs: Any,\n) -> Dict[str, float]:\n    \"\"\"Model skill score\n\n    Parameters\n    ----------\n    metric : list, optional\n        a single metric from modelskill.metrics, by default rmse\n\n    Returns\n    -------\n    dict[str, float]\n        skill score as a single number (for each model)\n\n    See also\n    --------\n    skill\n        a method for skill assessment returning a pd.DataFrame\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> cmp = ms.match(c2, mod)\n    >>> cmp.score()\n    {'mod': 0.3517964910888918}\n\n    >>> cmp.score(metric=\"mape\")\n    {'mod': 11.567399646108198}\n    \"\"\"\n    metric = _parse_metric(metric)[0]\n    if not (callable(metric) or isinstance(metric, str)):\n        raise ValueError(\"metric must be a string or a function\")\n\n    # TODO remove in v1.1\n    model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore\n    assert kwargs == {}, f\"Unknown keyword arguments: {kwargs}\"\n\n    sk = self.skill(\n        by=[\"model\", \"observation\"],\n        metrics=[metric],\n        model=model,  # deprecated\n        start=start,  # deprecated\n        end=end,  # deprecated\n        area=area,  # deprecated\n    )\n    df = sk.to_dataframe()\n\n    metric_name = metric if isinstance(metric, str) else metric.__name__\n    ser = df.reset_index().groupby(\"model\", observed=True)[metric_name].mean()\n    score = {str(k): float(v) for k, v in ser.items()}\n    return score\n
"},{"location":"api/comparer/#modelskill.Comparer.sel","title":"sel","text":"
sel(model=None, start=None, end=None, time=None, area=None)\n

Select data based on model, time and/or area.

Parameters:

Name Type Description Default model str or int or list of str or list of int

Model name or index. If None, all models are selected.

None start str or datetime

Start time. If None, all times are selected.

None end str or datetime

End time. If None, all times are selected.

None time str or datetime

Time. If None, all times are selected.

None area list of float

bbox: [x0, y0, x1, y1] or Polygon. If None, all areas are selected.

None

Returns:

Type Description Comparer

New Comparer with selected data.

Source code in modelskill/comparison/_comparison.py
def sel(\n    self,\n    model: Optional[IdxOrNameTypes] = None,\n    start: Optional[TimeTypes] = None,\n    end: Optional[TimeTypes] = None,\n    time: Optional[TimeTypes] = None,\n    area: Optional[List[float]] = None,\n) -> \"Comparer\":\n    \"\"\"Select data based on model, time and/or area.\n\n    Parameters\n    ----------\n    model : str or int or list of str or list of int, optional\n        Model name or index. If None, all models are selected.\n    start : str or datetime, optional\n        Start time. If None, all times are selected.\n    end : str or datetime, optional\n        End time. If None, all times are selected.\n    time : str or datetime, optional\n        Time. If None, all times are selected.\n    area : list of float, optional\n        bbox: [x0, y0, x1, y1] or Polygon. If None, all areas are selected.\n\n    Returns\n    -------\n    Comparer\n        New Comparer with selected data.\n    \"\"\"\n    if (time is not None) and ((start is not None) or (end is not None)):\n        raise ValueError(\"Cannot use both time and start/end\")\n\n    d = self.data\n    raw_mod_data = self.raw_mod_data\n    if model is not None:\n        if isinstance(model, (str, int)):\n            models = [model]\n        else:\n            models = list(model)\n        mod_names: List[str] = [_get_name(m, self.mod_names) for m in models]\n        dropped_models = [m for m in self.mod_names if m not in mod_names]\n        d = d.drop_vars(dropped_models)\n        raw_mod_data = {m: raw_mod_data[m] for m in mod_names}\n    if (start is not None) or (end is not None):\n        # TODO: can this be done without to_index? 
(simplify)\n        d = d.sel(time=d.time.to_index().to_frame().loc[start:end].index)  # type: ignore\n\n        # Note: if user asks for a specific time, we also filter raw\n        raw_mod_data = {\n            k: v.sel(time=slice(start, end)) for k, v in raw_mod_data.items()\n        }  # type: ignore\n    if time is not None:\n        d = d.sel(time=time)\n\n        # Note: if user asks for a specific time, we also filter raw\n        raw_mod_data = {k: v.sel(time=time) for k, v in raw_mod_data.items()}\n    if area is not None:\n        if _area_is_bbox(area):\n            x0, y0, x1, y1 = area\n            mask = (d.x > x0) & (d.x < x1) & (d.y > y0) & (d.y < y1)\n        elif _area_is_polygon(area):\n            polygon = np.array(area)\n            xy = np.column_stack((d.x, d.y))\n            mask = _inside_polygon(polygon, xy)\n        else:\n            raise ValueError(\"area supports bbox [x0,y0,x1,y1] and closed polygon\")\n        if self.gtype == \"point\":\n            # if False, return empty data\n            d = d if mask else d.isel(time=slice(None, 0))\n        else:\n            d = d.isel(time=mask)\n    return Comparer.from_matched_data(data=d, raw_mod_data=raw_mod_data)\n
"},{"location":"api/comparer/#modelskill.Comparer.skill","title":"skill","text":"
skill(by=None, metrics=None, **kwargs)\n

Skill assessment of model(s)

Parameters:

Name Type Description Default by str or List[str]

group by, by default [\"model\"]

  • by column name
  • by temporal bin of the DateTimeIndex via the freq-argument (using pandas pd.Grouper(freq)), e.g.: 'freq:M' = monthly; 'freq:D' daily
  • by the dt accessor of the DateTimeIndex (e.g. 'dt.month') using the syntax 'dt:month'. The dt-argument is different from the freq-argument in that it gives month-of-year rather than month-of-data.
None metrics list

list of modelskill.metrics, by default modelskill.options.metrics.list

None

Returns:

Type Description SkillTable

skill assessment object

See also

sel a method for filtering/selecting data

Examples:

>>> import modelskill as ms\n>>> cc = ms.match(c2, mod)\n>>> cc['c2'].skill().round(2)\n               n  bias  rmse  urmse   mae    cc    si    r2\nobservation\nc2           113 -0.00  0.35   0.35  0.29  0.97  0.12  0.99\n
>>> cc['c2'].skill(by='freq:D').round(2)\n             n  bias  rmse  urmse   mae    cc    si    r2\n2017-10-27  72 -0.19  0.31   0.25  0.26  0.48  0.12  0.98\n2017-10-28   0   NaN   NaN    NaN   NaN   NaN   NaN   NaN\n2017-10-29  41  0.33  0.41   0.25  0.36  0.96  0.06  0.99\n
Source code in modelskill/comparison/_comparison.py
def skill(\n    self,\n    by: str | Iterable[str] | None = None,\n    metrics: Iterable[str] | Iterable[Callable] | str | Callable | None = None,\n    **kwargs: Any,\n) -> SkillTable:\n    \"\"\"Skill assessment of model(s)\n\n    Parameters\n    ----------\n    by : str or List[str], optional\n        group by, by default [\"model\"]\n\n        - by column name\n        - by temporal bin of the DateTimeIndex via the freq-argument\n        (using pandas pd.Grouper(freq)), e.g.: 'freq:M' = monthly; 'freq:D' daily\n        - by the dt accessor of the DateTimeIndex (e.g. 'dt.month') using the\n        syntax 'dt:month'. The dt-argument is different from the freq-argument\n        in that it gives month-of-year rather than month-of-data.\n    metrics : list, optional\n        list of modelskill.metrics, by default modelskill.options.metrics.list\n\n    Returns\n    -------\n    SkillTable\n        skill assessment object\n\n    See also\n    --------\n    sel\n        a method for filtering/selecting data\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> cc = ms.match(c2, mod)\n    >>> cc['c2'].skill().round(2)\n                   n  bias  rmse  urmse   mae    cc    si    r2\n    observation\n    c2           113 -0.00  0.35   0.35  0.29  0.97  0.12  0.99\n\n    >>> cc['c2'].skill(by='freq:D').round(2)\n                 n  bias  rmse  urmse   mae    cc    si    r2\n    2017-10-27  72 -0.19  0.31   0.25  0.26  0.48  0.12  0.98\n    2017-10-28   0   NaN   NaN    NaN   NaN   NaN   NaN   NaN\n    2017-10-29  41  0.33  0.41   0.25  0.36  0.96  0.06  0.99\n    \"\"\"\n    metrics = _parse_metric(metrics, directional=self.quantity.is_directional)\n\n    # TODO remove in v1.1\n    model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore\n    if kwargs != {}:\n        raise AttributeError(f\"Unknown keyword arguments: {kwargs}\")\n\n    cmp = self.sel(\n        model=model,\n        start=start,\n        end=end,\n        area=area,\n    )\n 
   if cmp.n_points == 0:\n        raise ValueError(\"No data selected for skill assessment\")\n\n    by = _parse_groupby(by, n_mod=cmp.n_models, n_qnt=1)\n\n    df = cmp._to_long_dataframe()\n    res = _groupby_df(df, by=by, metrics=metrics)\n    res[\"x\"] = np.nan if self.gtype == \"track\" else cmp.x\n    res[\"y\"] = np.nan if self.gtype == \"track\" else cmp.y\n    res = self._add_as_col_if_not_in_index(df, skilldf=res)\n    return SkillTable(res)\n
"},{"location":"api/comparer/#modelskill.Comparer.to_dataframe","title":"to_dataframe","text":"
to_dataframe()\n

Convert matched data to pandas DataFrame

Include x, y coordinates only if gtype=track

Returns:

Type Description DataFrame

data as a pandas DataFrame

Source code in modelskill/comparison/_comparison.py
def to_dataframe(self) -> pd.DataFrame:\n    \"\"\"Convert matched data to pandas DataFrame\n\n    Include x, y coordinates only if gtype=track\n\n    Returns\n    -------\n    pd.DataFrame\n        data as a pandas DataFrame\n    \"\"\"\n    if self.gtype == str(GeometryType.POINT):\n        # we remove the scalar coordinate variables as they\n        # will otherwise be columns in the dataframe\n        return self.data.drop_vars([\"x\", \"y\", \"z\"]).to_dataframe()\n    elif self.gtype == str(GeometryType.TRACK):\n        df = self.data.drop_vars([\"z\"]).to_dataframe()\n        # make sure that x, y cols are first\n        cols = [\"x\", \"y\"] + [c for c in df.columns if c not in [\"x\", \"y\"]]\n        return df[cols]\n    else:\n        raise NotImplementedError(f\"Unknown gtype: {self.gtype}\")\n
"},{"location":"api/comparer/#modelskill.Comparer.where","title":"where","text":"
where(cond)\n

Return a new Comparer with values where cond is True

Parameters:

Name Type Description Default cond (bool, ndarray, DataArray)

This selects the values to return.

required

Returns:

Type Description Comparer

New Comparer with values where cond is True and other otherwise.

Examples:

>>> c2 = c.where(c.data.Observation > 0)\n
Source code in modelskill/comparison/_comparison.py
def where(\n    self,\n    cond: Union[bool, np.ndarray, xr.DataArray],\n) -> \"Comparer\":\n    \"\"\"Return a new Comparer with values where cond is True\n\n    Parameters\n    ----------\n    cond : bool, np.ndarray, xr.DataArray\n        This selects the values to return.\n\n    Returns\n    -------\n    Comparer\n        New Comparer with values where cond is True and other otherwise.\n\n    Examples\n    --------\n    >>> c2 = c.where(c.data.Observation > 0)\n    \"\"\"\n    d = self.data.where(cond, other=np.nan)\n    d = d.dropna(dim=\"time\", how=\"all\")\n    return Comparer.from_matched_data(d, self.raw_mod_data)\n
"},{"location":"api/comparer/#modelskill.comparison._comparer_plotter.ComparerPlotter","title":"modelskill.comparison._comparer_plotter.ComparerPlotter","text":"

Plotter class for Comparer

Examples:

>>> cmp.plot.scatter()\n>>> cmp.plot.timeseries()\n>>> cmp.plot.hist()\n>>> cmp.plot.kde()\n>>> cmp.plot.qq()\n>>> cmp.plot.box()\n
Source code in modelskill/comparison/_comparer_plotter.py
class ComparerPlotter:\n    \"\"\"Plotter class for Comparer\n\n    Examples\n    --------\n    >>> cmp.plot.scatter()\n    >>> cmp.plot.timeseries()\n    >>> cmp.plot.hist()\n    >>> cmp.plot.kde()\n    >>> cmp.plot.qq()\n    >>> cmp.plot.box()\n    \"\"\"\n\n    def __init__(self, comparer: Comparer) -> None:\n        self.comparer = comparer\n        self.is_directional = comparer.quantity.is_directional\n\n    def __call__(\n        self, *args, **kwargs\n    ) -> matplotlib.axes.Axes | list[matplotlib.axes.Axes]:\n        \"\"\"Plot scatter plot of modelled vs observed data\"\"\"\n        return self.scatter(*args, **kwargs)\n\n    def timeseries(\n        self,\n        *,\n        title: str | None = None,\n        ylim: Tuple[float, float] | None = None,\n        ax=None,\n        figsize: Tuple[float, float] | None = None,\n        backend: str = \"matplotlib\",\n        **kwargs,\n    ):\n        \"\"\"Timeseries plot showing compared data: observation vs modelled\n\n        Parameters\n        ----------\n        title : str, optional\n            plot title, by default None\n        ylim : (float, float), optional\n            plot range for the model (ymin, ymax), by default None\n        ax : matplotlib.axes.Axes, optional\n            axes to plot on, by default None\n        figsize : (float, float), optional\n            figure size, by default None\n        backend : str, optional\n            use \"plotly\" (interactive) or \"matplotlib\" backend,\n            by default \"matplotlib\"\n        **kwargs\n            other keyword arguments to fig.update_layout (plotly backend)\n\n        Returns\n        -------\n        matplotlib.axes.Axes or plotly.graph_objects.Figure\n        \"\"\"\n        from ._comparison import MOD_COLORS\n\n        cmp = self.comparer\n\n        if title is None:\n            title = cmp.name\n\n        if backend == \"matplotlib\":\n            fig, ax = _get_fig_ax(ax, figsize)\n            for j in 
range(cmp.n_models):\n                key = cmp.mod_names[j]\n                mod = cmp.raw_mod_data[key]._values_as_series\n                mod.plot(ax=ax, color=MOD_COLORS[j])\n\n            ax.scatter(\n                cmp.time,\n                cmp.data[cmp._obs_name].values,\n                marker=\".\",\n                color=cmp.data[cmp._obs_name].attrs[\"color\"],\n            )\n            ax.set_ylabel(cmp._unit_text)\n            ax.legend([*cmp.mod_names, cmp._obs_name])\n            ax.set_ylim(ylim)\n            if self.is_directional:\n                _ytick_directional(ax, ylim)\n            ax.set_title(title)\n            return ax\n\n        elif backend == \"plotly\":  # pragma: no cover\n            import plotly.graph_objects as go  # type: ignore\n\n            mod_scatter_list = []\n            for j in range(cmp.n_models):\n                key = cmp.mod_names[j]\n                mod = cmp.raw_mod_data[key]._values_as_series\n                mod_scatter_list.append(\n                    go.Scatter(\n                        x=mod.index,\n                        y=mod.values,\n                        name=key,\n                        line=dict(color=MOD_COLORS[j]),\n                    )\n                )\n\n            fig = go.Figure(\n                [\n                    *mod_scatter_list,\n                    go.Scatter(\n                        x=cmp.time,\n                        y=cmp.data[cmp._obs_name].values,\n                        name=cmp._obs_name,\n                        mode=\"markers\",\n                        marker=dict(color=cmp.data[cmp._obs_name].attrs[\"color\"]),\n                    ),\n                ]\n            )\n\n            fig.update_layout(title=title, yaxis_title=cmp._unit_text, **kwargs)\n            fig.update_yaxes(range=ylim)\n\n            return fig\n        else:\n            raise ValueError(f\"Plotting backend: {backend} not supported\")\n\n    def hist(\n        self,\n        bins: int 
| Sequence = 100,\n        *,\n        model: str | int | None = None,\n        title: str | None = None,\n        ax=None,\n        figsize: Tuple[float, float] | None = None,\n        density: bool = True,\n        alpha: float = 0.5,\n        **kwargs,\n    ):\n        \"\"\"Plot histogram of model data and observations.\n\n        Wraps pandas.DataFrame hist() method.\n\n        Parameters\n        ----------\n        bins : int, optional\n            number of bins, by default 100\n        title : str, optional\n            plot title, default: [model name] vs [observation name]\n        ax : matplotlib.axes.Axes, optional\n            axes to plot on, by default None\n        figsize : tuple, optional\n            figure size, by default None\n        density: bool, optional\n            If True, draw and return a probability density\n        alpha : float, optional\n            alpha transparency fraction, by default 0.5\n        **kwargs\n            other keyword arguments to df.plot.hist()\n\n        Returns\n        -------\n        matplotlib axes\n\n        See also\n        --------\n        pandas.Series.plot.hist\n        matplotlib.axes.Axes.hist\n        \"\"\"\n        cmp = self.comparer\n\n        if model is None:\n            mod_names = cmp.mod_names\n        else:\n            warnings.warn(\n                \"The 'model' keyword is deprecated! 
Instead, filter comparer before plotting cmp.sel(model=...).plot.hist()\",\n                FutureWarning,\n            )\n            model_list = [model] if isinstance(model, (str, int)) else model\n            mod_names = [cmp.mod_names[_get_idx(m, cmp.mod_names)] for m in model_list]\n\n        axes = []\n        for mod_name in mod_names:\n            ax_mod = self._hist_one_model(\n                mod_name=mod_name,\n                bins=bins,\n                title=title,\n                ax=ax,\n                figsize=figsize,\n                density=density,\n                alpha=alpha,\n                **kwargs,\n            )\n            axes.append(ax_mod)\n\n        return axes[0] if len(axes) == 1 else axes\n\n    def _hist_one_model(\n        self,\n        *,\n        mod_name: str,\n        bins: int | Sequence | None,\n        title: str | None,\n        ax,\n        figsize: Tuple[float, float] | None,\n        density: bool | None,\n        alpha: float | None,\n        **kwargs,\n    ):\n        from ._comparison import MOD_COLORS  # TODO move to here\n\n        cmp = self.comparer\n        assert mod_name in cmp.mod_names, f\"Model {mod_name} not found in comparer\"\n        mod_idx = _get_idx(mod_name, cmp.mod_names)\n\n        title = f\"{mod_name} vs {cmp.name}\" if title is None else title\n\n        _, ax = _get_fig_ax(ax, figsize)\n\n        kwargs[\"alpha\"] = alpha\n        kwargs[\"density\"] = density\n        kwargs[\"ax\"] = ax\n\n        ax = (\n            cmp.data[mod_name]\n            .to_series()\n            .hist(bins=bins, color=MOD_COLORS[mod_idx], **kwargs)\n        )\n\n        cmp.data[cmp._obs_name].to_series().hist(\n            bins=bins, color=cmp.data[cmp._obs_name].attrs[\"color\"], **kwargs\n        )\n        ax.legend([mod_name, cmp._obs_name])\n        ax.set_title(title)\n        ax.set_xlabel(f\"{cmp._unit_text}\")\n        if density:\n            ax.set_ylabel(\"density\")\n        else:\n            
ax.set_ylabel(\"count\")\n\n        if self.is_directional:\n            _xtick_directional(ax)\n\n        return ax\n\n    def kde(self, ax=None, title=None, figsize=None, **kwargs) -> matplotlib.axes.Axes:\n        \"\"\"Plot kde (kernel density estimates of distributions) of model data and observations.\n\n        Wraps pandas.DataFrame kde() method.\n\n        Parameters\n        ----------\n        ax : matplotlib.axes.Axes, optional\n            axes to plot on, by default None\n        title : str, optional\n            plot title, default: \"KDE plot for [observation name]\"\n        figsize : tuple, optional\n            figure size, by default None\n        **kwargs\n            other keyword arguments to df.plot.kde()\n\n        Returns\n        -------\n        matplotlib.axes.Axes\n\n        Examples\n        --------\n        >>> cmp.plot.kde()\n        >>> cmp.plot.kde(bw_method=0.3)\n        >>> cmp.plot.kde(ax=ax, bw_method='silverman')\n        >>> cmp.plot.kde(xlim=[0,None], title=\"Density plot\");\n\n        See also\n        --------\n        pandas.Series.plot.kde\n        \"\"\"\n        cmp = self.comparer\n\n        _, ax = _get_fig_ax(ax, figsize)\n\n        cmp.data.Observation.to_series().plot.kde(\n            ax=ax, linestyle=\"dashed\", label=\"Observation\", **kwargs\n        )\n\n        for model in cmp.mod_names:\n            cmp.data[model].to_series().plot.kde(ax=ax, label=model, **kwargs)\n\n        ax.set_xlabel(cmp._unit_text)  # TODO\n\n        ax.legend()\n\n        # remove y-axis, ticks and label\n        ax.yaxis.set_visible(False)\n        ax.tick_params(axis=\"y\", which=\"both\", length=0)\n        ax.set_ylabel(\"\")\n        title = f\"KDE plot for {cmp.name}\" if title is None else title\n        ax.set_title(title)\n\n        # remove box around plot\n        ax.spines[\"top\"].set_visible(False)\n        ax.spines[\"right\"].set_visible(False)\n        ax.spines[\"left\"].set_visible(False)\n\n        if 
self.is_directional:\n            _xtick_directional(ax)\n\n        return ax\n\n    def qq(\n        self,\n        quantiles: int | Sequence[float] | None = None,\n        *,\n        title=None,\n        ax=None,\n        figsize=None,\n        **kwargs,\n    ):\n        \"\"\"Make quantile-quantile (q-q) plot of model data and observations.\n\n        Primarily used to compare multiple models.\n\n        Parameters\n        ----------\n        quantiles: (int, sequence), optional\n            number of quantiles for QQ-plot, by default None and will depend on the scatter data length (10, 100 or 1000)\n            if int, this is the number of points\n            if sequence (list of floats), represents the desired quantiles (from 0 to 1)\n        title : str, optional\n            plot title, default: \"Q-Q plot for [observation name]\"\n        ax : matplotlib.axes.Axes, optional\n            axes to plot on, by default None\n        figsize : tuple, optional\n            figure size, by default None\n        **kwargs\n            other keyword arguments to plt.plot()\n\n        Returns\n        -------\n        matplotlib axes\n\n        Examples\n        --------\n        >>> cmp.plot.qq()\n\n        \"\"\"\n        cmp = self.comparer\n\n        _, ax = _get_fig_ax(ax, figsize)\n\n        x = cmp.data.Observation.values\n        xmin, xmax = x.min(), x.max()\n        ymin, ymax = np.inf, -np.inf\n\n        for mod_name in cmp.mod_names:\n            y = cmp.data[mod_name].values\n            ymin = min([y.min(), ymin])\n            ymax = max([y.max(), ymax])\n            xq, yq = quantiles_xy(x, y, quantiles)\n            ax.plot(\n                xq,\n                yq,\n                \".-\",\n                label=mod_name,\n                zorder=4,\n                **kwargs,\n            )\n\n        xymin = min([xmin, ymin])\n        xymax = max([xmax, ymax])\n\n        # 1:1 line\n        ax.plot(\n            [xymin, xymax],\n            [xymin, 
xymax],\n            label=options.plot.scatter.oneone_line.label,\n            c=options.plot.scatter.oneone_line.color,\n            zorder=3,\n        )\n\n        ax.axis(\"square\")\n        ax.set_xlim([xymin, xymax])\n        ax.set_ylim([xymin, xymax])\n        ax.minorticks_on()\n        ax.grid(which=\"both\", axis=\"both\", linewidth=\"0.2\", color=\"k\", alpha=0.6)\n\n        ax.legend()\n        ax.set_xlabel(\"Observation, \" + cmp._unit_text)\n        ax.set_ylabel(\"Model, \" + cmp._unit_text)\n        ax.set_title(title or f\"Q-Q plot for {cmp.name}\")\n\n        if self.is_directional:\n            _xtick_directional(ax)\n            _ytick_directional(ax)\n\n        return ax\n\n    def box(self, *, ax=None, title=None, figsize=None, **kwargs):\n        \"\"\"Make a box plot of model data and observations.\n\n        Wraps pandas.DataFrame boxplot() method.\n\n        Parameters\n        ----------\n        ax : matplotlib.axes.Axes, optional\n            axes to plot on, by default None\n        title : str, optional\n            plot title, default: [observation name]\n        figsize : tuple, optional\n            figure size, by default None\n        **kwargs\n            other keyword arguments to df.boxplot()\n\n        Returns\n        -------\n        matplotlib axes\n\n        Examples\n        --------\n        >>> cmp.plot.box()\n        >>> cmp.plot.box(showmeans=True)\n        >>> cmp.plot.box(ax=ax, title=\"Box plot\")\n\n        See also\n        --------\n        pandas.DataFrame.boxplot\n        matplotlib.pyplot.boxplot\n        \"\"\"\n        cmp = self.comparer\n\n        _, ax = _get_fig_ax(ax, figsize)\n\n        cols = [\"Observation\"] + cmp.mod_names\n        df = cmp.data[cols].to_dataframe()[cols]\n        df.boxplot(ax=ax, **kwargs)\n        ax.set_ylabel(cmp._unit_text)\n        ax.set_title(title or cmp.name)\n\n        if self.is_directional:\n            _ytick_directional(ax)\n\n        return ax\n\n    def 
scatter(\n        self,\n        *,\n        model=None,\n        bins: int | float = 120,\n        quantiles: int | Sequence[float] | None = None,\n        fit_to_quantiles: bool = False,\n        show_points: bool | int | float | None = None,\n        show_hist: Optional[bool] = None,\n        show_density: Optional[bool] = None,\n        norm: Optional[colors.Normalize] = None,\n        backend: Literal[\"matplotlib\", \"plotly\"] = \"matplotlib\",\n        figsize: Tuple[float, float] = (8, 8),\n        xlim: Optional[Tuple[float, float]] = None,\n        ylim: Optional[Tuple[float, float]] = None,\n        reg_method: str | bool = \"ols\",\n        title: Optional[str] = None,\n        xlabel: Optional[str] = None,\n        ylabel: Optional[str] = None,\n        skill_table: Optional[Union[str, List[str], bool]] = None,\n        ax: Optional[matplotlib.axes.Axes] = None,\n        **kwargs,\n    ) -> matplotlib.axes.Axes | list[matplotlib.axes.Axes]:\n        \"\"\"Scatter plot showing compared data: observation vs modelled\n        Optionally, with density histogram.\n\n        Parameters\n        ----------\n        bins: (int, float, sequence), optional\n            bins for the 2D histogram on the background. 
By default 20 bins.\n            if int, represents the number of bins of 2D\n            if float, represents the bin size\n            if sequence (list of int or float), represents the bin edges\n        quantiles: (int, sequence), optional\n            number of quantiles for QQ-plot, by default None and will depend\n            on the scatter data length (10, 100 or 1000); if int, this is\n            the number of points; if sequence (list of floats), represents\n            the desired quantiles (from 0 to 1)\n        fit_to_quantiles: bool, optional\n            by default the regression line is fitted to all data, if True,\n            it is fitted to the quantiles which can be useful to represent\n            the extremes of the distribution, by default False\n        show_points : (bool, int, float), optional\n            Should the scatter points be displayed? None means: show all\n            points if fewer than 1e4, otherwise show 1e4 sample points,\n            by default None. float: fraction of points to show on plot\n            from 0 to 1. e.g. 0.5 shows 50% of the points. int: if 'n' (int)\n            given, then 'n' points will be displayed, randomly selected\n        show_hist : bool, optional\n            show the data density as a a 2d histogram, by default None\n        show_density: bool, optional\n            show the data density as a colormap of the scatter, by default\n            None. If both `show_density` and `show_hist` are None, then\n            `show_density` is used by default. For binning the data, the\n            kword `bins=Float` is used.\n        norm : matplotlib.colors norm\n            colormap normalization. 
If None, defaults to\n            matplotlib.colors.PowerNorm(vmin=1, gamma=0.5)\n        backend : str, optional\n            use \"plotly\" (interactive) or \"matplotlib\" backend,\n            by default \"matplotlib\"\n        figsize : tuple, optional\n            width and height of the figure, by default (8, 8)\n        xlim : tuple, optional\n            plot range for the observation (xmin, xmax), by default None\n        ylim : tuple, optional\n            plot range for the model (ymin, ymax), by default None\n        reg_method : str or bool, optional\n            method for determining the regression line\n            \"ols\" : ordinary least squares regression\n            \"odr\" : orthogonal distance regression,\n            False : no regression line\n            by default \"ols\"\n        title : str, optional\n            plot title, by default None\n        xlabel : str, optional\n            x-label text on plot, by default None\n        ylabel : str, optional\n            y-label text on plot, by default None\n        skill_table : str, List[str], bool, optional\n            list of modelskill.metrics or boolean, if True then by default\n            modelskill.options.metrics.list. 
This kword adds a box at the\n            right of the scatter plot, by default False\n        ax : matplotlib.axes.Axes, optional\n            axes to plot on, by default None\n        **kwargs\n            other keyword arguments to plt.scatter()\n\n        Examples\n        ------\n        >>> cmp.plot.scatter()\n        >>> cmp.plot.scatter(bins=0.2, backend='plotly')\n        >>> cmp.plot.scatter(show_points=False, title='no points')\n        >>> cmp.plot.scatter(xlabel='all observations', ylabel='my model')\n        >>> cmp.sel(model='HKZN_v2').plot.scatter(figsize=(10, 10))\n        \"\"\"\n\n        cmp = self.comparer\n        if model is None:\n            mod_names = cmp.mod_names\n        else:\n            warnings.warn(\n                \"The 'model' keyword is deprecated! Instead, filter comparer before plotting cmp.sel(model=...).plot.scatter()\",\n                FutureWarning,\n            )\n            model_list = [model] if isinstance(model, (str, int)) else model\n            mod_names = [cmp.mod_names[_get_idx(m, cmp.mod_names)] for m in model_list]\n\n        axes = []\n        for mod_name in mod_names:\n            ax_mod = self._scatter_one_model(\n                mod_name=mod_name,\n                bins=bins,\n                quantiles=quantiles,\n                fit_to_quantiles=fit_to_quantiles,\n                show_points=show_points,\n                show_hist=show_hist,\n                show_density=show_density,\n                norm=norm,\n                backend=backend,\n                figsize=figsize,\n                xlim=xlim,\n                ylim=ylim,\n                reg_method=reg_method,\n                title=title,\n                xlabel=xlabel,\n                ylabel=ylabel,\n                skill_table=skill_table,\n                ax=ax,\n                **kwargs,\n            )\n            axes.append(ax_mod)\n        return axes[0] if len(axes) == 1 else axes\n\n    def _scatter_one_model(\n        self,\n  
      *,\n        mod_name: str,\n        bins: int | float,\n        quantiles: int | Sequence[float] | None,\n        fit_to_quantiles: bool,\n        show_points: bool | int | float | None,\n        show_hist: Optional[bool],\n        show_density: Optional[bool],\n        norm: Optional[colors.Normalize],\n        backend: Literal[\"matplotlib\", \"plotly\"],\n        figsize: Tuple[float, float],\n        xlim: Optional[Tuple[float, float]],\n        ylim: Optional[Tuple[float, float]],\n        reg_method: str | bool,\n        title: Optional[str],\n        xlabel: Optional[str],\n        ylabel: Optional[str],\n        skill_table: Optional[Union[str, List[str], bool]],\n        **kwargs,\n    ):\n        \"\"\"Scatter plot for one model only\"\"\"\n\n        cmp = self.comparer\n        cmp_sel_mod = cmp.sel(model=mod_name)\n        assert mod_name in cmp.mod_names, f\"Model {mod_name} not found in comparer\"\n\n        if cmp_sel_mod.n_points == 0:\n            raise ValueError(\"No data found in selection\")\n\n        x = cmp_sel_mod.data.Observation.values\n        y = cmp_sel_mod.data[mod_name].values\n\n        assert x.ndim == y.ndim == 1, \"x and y must be 1D arrays\"\n        assert x.shape == y.shape, \"x and y must have the same shape\"\n\n        unit_text = cmp._unit_text\n        xlabel = xlabel or f\"Observation, {unit_text}\"\n        ylabel = ylabel or f\"Model, {unit_text}\"\n        title = title or f\"{mod_name} vs {cmp.name}\"\n\n        skill = None\n        skill_score_unit = None\n\n        if skill_table:\n            metrics = None if skill_table is True else skill_table\n            skill = cmp_sel_mod.skill(metrics=metrics)  # type: ignore\n            try:\n                skill_score_unit = unit_text.split(\"[\")[1].split(\"]\")[0]\n            except IndexError:\n                skill_score_unit = \"\"  # Dimensionless\n\n        if self.is_directional:\n            # hide quantiles and regression line\n            quantiles = 
0\n            reg_method = False\n\n        skill_scores = skill.iloc[0].to_dict() if skill is not None else None\n\n        ax = scatter(\n            x=x,\n            y=y,\n            bins=bins,\n            quantiles=quantiles,\n            fit_to_quantiles=fit_to_quantiles,\n            show_points=show_points,\n            show_hist=show_hist,\n            show_density=show_density,\n            norm=norm,\n            backend=backend,\n            figsize=figsize,\n            xlim=xlim,\n            ylim=ylim,\n            reg_method=reg_method,\n            title=title,\n            xlabel=xlabel,\n            ylabel=ylabel,\n            skill_scores=skill_scores,\n            skill_score_unit=skill_score_unit,\n            **kwargs,\n        )\n\n        if backend == \"matplotlib\" and self.is_directional:\n            _xtick_directional(ax, xlim)\n            _ytick_directional(ax, ylim)\n\n        return ax\n\n    def taylor(\n        self,\n        *,\n        normalize_std: bool = False,\n        figsize: Tuple[float, float] = (7, 7),\n        marker: str = \"o\",\n        marker_size: float = 6.0,\n        title: str = \"Taylor diagram\",\n    ):\n        \"\"\"Taylor diagram showing model std and correlation to observation\n        in a single-quadrant polar plot, with r=std and theta=arccos(cc).\n\n        Parameters\n        ----------\n        normalize_std : bool, optional\n            plot model std normalized with observation std, default False\n        figsize : tuple, optional\n            width and height of the figure (should be square), by default (7, 7)\n        marker : str, optional\n            marker type e.g. 
\"x\", \"*\", by default \"o\"\n        marker_size : float, optional\n            size of the marker, by default 6\n        title : str, optional\n            title of the plot, by default \"Taylor diagram\"\n\n        Returns\n        -------\n        matplotlib.figure.Figure\n\n        Examples\n        ------\n        >>> comparer.taylor()\n        >>> comparer.taylor(start=\"2017-10-28\", figsize=(5,5))\n\n        References\n        ----------\n        Copin, Y. (2018). https://gist.github.com/ycopin/3342888, Yannick Copin <yannick.copin@laposte.net>\n        \"\"\"\n        cmp = self.comparer\n\n        # TODO consider if this round-trip  via mtr is necessary to get the std:s\n        metrics: List[Callable] = [\n            mtr._std_obs,\n            mtr._std_mod,\n            mtr.cc,\n        ]\n\n        sk = cmp.skill(metrics=metrics)\n\n        if sk is None:  # TODO\n            return\n        df = sk.to_dataframe()\n        ref_std = 1.0 if normalize_std else df.iloc[0][\"_std_obs\"]\n\n        df = df[[\"_std_obs\", \"_std_mod\", \"cc\"]].copy()\n        df.columns = [\"obs_std\", \"std\", \"cc\"]\n\n        pts = [\n            TaylorPoint(\n                r.Index, r.obs_std, r.std, r.cc, marker=marker, marker_size=marker_size\n            )\n            for r in df.itertuples()\n        ]\n\n        return taylor_diagram(\n            obs_std=ref_std,\n            points=pts,\n            figsize=figsize,\n            obs_text=f\"Obs: {cmp.name}\",\n            normalize_std=normalize_std,\n            title=title,\n        )\n\n    def residual_hist(\n        self, bins=100, title=None, color=None, figsize=None, ax=None, **kwargs\n    ) -> matplotlib.axes.Axes | list[matplotlib.axes.Axes]:\n        \"\"\"plot histogram of residual values\n\n        Parameters\n        ----------\n        bins : int, optional\n            specification of bins, by default 100\n        title : str, optional\n            plot title, default: Residuals, [name]\n    
    color : str, optional\n            residual color, by default \"#8B8D8E\"\n        figsize : tuple, optional\n            figure size, by default None\n        ax : matplotlib.axes.Axes | list[matplotlib.axes.Axes], optional\n            axes to plot on, by default None\n        **kwargs\n            other keyword arguments to plt.hist()\n\n        Returns\n        -------\n        matplotlib.axes.Axes | list[matplotlib.axes.Axes]\n        \"\"\"\n        cmp = self.comparer\n\n        if cmp.n_models == 1:\n            return self._residual_hist_one_model(\n                bins=bins,\n                title=title,\n                color=color,\n                figsize=figsize,\n                ax=ax,\n                mod_name=cmp.mod_names[0],\n                **kwargs,\n            )\n\n        if ax is not None and len(ax) != len(cmp.mod_names):\n            raise ValueError(\"Number of axes must match number of models\")\n\n        axs = ax if ax is not None else [None] * len(cmp.mod_names)\n\n        for i, mod_name in enumerate(cmp.mod_names):\n            cmp_model = cmp.sel(model=mod_name)\n            ax_mod = cmp_model.plot.residual_hist(\n                bins=bins,\n                title=title,\n                color=color,\n                figsize=figsize,\n                ax=axs[i],\n                **kwargs,\n            )\n            axs[i] = ax_mod\n\n        return axs\n\n    def _residual_hist_one_model(\n        self,\n        bins=100,\n        title=None,\n        color=None,\n        figsize=None,\n        ax=None,\n        mod_name=None,\n        **kwargs,\n    ) -> matplotlib.axes.Axes:\n        \"\"\"Residual histogram for one model only\"\"\"\n        _, ax = _get_fig_ax(ax, figsize)\n\n        default_color = \"#8B8D8E\"\n        color = default_color if color is None else color\n        title = (\n            f\"Residuals, Observation: {self.comparer.name}, Model: {mod_name}\"\n            if title is None\n            else title\n   
     )\n        ax.hist(self.comparer._residual, bins=bins, color=color, **kwargs)\n        ax.set_title(title)\n        ax.set_xlabel(f\"Residuals of {self.comparer._unit_text}\")\n\n        if self.is_directional:\n            ticks = np.linspace(-180, 180, 9)\n            ax.set_xticks(ticks)\n            ax.set_xlim(-180, 180)\n\n        return ax\n
"},{"location":"api/comparer/#modelskill.comparison._comparer_plotter.ComparerPlotter.__call__","title":"__call__","text":"
__call__(*args, **kwargs)\n

Plot scatter plot of modelled vs observed data

Source code in modelskill/comparison/_comparer_plotter.py
def __call__(\n    self, *args, **kwargs\n) -> matplotlib.axes.Axes | list[matplotlib.axes.Axes]:\n    \"\"\"Plot scatter plot of modelled vs observed data\"\"\"\n    return self.scatter(*args, **kwargs)\n
"},{"location":"api/comparer/#modelskill.comparison._comparer_plotter.ComparerPlotter.box","title":"box","text":"
box(*, ax=None, title=None, figsize=None, **kwargs)\n

Make a box plot of model data and observations.

Wraps pandas.DataFrame boxplot() method.

Parameters:

Name Type Description Default ax Axes

axes to plot on, by default None

None title str

plot title, default: [observation name]

None figsize tuple

figure size, by default None

None **kwargs

other keyword arguments to df.boxplot()

{}

Returns:

Type Description matplotlib axes

Examples:

>>> cmp.plot.box()\n>>> cmp.plot.box(showmeans=True)\n>>> cmp.plot.box(ax=ax, title=\"Box plot\")\n
See also

pandas.DataFrame.boxplot matplotlib.pyplot.boxplot

Source code in modelskill/comparison/_comparer_plotter.py
def box(self, *, ax=None, title=None, figsize=None, **kwargs):\n    \"\"\"Make a box plot of model data and observations.\n\n    Wraps pandas.DataFrame boxplot() method.\n\n    Parameters\n    ----------\n    ax : matplotlib.axes.Axes, optional\n        axes to plot on, by default None\n    title : str, optional\n        plot title, default: [observation name]\n    figsize : tuple, optional\n        figure size, by default None\n    **kwargs\n        other keyword arguments to df.boxplot()\n\n    Returns\n    -------\n    matplotlib axes\n\n    Examples\n    --------\n    >>> cmp.plot.box()\n    >>> cmp.plot.box(showmeans=True)\n    >>> cmp.plot.box(ax=ax, title=\"Box plot\")\n\n    See also\n    --------\n    pandas.DataFrame.boxplot\n    matplotlib.pyplot.boxplot\n    \"\"\"\n    cmp = self.comparer\n\n    _, ax = _get_fig_ax(ax, figsize)\n\n    cols = [\"Observation\"] + cmp.mod_names\n    df = cmp.data[cols].to_dataframe()[cols]\n    df.boxplot(ax=ax, **kwargs)\n    ax.set_ylabel(cmp._unit_text)\n    ax.set_title(title or cmp.name)\n\n    if self.is_directional:\n        _ytick_directional(ax)\n\n    return ax\n
"},{"location":"api/comparer/#modelskill.comparison._comparer_plotter.ComparerPlotter.hist","title":"hist","text":"
hist(bins=100, *, model=None, title=None, ax=None, figsize=None, density=True, alpha=0.5, **kwargs)\n

Plot histogram of model data and observations.

Wraps pandas.DataFrame hist() method.

Parameters:

Name Type Description Default bins int

number of bins, by default 100

100 title str

plot title, default: [model name] vs [observation name]

None ax Axes

axes to plot on, by default None

None figsize tuple

figure size, by default None

None density bool

If True, draw and return a probability density

True alpha float

alpha transparency fraction, by default 0.5

0.5 **kwargs

other keyword arguments to df.plot.hist()

{}

Returns:

Type Description matplotlib axes See also

pandas.Series.plot.hist matplotlib.axes.Axes.hist

Source code in modelskill/comparison/_comparer_plotter.py
def hist(\n    self,\n    bins: int | Sequence = 100,\n    *,\n    model: str | int | None = None,\n    title: str | None = None,\n    ax=None,\n    figsize: Tuple[float, float] | None = None,\n    density: bool = True,\n    alpha: float = 0.5,\n    **kwargs,\n):\n    \"\"\"Plot histogram of model data and observations.\n\n    Wraps pandas.DataFrame hist() method.\n\n    Parameters\n    ----------\n    bins : int, optional\n        number of bins, by default 100\n    title : str, optional\n        plot title, default: [model name] vs [observation name]\n    ax : matplotlib.axes.Axes, optional\n        axes to plot on, by default None\n    figsize : tuple, optional\n        figure size, by default None\n    density: bool, optional\n        If True, draw and return a probability density\n    alpha : float, optional\n        alpha transparency fraction, by default 0.5\n    **kwargs\n        other keyword arguments to df.plot.hist()\n\n    Returns\n    -------\n    matplotlib axes\n\n    See also\n    --------\n    pandas.Series.plot.hist\n    matplotlib.axes.Axes.hist\n    \"\"\"\n    cmp = self.comparer\n\n    if model is None:\n        mod_names = cmp.mod_names\n    else:\n        warnings.warn(\n            \"The 'model' keyword is deprecated! Instead, filter comparer before plotting cmp.sel(model=...).plot.hist()\",\n            FutureWarning,\n        )\n        model_list = [model] if isinstance(model, (str, int)) else model\n        mod_names = [cmp.mod_names[_get_idx(m, cmp.mod_names)] for m in model_list]\n\n    axes = []\n    for mod_name in mod_names:\n        ax_mod = self._hist_one_model(\n            mod_name=mod_name,\n            bins=bins,\n            title=title,\n            ax=ax,\n            figsize=figsize,\n            density=density,\n            alpha=alpha,\n            **kwargs,\n        )\n        axes.append(ax_mod)\n\n    return axes[0] if len(axes) == 1 else axes\n
"},{"location":"api/comparer/#modelskill.comparison._comparer_plotter.ComparerPlotter.kde","title":"kde","text":"
kde(ax=None, title=None, figsize=None, **kwargs)\n

Plot kde (kernel density estimates of distributions) of model data and observations.

Wraps pandas.DataFrame kde() method.

Parameters:

Name Type Description Default ax Axes

axes to plot on, by default None

None title str

plot title, default: \"KDE plot for [observation name]\"

None figsize tuple

figure size, by default None

None **kwargs

other keyword arguments to df.plot.kde()

{}

Returns:

Type Description Axes

Examples:

>>> cmp.plot.kde()\n>>> cmp.plot.kde(bw_method=0.3)\n>>> cmp.plot.kde(ax=ax, bw_method='silverman')\n>>> cmp.plot.kde(xlim=[0,None], title=\"Density plot\");\n
See also

pandas.Series.plot.kde

Source code in modelskill/comparison/_comparer_plotter.py
def kde(self, ax=None, title=None, figsize=None, **kwargs) -> matplotlib.axes.Axes:\n    \"\"\"Plot kde (kernel density estimates of distributions) of model data and observations.\n\n    Wraps pandas.DataFrame kde() method.\n\n    Parameters\n    ----------\n    ax : matplotlib.axes.Axes, optional\n        axes to plot on, by default None\n    title : str, optional\n        plot title, default: \"KDE plot for [observation name]\"\n    figsize : tuple, optional\n        figure size, by default None\n    **kwargs\n        other keyword arguments to df.plot.kde()\n\n    Returns\n    -------\n    matplotlib.axes.Axes\n\n    Examples\n    --------\n    >>> cmp.plot.kde()\n    >>> cmp.plot.kde(bw_method=0.3)\n    >>> cmp.plot.kde(ax=ax, bw_method='silverman')\n    >>> cmp.plot.kde(xlim=[0,None], title=\"Density plot\");\n\n    See also\n    --------\n    pandas.Series.plot.kde\n    \"\"\"\n    cmp = self.comparer\n\n    _, ax = _get_fig_ax(ax, figsize)\n\n    cmp.data.Observation.to_series().plot.kde(\n        ax=ax, linestyle=\"dashed\", label=\"Observation\", **kwargs\n    )\n\n    for model in cmp.mod_names:\n        cmp.data[model].to_series().plot.kde(ax=ax, label=model, **kwargs)\n\n    ax.set_xlabel(cmp._unit_text)  # TODO\n\n    ax.legend()\n\n    # remove y-axis, ticks and label\n    ax.yaxis.set_visible(False)\n    ax.tick_params(axis=\"y\", which=\"both\", length=0)\n    ax.set_ylabel(\"\")\n    title = f\"KDE plot for {cmp.name}\" if title is None else title\n    ax.set_title(title)\n\n    # remove box around plot\n    ax.spines[\"top\"].set_visible(False)\n    ax.spines[\"right\"].set_visible(False)\n    ax.spines[\"left\"].set_visible(False)\n\n    if self.is_directional:\n        _xtick_directional(ax)\n\n    return ax\n
"},{"location":"api/comparer/#modelskill.comparison._comparer_plotter.ComparerPlotter.qq","title":"qq","text":"
qq(quantiles=None, *, title=None, ax=None, figsize=None, **kwargs)\n

Make quantile-quantile (q-q) plot of model data and observations.

Primarily used to compare multiple models.

Parameters:

Name Type Description Default quantiles int | Sequence[float] | None

number of quantiles for QQ-plot, by default None and will depend on the scatter data length (10, 100 or 1000) if int, this is the number of points if sequence (list of floats), represents the desired quantiles (from 0 to 1)

None title str

plot title, default: \"Q-Q plot for [observation name]\"

None ax Axes

axes to plot on, by default None

None figsize tuple

figure size, by default None

None **kwargs

other keyword arguments to plt.plot()

{}

Returns:

Type Description matplotlib axes

Examples:

>>> cmp.plot.qq()\n
Source code in modelskill/comparison/_comparer_plotter.py
def qq(\n    self,\n    quantiles: int | Sequence[float] | None = None,\n    *,\n    title=None,\n    ax=None,\n    figsize=None,\n    **kwargs,\n):\n    \"\"\"Make quantile-quantile (q-q) plot of model data and observations.\n\n    Primarily used to compare multiple models.\n\n    Parameters\n    ----------\n    quantiles: (int, sequence), optional\n        number of quantiles for QQ-plot, by default None and will depend on the scatter data length (10, 100 or 1000)\n        if int, this is the number of points\n        if sequence (list of floats), represents the desired quantiles (from 0 to 1)\n    title : str, optional\n        plot title, default: \"Q-Q plot for [observation name]\"\n    ax : matplotlib.axes.Axes, optional\n        axes to plot on, by default None\n    figsize : tuple, optional\n        figure size, by default None\n    **kwargs\n        other keyword arguments to plt.plot()\n\n    Returns\n    -------\n    matplotlib axes\n\n    Examples\n    --------\n    >>> cmp.plot.qq()\n\n    \"\"\"\n    cmp = self.comparer\n\n    _, ax = _get_fig_ax(ax, figsize)\n\n    x = cmp.data.Observation.values\n    xmin, xmax = x.min(), x.max()\n    ymin, ymax = np.inf, -np.inf\n\n    for mod_name in cmp.mod_names:\n        y = cmp.data[mod_name].values\n        ymin = min([y.min(), ymin])\n        ymax = max([y.max(), ymax])\n        xq, yq = quantiles_xy(x, y, quantiles)\n        ax.plot(\n            xq,\n            yq,\n            \".-\",\n            label=mod_name,\n            zorder=4,\n            **kwargs,\n        )\n\n    xymin = min([xmin, ymin])\n    xymax = max([xmax, ymax])\n\n    # 1:1 line\n    ax.plot(\n        [xymin, xymax],\n        [xymin, xymax],\n        label=options.plot.scatter.oneone_line.label,\n        c=options.plot.scatter.oneone_line.color,\n        zorder=3,\n    )\n\n    ax.axis(\"square\")\n    ax.set_xlim([xymin, xymax])\n    ax.set_ylim([xymin, xymax])\n    ax.minorticks_on()\n    ax.grid(which=\"both\", axis=\"both\", 
linewidth=\"0.2\", color=\"k\", alpha=0.6)\n\n    ax.legend()\n    ax.set_xlabel(\"Observation, \" + cmp._unit_text)\n    ax.set_ylabel(\"Model, \" + cmp._unit_text)\n    ax.set_title(title or f\"Q-Q plot for {cmp.name}\")\n\n    if self.is_directional:\n        _xtick_directional(ax)\n        _ytick_directional(ax)\n\n    return ax\n
"},{"location":"api/comparer/#modelskill.comparison._comparer_plotter.ComparerPlotter.residual_hist","title":"residual_hist","text":"
residual_hist(bins=100, title=None, color=None, figsize=None, ax=None, **kwargs)\n

plot histogram of residual values

Parameters:

Name Type Description Default bins int

specification of bins, by default 100

100 title str

plot title, default: Residuals, [name]

None color str

residual color, by default \"#8B8D8E\"

None figsize tuple

figure size, by default None

None ax Axes | list[Axes]

axes to plot on, by default None

None **kwargs

other keyword arguments to plt.hist()

{}

Returns:

Type Description Axes | list[Axes] Source code in modelskill/comparison/_comparer_plotter.py
def residual_hist(\n    self, bins=100, title=None, color=None, figsize=None, ax=None, **kwargs\n) -> matplotlib.axes.Axes | list[matplotlib.axes.Axes]:\n    \"\"\"plot histogram of residual values\n\n    Parameters\n    ----------\n    bins : int, optional\n        specification of bins, by default 100\n    title : str, optional\n        plot title, default: Residuals, [name]\n    color : str, optional\n        residual color, by default \"#8B8D8E\"\n    figsize : tuple, optional\n        figure size, by default None\n    ax : matplotlib.axes.Axes | list[matplotlib.axes.Axes], optional\n        axes to plot on, by default None\n    **kwargs\n        other keyword arguments to plt.hist()\n\n    Returns\n    -------\n    matplotlib.axes.Axes | list[matplotlib.axes.Axes]\n    \"\"\"\n    cmp = self.comparer\n\n    if cmp.n_models == 1:\n        return self._residual_hist_one_model(\n            bins=bins,\n            title=title,\n            color=color,\n            figsize=figsize,\n            ax=ax,\n            mod_name=cmp.mod_names[0],\n            **kwargs,\n        )\n\n    if ax is not None and len(ax) != len(cmp.mod_names):\n        raise ValueError(\"Number of axes must match number of models\")\n\n    axs = ax if ax is not None else [None] * len(cmp.mod_names)\n\n    for i, mod_name in enumerate(cmp.mod_names):\n        cmp_model = cmp.sel(model=mod_name)\n        ax_mod = cmp_model.plot.residual_hist(\n            bins=bins,\n            title=title,\n            color=color,\n            figsize=figsize,\n            ax=axs[i],\n            **kwargs,\n        )\n        axs[i] = ax_mod\n\n    return axs\n
"},{"location":"api/comparer/#modelskill.comparison._comparer_plotter.ComparerPlotter.scatter","title":"scatter","text":"
scatter(*, model=None, bins=120, quantiles=None, fit_to_quantiles=False, show_points=None, show_hist=None, show_density=None, norm=None, backend='matplotlib', figsize=(8, 8), xlim=None, ylim=None, reg_method='ols', title=None, xlabel=None, ylabel=None, skill_table=None, ax=None, **kwargs)\n

Scatter plot showing compared data: observation vs modelled Optionally, with density histogram.

Parameters:

Name Type Description Default bins int | float

bins for the 2D histogram on the background. By default 20 bins. if int, represents the number of bins of 2D if float, represents the bin size if sequence (list of int or float), represents the bin edges

120 quantiles int | Sequence[float] | None

number of quantiles for QQ-plot, by default None and will depend on the scatter data length (10, 100 or 1000); if int, this is the number of points; if sequence (list of floats), represents the desired quantiles (from 0 to 1)

None fit_to_quantiles bool

by default the regression line is fitted to all data, if True, it is fitted to the quantiles which can be useful to represent the extremes of the distribution, by default False

False show_points (bool, int, float)

Should the scatter points be displayed? None means: show all points if fewer than 1e4, otherwise show 1e4 sample points, by default None. float: fraction of points to show on plot from 0 to 1. e.g. 0.5 shows 50% of the points. int: if 'n' (int) given, then 'n' points will be displayed, randomly selected

None show_hist bool

show the data density as a a 2d histogram, by default None

None show_density Optional[bool]

show the data density as a colormap of the scatter, by default None. If both show_density and show_hist are None, then show_density is used by default. For binning the data, the kword bins=Float is used.

None norm matplotlib.colors norm

colormap normalization. If None, defaults to matplotlib.colors.PowerNorm(vmin=1, gamma=0.5)

None backend str

use \"plotly\" (interactive) or \"matplotlib\" backend, by default \"matplotlib\"

'matplotlib' figsize tuple

width and height of the figure, by default (8, 8)

(8, 8) xlim tuple

plot range for the observation (xmin, xmax), by default None

None ylim tuple

plot range for the model (ymin, ymax), by default None

None reg_method str or bool

method for determining the regression line \"ols\" : ordinary least squares regression \"odr\" : orthogonal distance regression, False : no regression line by default \"ols\"

'ols' title str

plot title, by default None

None xlabel str

x-label text on plot, by default None

None ylabel str

y-label text on plot, by default None

None skill_table (str, List[str], bool)

list of modelskill.metrics or boolean, if True then by default modelskill.options.metrics.list. This kword adds a box at the right of the scatter plot, by default False

None ax Axes

axes to plot on, by default None

None **kwargs

other keyword arguments to plt.scatter()

{}

Examples:

>>> cmp.plot.scatter()\n>>> cmp.plot.scatter(bins=0.2, backend='plotly')\n>>> cmp.plot.scatter(show_points=False, title='no points')\n>>> cmp.plot.scatter(xlabel='all observations', ylabel='my model')\n>>> cmp.sel(model='HKZN_v2').plot.scatter(figsize=(10, 10))\n
Source code in modelskill/comparison/_comparer_plotter.py
def scatter(\n    self,\n    *,\n    model=None,\n    bins: int | float = 120,\n    quantiles: int | Sequence[float] | None = None,\n    fit_to_quantiles: bool = False,\n    show_points: bool | int | float | None = None,\n    show_hist: Optional[bool] = None,\n    show_density: Optional[bool] = None,\n    norm: Optional[colors.Normalize] = None,\n    backend: Literal[\"matplotlib\", \"plotly\"] = \"matplotlib\",\n    figsize: Tuple[float, float] = (8, 8),\n    xlim: Optional[Tuple[float, float]] = None,\n    ylim: Optional[Tuple[float, float]] = None,\n    reg_method: str | bool = \"ols\",\n    title: Optional[str] = None,\n    xlabel: Optional[str] = None,\n    ylabel: Optional[str] = None,\n    skill_table: Optional[Union[str, List[str], bool]] = None,\n    ax: Optional[matplotlib.axes.Axes] = None,\n    **kwargs,\n) -> matplotlib.axes.Axes | list[matplotlib.axes.Axes]:\n    \"\"\"Scatter plot showing compared data: observation vs modelled\n    Optionally, with density histogram.\n\n    Parameters\n    ----------\n    bins: (int, float, sequence), optional\n        bins for the 2D histogram on the background. By default 20 bins.\n        if int, represents the number of bins of 2D\n        if float, represents the bin size\n        if sequence (list of int or float), represents the bin edges\n    quantiles: (int, sequence), optional\n        number of quantiles for QQ-plot, by default None and will depend\n        on the scatter data length (10, 100 or 1000); if int, this is\n        the number of points; if sequence (list of floats), represents\n        the desired quantiles (from 0 to 1)\n    fit_to_quantiles: bool, optional\n        by default the regression line is fitted to all data, if True,\n        it is fitted to the quantiles which can be useful to represent\n        the extremes of the distribution, by default False\n    show_points : (bool, int, float), optional\n        Should the scatter points be displayed? 
None means: show all\n        points if fewer than 1e4, otherwise show 1e4 sample points,\n        by default None. float: fraction of points to show on plot\n        from 0 to 1. e.g. 0.5 shows 50% of the points. int: if 'n' (int)\n        given, then 'n' points will be displayed, randomly selected\n    show_hist : bool, optional\n        show the data density as a a 2d histogram, by default None\n    show_density: bool, optional\n        show the data density as a colormap of the scatter, by default\n        None. If both `show_density` and `show_hist` are None, then\n        `show_density` is used by default. For binning the data, the\n        kword `bins=Float` is used.\n    norm : matplotlib.colors norm\n        colormap normalization. If None, defaults to\n        matplotlib.colors.PowerNorm(vmin=1, gamma=0.5)\n    backend : str, optional\n        use \"plotly\" (interactive) or \"matplotlib\" backend,\n        by default \"matplotlib\"\n    figsize : tuple, optional\n        width and height of the figure, by default (8, 8)\n    xlim : tuple, optional\n        plot range for the observation (xmin, xmax), by default None\n    ylim : tuple, optional\n        plot range for the model (ymin, ymax), by default None\n    reg_method : str or bool, optional\n        method for determining the regression line\n        \"ols\" : ordinary least squares regression\n        \"odr\" : orthogonal distance regression,\n        False : no regression line\n        by default \"ols\"\n    title : str, optional\n        plot title, by default None\n    xlabel : str, optional\n        x-label text on plot, by default None\n    ylabel : str, optional\n        y-label text on plot, by default None\n    skill_table : str, List[str], bool, optional\n        list of modelskill.metrics or boolean, if True then by default\n        modelskill.options.metrics.list. 
This kword adds a box at the\n        right of the scatter plot, by default False\n    ax : matplotlib.axes.Axes, optional\n        axes to plot on, by default None\n    **kwargs\n        other keyword arguments to plt.scatter()\n\n    Examples\n    ------\n    >>> cmp.plot.scatter()\n    >>> cmp.plot.scatter(bins=0.2, backend='plotly')\n    >>> cmp.plot.scatter(show_points=False, title='no points')\n    >>> cmp.plot.scatter(xlabel='all observations', ylabel='my model')\n    >>> cmp.sel(model='HKZN_v2').plot.scatter(figsize=(10, 10))\n    \"\"\"\n\n    cmp = self.comparer\n    if model is None:\n        mod_names = cmp.mod_names\n    else:\n        warnings.warn(\n            \"The 'model' keyword is deprecated! Instead, filter comparer before plotting cmp.sel(model=...).plot.scatter()\",\n            FutureWarning,\n        )\n        model_list = [model] if isinstance(model, (str, int)) else model\n        mod_names = [cmp.mod_names[_get_idx(m, cmp.mod_names)] for m in model_list]\n\n    axes = []\n    for mod_name in mod_names:\n        ax_mod = self._scatter_one_model(\n            mod_name=mod_name,\n            bins=bins,\n            quantiles=quantiles,\n            fit_to_quantiles=fit_to_quantiles,\n            show_points=show_points,\n            show_hist=show_hist,\n            show_density=show_density,\n            norm=norm,\n            backend=backend,\n            figsize=figsize,\n            xlim=xlim,\n            ylim=ylim,\n            reg_method=reg_method,\n            title=title,\n            xlabel=xlabel,\n            ylabel=ylabel,\n            skill_table=skill_table,\n            ax=ax,\n            **kwargs,\n        )\n        axes.append(ax_mod)\n    return axes[0] if len(axes) == 1 else axes\n
"},{"location":"api/comparer/#modelskill.comparison._comparer_plotter.ComparerPlotter.taylor","title":"taylor","text":"
taylor(*, normalize_std=False, figsize=(7, 7), marker='o', marker_size=6.0, title='Taylor diagram')\n

Taylor diagram showing model std and correlation to observation in a single-quadrant polar plot, with r=std and theta=arccos(cc).

Parameters:

Name Type Description Default normalize_std bool

plot model std normalized with observation std, default False

False figsize tuple

width and height of the figure (should be square), by default (7, 7)

(7, 7) marker str

marker type e.g. \"x\", \"*\", by default \"o\"

'o' marker_size float

size of the marker, by default 6

6.0 title str

title of the plot, by default \"Taylor diagram\"

'Taylor diagram'

Returns:

Type Description Figure

Examples:

>>> comparer.taylor()\n>>> comparer.taylor(start=\"2017-10-28\", figsize=(5,5))\n
References

Copin, Y. (2018). https://gist.github.com/ycopin/3342888, Yannick Copin yannick.copin@laposte.net

Source code in modelskill/comparison/_comparer_plotter.py
def taylor(\n    self,\n    *,\n    normalize_std: bool = False,\n    figsize: Tuple[float, float] = (7, 7),\n    marker: str = \"o\",\n    marker_size: float = 6.0,\n    title: str = \"Taylor diagram\",\n):\n    \"\"\"Taylor diagram showing model std and correlation to observation\n    in a single-quadrant polar plot, with r=std and theta=arccos(cc).\n\n    Parameters\n    ----------\n    normalize_std : bool, optional\n        plot model std normalized with observation std, default False\n    figsize : tuple, optional\n        width and height of the figure (should be square), by default (7, 7)\n    marker : str, optional\n        marker type e.g. \"x\", \"*\", by default \"o\"\n    marker_size : float, optional\n        size of the marker, by default 6\n    title : str, optional\n        title of the plot, by default \"Taylor diagram\"\n\n    Returns\n    -------\n    matplotlib.figure.Figure\n\n    Examples\n    ------\n    >>> comparer.taylor()\n    >>> comparer.taylor(start=\"2017-10-28\", figsize=(5,5))\n\n    References\n    ----------\n    Copin, Y. (2018). 
https://gist.github.com/ycopin/3342888, Yannick Copin <yannick.copin@laposte.net>\n    \"\"\"\n    cmp = self.comparer\n\n    # TODO consider if this round-trip  via mtr is necessary to get the std:s\n    metrics: List[Callable] = [\n        mtr._std_obs,\n        mtr._std_mod,\n        mtr.cc,\n    ]\n\n    sk = cmp.skill(metrics=metrics)\n\n    if sk is None:  # TODO\n        return\n    df = sk.to_dataframe()\n    ref_std = 1.0 if normalize_std else df.iloc[0][\"_std_obs\"]\n\n    df = df[[\"_std_obs\", \"_std_mod\", \"cc\"]].copy()\n    df.columns = [\"obs_std\", \"std\", \"cc\"]\n\n    pts = [\n        TaylorPoint(\n            r.Index, r.obs_std, r.std, r.cc, marker=marker, marker_size=marker_size\n        )\n        for r in df.itertuples()\n    ]\n\n    return taylor_diagram(\n        obs_std=ref_std,\n        points=pts,\n        figsize=figsize,\n        obs_text=f\"Obs: {cmp.name}\",\n        normalize_std=normalize_std,\n        title=title,\n    )\n
"},{"location":"api/comparer/#modelskill.comparison._comparer_plotter.ComparerPlotter.timeseries","title":"timeseries","text":"
timeseries(*, title=None, ylim=None, ax=None, figsize=None, backend='matplotlib', **kwargs)\n

Timeseries plot showing compared data: observation vs modelled

Parameters:

Name Type Description Default title str

plot title, by default None

None ylim (float, float)

plot range for the model (ymin, ymax), by default None

None ax Axes

axes to plot on, by default None

None figsize (float, float)

figure size, by default None

None backend str

use \"plotly\" (interactive) or \"matplotlib\" backend, by default \"matplotlib\"

'matplotlib' **kwargs

other keyword arguments to fig.update_layout (plotly backend)

{}

Returns:

Type Description Axes or Figure Source code in modelskill/comparison/_comparer_plotter.py
def timeseries(\n    self,\n    *,\n    title: str | None = None,\n    ylim: Tuple[float, float] | None = None,\n    ax=None,\n    figsize: Tuple[float, float] | None = None,\n    backend: str = \"matplotlib\",\n    **kwargs,\n):\n    \"\"\"Timeseries plot showing compared data: observation vs modelled\n\n    Parameters\n    ----------\n    title : str, optional\n        plot title, by default None\n    ylim : (float, float), optional\n        plot range for the model (ymin, ymax), by default None\n    ax : matplotlib.axes.Axes, optional\n        axes to plot on, by default None\n    figsize : (float, float), optional\n        figure size, by default None\n    backend : str, optional\n        use \"plotly\" (interactive) or \"matplotlib\" backend,\n        by default \"matplotlib\"\n    **kwargs\n        other keyword arguments to fig.update_layout (plotly backend)\n\n    Returns\n    -------\n    matplotlib.axes.Axes or plotly.graph_objects.Figure\n    \"\"\"\n    from ._comparison import MOD_COLORS\n\n    cmp = self.comparer\n\n    if title is None:\n        title = cmp.name\n\n    if backend == \"matplotlib\":\n        fig, ax = _get_fig_ax(ax, figsize)\n        for j in range(cmp.n_models):\n            key = cmp.mod_names[j]\n            mod = cmp.raw_mod_data[key]._values_as_series\n            mod.plot(ax=ax, color=MOD_COLORS[j])\n\n        ax.scatter(\n            cmp.time,\n            cmp.data[cmp._obs_name].values,\n            marker=\".\",\n            color=cmp.data[cmp._obs_name].attrs[\"color\"],\n        )\n        ax.set_ylabel(cmp._unit_text)\n        ax.legend([*cmp.mod_names, cmp._obs_name])\n        ax.set_ylim(ylim)\n        if self.is_directional:\n            _ytick_directional(ax, ylim)\n        ax.set_title(title)\n        return ax\n\n    elif backend == \"plotly\":  # pragma: no cover\n        import plotly.graph_objects as go  # type: ignore\n\n        mod_scatter_list = []\n        for j in range(cmp.n_models):\n            key = 
cmp.mod_names[j]\n            mod = cmp.raw_mod_data[key]._values_as_series\n            mod_scatter_list.append(\n                go.Scatter(\n                    x=mod.index,\n                    y=mod.values,\n                    name=key,\n                    line=dict(color=MOD_COLORS[j]),\n                )\n            )\n\n        fig = go.Figure(\n            [\n                *mod_scatter_list,\n                go.Scatter(\n                    x=cmp.time,\n                    y=cmp.data[cmp._obs_name].values,\n                    name=cmp._obs_name,\n                    mode=\"markers\",\n                    marker=dict(color=cmp.data[cmp._obs_name].attrs[\"color\"]),\n                ),\n            ]\n        )\n\n        fig.update_layout(title=title, yaxis_title=cmp._unit_text, **kwargs)\n        fig.update_yaxes(range=ylim)\n\n        return fig\n    else:\n        raise ValueError(f\"Plotting backend: {backend} not supported\")\n
"},{"location":"api/comparercollection/","title":"ComparerCollection","text":"

The ComparerCollection is one of the main objects of the modelskill package. It is a collection of Comparer objects and created either by the match() method, by passing a list of Comparers to the ComparerCollection constructor, or by reading a config file using the from_config() function.

Main functionality:

  • selecting/filtering data
    • __get_item__() - get a single Comparer, e.g., cc[0] or cc['obs1']
    • sel()
    • query()
  • skill assessment
    • skill()
    • mean_skill()
    • gridded_skill() (for track observations)
  • plotting
    • plot.scatter()
    • plot.kde()
    • plot.hist()
  • load/save/export data
    • load()
    • save()
"},{"location":"api/comparercollection/#modelskill.ComparerCollection","title":"modelskill.ComparerCollection","text":"

Bases: Mapping, Scoreable

Collection of comparers, constructed by calling the modelskill.match method or by initializing with a list of comparers.

NOTE: In case of multiple model results with different time coverage, only the overlapping time period will be used! (intersection)

Examples:

>>> import modelskill as ms\n>>> mr = ms.DfsuModelResult(\"Oresund2D.dfsu\", item=0)\n>>> o1 = ms.PointObservation(\"klagshamn.dfs0\", item=0, x=366844, y=6154291, name=\"Klagshamn\")\n>>> o2 = ms.PointObservation(\"drogden.dfs0\", item=0, x=355568.0, y=6156863.0)\n>>> cmp1 = ms.match(o1, mr)  # Comparer\n>>> cmp2 = ms.match(o2, mr)  # Comparer\n>>> ccA = ms.ComparerCollection([cmp1, cmp2])\n>>> ccB = ms.match(obs=[o1, o2], mod=mr)\n>>> sk = ccB.skill()\n>>> ccB[\"Klagshamn\"].plot.timeseries()\n
Source code in modelskill/comparison/_collection.py
class ComparerCollection(Mapping, Scoreable):\n    \"\"\"\n    Collection of comparers, constructed by calling the `modelskill.match`\n    method or by initializing with a list of comparers.\n\n    NOTE: In case of multiple model results with different time coverage,\n    only the _overlapping_ time period will be used! (intersection)\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> mr = ms.DfsuModelResult(\"Oresund2D.dfsu\", item=0)\n    >>> o1 = ms.PointObservation(\"klagshamn.dfs0\", item=0, x=366844, y=6154291, name=\"Klagshamn\")\n    >>> o2 = ms.PointObservation(\"drogden.dfs0\", item=0, x=355568.0, y=6156863.0)\n    >>> cmp1 = ms.match(o1, mr)  # Comparer\n    >>> cmp2 = ms.match(o2, mr)  # Comparer\n    >>> ccA = ms.ComparerCollection([cmp1, cmp2])\n    >>> ccB = ms.match(obs=[o1, o2], mod=mr)\n    >>> sk = ccB.skill()\n    >>> ccB[\"Klagshamn\"].plot.timeseries()\n    \"\"\"\n\n    plotter = ComparerCollectionPlotter\n\n    def __init__(self, comparers: Iterable[Comparer]) -> None:\n        self._comparers: Dict[str, Comparer] = {}\n\n        for cmp in comparers:\n            if cmp.name in self._comparers:\n                # comparer with this name already exists!\n                # maybe the user is trying to add a new model\n                # or a new time period\n                self._comparers[cmp.name] += cmp\n            else:\n                self._comparers[cmp.name] = cmp\n\n        self.plot = ComparerCollection.plotter(self)\n        \"\"\"Plot using the ComparerCollectionPlotter\n\n        Examples\n        --------\n        >>> cc.plot.scatter()\n        >>> cc.plot.kde()\n        >>> cc.plot.taylor()\n        >>> cc.plot.hist()\n        \"\"\"\n\n    @property\n    def _name(self) -> str:\n        return \"Observations\"\n\n    @property\n    def _unit_text(self) -> str:\n        # Picking the first one is arbitrary, but it should be the same for all\n        # we could check that they are all the same, but let's 
assume that they are\n        # for cmp in self:\n        #     if cmp._unit_text != text:\n        #         warnings.warn(f\"Unit text is inconsistent: {text} vs {cmp._unit_text}\")\n        return self[0]._unit_text\n\n    @property\n    def n_comparers(self) -> int:\n        warnings.warn(\n            \"cc.n_comparers is deprecated, use len(cc) instead\",\n            FutureWarning,\n        )\n        return len(self)\n\n    @property\n    def n_points(self) -> int:\n        \"\"\"number of compared points\"\"\"\n        return sum([c.n_points for c in self._comparers.values()])\n\n    @property\n    def start(self) -> pd.Timestamp:\n        warnings.warn(\n            \"start is deprecated, use start_time instead\",\n            FutureWarning,\n        )\n        return self.start_time\n\n    @property\n    def start_time(self) -> pd.Timestamp:\n        \"\"\"start timestamp of compared data\"\"\"\n        starts = [pd.Timestamp.max]\n        for cmp in self._comparers.values():\n            starts.append(cmp.time[0])\n        return min(starts)\n\n    @property\n    def end(self) -> pd.Timestamp:\n        warnings.warn(\n            \"end is deprecated, use end_time instead\",\n            FutureWarning,\n        )\n        return self.end_time\n\n    @property\n    def end_time(self) -> pd.Timestamp:\n        \"\"\"end timestamp of compared data\"\"\"\n        ends = [pd.Timestamp.min]\n        for cmp in self._comparers.values():\n            ends.append(cmp.time[-1])\n        return max(ends)\n\n    @property\n    def obs_names(self) -> List[str]:\n        \"\"\"List of observation names\"\"\"\n        return [c.name for c in self._comparers.values()]\n\n    @property\n    def n_observations(self) -> int:\n        \"\"\"Number of observations (same as len(cc))\"\"\"\n        return len(self)\n\n    @property\n    def mod_names(self) -> List[str]:\n        \"\"\"List of unique model names\"\"\"\n        all_names = [n for cmp in self for n in 
cmp.mod_names]\n        # preserve order (instead of using set)\n        return list(dict.fromkeys(all_names))\n\n    @property\n    def n_models(self) -> int:\n        \"\"\"Number of unique models\"\"\"\n        return len(self.mod_names)\n\n    @property\n    def aux_names(self) -> List[str]:\n        \"\"\"List of unique auxiliary names\"\"\"\n        all_names = [n for cmp in self for n in cmp.aux_names]\n        # preserve order (instead of using set)\n        return list(dict.fromkeys(all_names))\n\n    @property\n    def quantity_names(self) -> List[str]:\n        \"\"\"List of unique quantity names\"\"\"\n        all_names = [cmp.quantity.name for cmp in self]\n        # preserve order (instead of using set)\n        return list(dict.fromkeys(all_names))\n\n    @property\n    def n_quantities(self) -> int:\n        \"\"\"Number of unique quantities\"\"\"\n        return len(self.quantity_names)\n\n    def __repr__(self) -> str:\n        out = []\n        out.append(\"<ComparerCollection>\")\n        out.append(\"Comparers:\")\n        for index, (key, value) in enumerate(self._comparers.items()):\n            out.append(f\"{index}: {key} - {value.quantity}\")\n        return str.join(\"\\n\", out)\n\n    def rename(self, mapping: Dict[str, str]) -> \"ComparerCollection\":\n        \"\"\"Rename observation, model or auxiliary data variables\n\n        Parameters\n        ----------\n        mapping : dict\n            mapping of old names to new names\n\n        Returns\n        -------\n        ComparerCollection\n\n        Examples\n        --------\n        >>> cc = ms.match([o1, o2], [mr1, mr2])\n        >>> cc.mod_names\n        ['mr1', 'mr2']\n        >>> cc2 = cc.rename({'mr1': 'model1'})\n        >>> cc2.mod_names\n        ['model1', 'mr2']\n        \"\"\"\n        for k in mapping.keys():\n            allowed_keys = self.obs_names + self.mod_names + self.aux_names\n            if k not in allowed_keys:\n                raise KeyError(f\"Unknown 
key: {k}; must be one of {allowed_keys}\")\n\n        cmps = []\n        for cmp in self._comparers.values():\n            cmps.append(cmp.rename(mapping, errors=\"ignore\"))\n        return ComparerCollection(cmps)\n\n    @overload\n    def __getitem__(self, x: slice | Iterable[Hashable]) -> ComparerCollection: ...\n\n    @overload\n    def __getitem__(self, x: int | Hashable) -> Comparer: ...\n\n    def __getitem__(\n        self, x: int | Hashable | slice | Iterable[Hashable]\n    ) -> Comparer | ComparerCollection:\n        if isinstance(x, str):\n            return self._comparers[x]\n\n        if isinstance(x, slice):\n            idxs = list(range(*x.indices(len(self))))\n            return ComparerCollection([self[i] for i in idxs])\n\n        if isinstance(x, int):\n            name = _get_name(x, self.obs_names)\n            return self._comparers[name]\n\n        if isinstance(x, Iterable):\n            cmps = [self[i] for i in x]\n            return ComparerCollection(cmps)\n\n        raise TypeError(f\"Invalid type for __getitem__: {type(x)}\")\n\n    def __len__(self) -> int:\n        return len(self._comparers)\n\n    def __iter__(self) -> Iterator[Comparer]:\n        return iter(self._comparers.values())\n\n    def copy(self) -> \"ComparerCollection\":\n        return deepcopy(self)\n\n    def __add__(\n        self, other: Union[\"Comparer\", \"ComparerCollection\"]\n    ) -> \"ComparerCollection\":\n        if not isinstance(other, (Comparer, ComparerCollection)):\n            raise TypeError(f\"Cannot add {type(other)} to {type(self)}\")\n\n        if isinstance(other, Comparer):\n            return ComparerCollection([*self, other])\n        elif isinstance(other, ComparerCollection):\n            return ComparerCollection([*self, *other])\n\n    def sel(\n        self,\n        model: Optional[IdxOrNameTypes] = None,\n        observation: Optional[IdxOrNameTypes] = None,\n        quantity: Optional[IdxOrNameTypes] = None,\n        start: 
Optional[TimeTypes] = None,\n        end: Optional[TimeTypes] = None,\n        time: Optional[TimeTypes] = None,\n        area: Optional[List[float]] = None,\n        variable: Optional[IdxOrNameTypes] = None,  # obsolete\n        **kwargs: Any,\n    ) -> \"ComparerCollection\":\n        \"\"\"Select data based on model, time and/or area.\n\n        Parameters\n        ----------\n        model : str or int or list of str or list of int, optional\n            Model name or index. If None, all models are selected.\n        observation : str or int or list of str or list of int, optional\n            Observation name or index. If None, all observations are selected.\n        quantity : str or int or list of str or list of int, optional\n            Quantity name or index. If None, all quantities are selected.\n        start : str or datetime, optional\n            Start time. If None, all times are selected.\n        end : str or datetime, optional\n            End time. If None, all times are selected.\n        time : str or datetime, optional\n            Time. If None, all times are selected.\n        area : list of float, optional\n            bbox: [x0, y0, x1, y1] or Polygon. If None, all areas are selected.\n        **kwargs\n            Filtering by comparer attrs similar to xarray.Dataset.filter_by_attrs\n            e.g. `sel(gtype='track')` or `sel(obs_provider='CMEMS')` if at least\n            one comparer has an entry `obs_provider` with value `CMEMS` in its\n            attrs container. 
Multiple kwargs are combined with logical AND.\n\n        Returns\n        -------\n        ComparerCollection\n            New ComparerCollection with selected data.\n        \"\"\"\n        if variable is not None:\n            warnings.warn(\n                \"variable is deprecated, use quantity instead\",\n                FutureWarning,\n            )\n            quantity = variable\n        # TODO is this really necessary to do both in ComparerCollection and Comparer?\n        if model is not None:\n            if isinstance(model, (str, int)):\n                models = [model]\n            else:\n                models = list(model)\n            mod_names: List[str] = [_get_name(m, self.mod_names) for m in models]\n        if observation is None:\n            observation = self.obs_names\n        else:\n            observation = [observation] if np.isscalar(observation) else observation  # type: ignore\n            observation = [_get_name(o, self.obs_names) for o in observation]  # type: ignore\n\n        if (quantity is not None) and (self.n_quantities > 1):\n            quantity = [quantity] if np.isscalar(quantity) else quantity  # type: ignore\n            quantity = [_get_name(v, self.quantity_names) for v in quantity]  # type: ignore\n        else:\n            quantity = self.quantity_names\n\n        cmps = []\n        for cmp in self._comparers.values():\n            if cmp.name in observation and cmp.quantity.name in quantity:\n                thismodel = (\n                    [m for m in mod_names if m in cmp.mod_names] if model else None\n                )\n                if (thismodel is not None) and (len(thismodel) == 0):\n                    continue\n                cmpsel = cmp.sel(\n                    model=thismodel,\n                    start=start,\n                    end=end,\n                    time=time,\n                    area=area,\n                )\n                if cmpsel is not None:\n                    # TODO: 
check if cmpsel is empty\n                    if cmpsel.n_points > 0:\n                        cmps.append(cmpsel)\n        cc = ComparerCollection(cmps)\n\n        if kwargs:\n            cc = cc.filter_by_attrs(**kwargs)\n\n        return cc\n\n    def filter_by_attrs(self, **kwargs: Any) -> \"ComparerCollection\":\n        \"\"\"Filter by comparer attrs similar to xarray.Dataset.filter_by_attrs\n\n        Parameters\n        ----------\n        **kwargs\n            Filtering by comparer attrs similar to xarray.Dataset.filter_by_attrs\n            e.g. `sel(gtype='track')` or `sel(obs_provider='CMEMS')` if at least\n            one comparer has an entry `obs_provider` with value `CMEMS` in its\n            attrs container. Multiple kwargs are combined with logical AND.\n\n        Returns\n        -------\n        ComparerCollection\n            New ComparerCollection with selected data.\n\n        Examples\n        --------\n        >>> cc = ms.match([HKNA, EPL, alti], mr)\n        >>> cc.filter_by_attrs(gtype='track')\n        <ComparerCollection>\n        Comparer: alti\n        \"\"\"\n        cmps = []\n        for cmp in self._comparers.values():\n            for k, v in kwargs.items():\n                # TODO: should we also filter on cmp.data.Observation.attrs?\n                if cmp.data.attrs.get(k) != v:\n                    break\n            else:\n                cmps.append(cmp)\n        return ComparerCollection(cmps)\n\n    def query(self, query: str) -> \"ComparerCollection\":\n        \"\"\"Select data based on a query.\n\n        Parameters\n        ----------\n        query : str\n            Query string. 
See pandas.DataFrame.query() for details.\n\n        Returns\n        -------\n        ComparerCollection\n            New ComparerCollection with selected data.\n        \"\"\"\n        q_cmps = [cmp.query(query) for cmp in self._comparers.values()]\n        cmps_with_data = [cmp for cmp in q_cmps if cmp.n_points > 0]\n\n        return ComparerCollection(cmps_with_data)\n\n    def skill(\n        self,\n        by: str | Iterable[str] | None = None,\n        metrics: Iterable[str] | Iterable[Callable] | str | Callable | None = None,\n        observed: bool = False,\n        **kwargs: Any,\n    ) -> SkillTable:\n        \"\"\"Aggregated skill assessment of model(s)\n\n        Parameters\n        ----------\n        by : str or List[str], optional\n            group by, by default [\"model\", \"observation\"]\n\n            - by column name\n            - by temporal bin of the DateTimeIndex via the freq-argument\n            (using pandas pd.Grouper(freq)), e.g.: 'freq:M' = monthly; 'freq:D' daily\n            - by the dt accessor of the DateTimeIndex (e.g. 'dt.month') using the\n            syntax 'dt:month'. 
The dt-argument is different from the freq-argument\n            in that it gives month-of-year rather than month-of-data.\n            - by attributes, stored in the cc.data.attrs container,\n            e.g.: 'attrs:obs_provider' = group by observation provider or\n            'attrs:gtype' = group by geometry type (track or point)\n        metrics : list, optional\n            list of modelskill.metrics (or str), by default modelskill.options.metrics.list\n        observed: bool, optional\n            This only applies if any of the groupers are Categoricals.\n\n            - True: only show observed values for categorical groupers.\n            - False: show all values for categorical groupers.\n\n        Returns\n        -------\n        SkillTable\n            skill assessment as a SkillTable object\n\n        See also\n        --------\n        sel\n            a method for filtering/selecting data\n\n        Examples\n        --------\n        >>> import modelskill as ms\n        >>> cc = ms.match([HKNA,EPL,c2], mr)\n        >>> cc.skill().round(2)\n                       n  bias  rmse  urmse   mae    cc    si    r2\n        observation\n        HKNA         385 -0.20  0.35   0.29  0.25  0.97  0.09  0.99\n        EPL           66 -0.08  0.22   0.20  0.18  0.97  0.07  0.99\n        c2           113 -0.00  0.35   0.35  0.29  0.97  0.12  0.99\n\n        >>> cc.sel(observation='c2', start='2017-10-28').skill().round(2)\n                       n  bias  rmse  urmse   mae    cc    si    r2\n        observation\n        c2            41  0.33  0.41   0.25  0.36  0.96  0.06  0.99\n\n        >>> cc.skill(by='freq:D').round(2)\n                      n  bias  rmse  urmse   mae    cc    si    r2\n        2017-10-27  239 -0.15  0.25   0.21  0.20  0.72  0.10  0.98\n        2017-10-28  162 -0.07  0.19   0.18  0.16  0.96  0.06  1.00\n        2017-10-29  163 -0.21  0.52   0.47  0.42  0.79  0.11  0.99\n        \"\"\"\n\n        # TODO remove in v1.1 ----------\n        model, 
start, end, area = _get_deprecated_args(kwargs)  # type: ignore\n        observation, variable = _get_deprecated_obs_var_args(kwargs)  # type: ignore\n        assert kwargs == {}, f\"Unknown keyword arguments: {kwargs}\"\n\n        cc = self.sel(\n            model=model,\n            observation=observation,\n            quantity=variable,\n            start=start,\n            end=end,\n            area=area,\n        )\n        if cc.n_points == 0:\n            raise ValueError(\"Dataset is empty, no data to compare.\")\n\n        ## ---- end of deprecated code ----\n\n        pmetrics = _parse_metric(metrics)\n\n        agg_cols = _parse_groupby(by, n_mod=cc.n_models, n_qnt=cc.n_quantities)\n        agg_cols, attrs_keys = self._attrs_keys_in_by(agg_cols)\n\n        df = cc._to_long_dataframe(attrs_keys=attrs_keys, observed=observed)\n\n        res = _groupby_df(df, by=agg_cols, metrics=pmetrics)\n        mtr_cols = [m.__name__ for m in pmetrics]  # type: ignore\n        res = res.dropna(subset=mtr_cols, how=\"all\")  # TODO: ok to remove empty?\n        res = self._append_xy_to_res(res, cc)\n        res = cc._add_as_col_if_not_in_index(df, skilldf=res)  # type: ignore\n        return SkillTable(res)\n\n    def _to_long_dataframe(\n        self, attrs_keys: Iterable[str] | None = None, observed: bool = False\n    ) -> pd.DataFrame:\n        \"\"\"Return a copy of the data as a long-format pandas DataFrame (for groupby operations)\"\"\"\n        frames = []\n        for cmp in self:\n            frame = cmp._to_long_dataframe(attrs_keys=attrs_keys)\n            if self.n_quantities > 1:\n                frame[\"quantity\"] = cmp.quantity.name\n            frames.append(frame)\n        res = pd.concat(frames)\n\n        cat_cols = res.select_dtypes(include=[\"object\"]).columns\n        res[cat_cols] = res[cat_cols].astype(\"category\")\n\n        if observed:\n            res = res.loc[~(res == False).any(axis=1)]  # noqa\n        return res\n\n    
@staticmethod\n    def _attrs_keys_in_by(by: List[str | pd.Grouper]) -> Tuple[List[str], List[str]]:\n        attrs_keys: List[str] = []\n        agg_cols: List[str] = []\n        for b in by:\n            if isinstance(b, str) and b.startswith(\"attrs:\"):\n                key = b.split(\":\")[1]\n                attrs_keys.append(key)\n                agg_cols.append(key)\n            else:\n                agg_cols.append(b)\n        return agg_cols, attrs_keys\n\n    @staticmethod\n    def _append_xy_to_res(res: pd.DataFrame, cc: ComparerCollection) -> pd.DataFrame:\n        \"\"\"skill() helper: Append x and y to res if possible\"\"\"\n        res[\"x\"] = np.nan\n        res[\"y\"] = np.nan\n\n        # for MultiIndex in res find \"observation\" level and\n        # insert x, y if gtype=point for that observation\n        if \"observation\" in res.index.names:\n            idx_names = res.index.names\n            res = res.reset_index()\n            for cmp in cc:\n                if cmp.gtype == \"point\":\n                    res.loc[res.observation == cmp.name, \"x\"] = cmp.x\n                    res.loc[res.observation == cmp.name, \"y\"] = cmp.y\n            res = res.set_index(idx_names)\n        return res\n\n    def _add_as_col_if_not_in_index(\n        self,\n        df: pd.DataFrame,\n        skilldf: pd.DataFrame,\n        fields: List[str] = [\"model\", \"observation\", \"quantity\"],\n    ) -> pd.DataFrame:\n        \"\"\"skill() helper: Add a field to skilldf if unique in df\"\"\"\n        for field in reversed(fields):\n            if (field == \"model\") and (self.n_models <= 1):\n                continue\n            if (field == \"quantity\") and (self.n_quantities <= 1):\n                continue\n            if field not in skilldf.index.names:\n                unames = df[field].unique()\n                if len(unames) == 1:\n                    skilldf.insert(loc=0, column=field, value=unames[0])\n        return skilldf\n\n    def 
gridded_skill(\n        self,\n        bins: int = 5,\n        binsize: float | None = None,\n        by: str | Iterable[str] | None = None,\n        metrics: Iterable[str] | Iterable[Callable] | str | Callable | None = None,\n        n_min: Optional[int] = None,\n        **kwargs: Any,\n    ) -> SkillGrid:\n        \"\"\"Skill assessment of model(s) on a regular spatial grid.\n\n        Parameters\n        ----------\n        bins: int, list of scalars, or IntervalIndex, or tuple of, optional\n            criteria to bin x and y by, argument bins to pd.cut(), default 5\n            define different bins for x and y a tuple\n            e.g.: bins = 5, bins = (5,[2,3,5])\n        binsize : float, optional\n            bin size for x and y dimension, overwrites bins\n            creates bins with reference to round(mean(x)), round(mean(y))\n        by : str, List[str], optional\n            group by, by default [\"model\", \"observation\"]\n\n            - by column name\n            - by temporal bin of the DateTimeIndex via the freq-argument\n            (using pandas pd.Grouper(freq)), e.g.: 'freq:M' = monthly; 'freq:D' daily\n            - by the dt accessor of the DateTimeIndex (e.g. 'dt.month') using the\n            syntax 'dt:month'. 
The dt-argument is different from the freq-argument\n            in that it gives month-of-year rather than month-of-data.\n        metrics : list, optional\n            list of modelskill.metrics, by default modelskill.options.metrics.list\n        n_min : int, optional\n            minimum number of observations in a grid cell;\n            cells with fewer observations get a score of `np.nan`\n\n        Returns\n        -------\n        SkillGrid\n            skill assessment as a SkillGrid object\n\n        See also\n        --------\n        skill\n            a method for aggregated skill assessment\n\n        Examples\n        --------\n        >>> import modelskill as ms\n        >>> cc = ms.match([HKNA,EPL,c2], mr)  # with satellite track measurements\n        >>> gs = cc.gridded_skill(metrics='bias')\n        >>> gs.data\n        <xarray.Dataset>\n        Dimensions:      (x: 5, y: 5)\n        Coordinates:\n            observation   'alti'\n        * x            (x) float64 -0.436 1.543 3.517 5.492 7.466\n        * y            (y) float64 50.6 51.66 52.7 53.75 54.8\n        Data variables:\n            n            (x, y) int32 3 0 0 14 37 17 50 36 72 ... 0 0 15 20 0 0 0 28 76\n            bias         (x, y) float64 -0.02626 nan nan ... 
nan 0.06785 -0.1143\n\n        >>> gs = cc.gridded_skill(binsize=0.5)\n        >>> gs.data.coords\n        Coordinates:\n            observation   'alti'\n        * x            (x) float64 -1.5 -0.5 0.5 1.5 2.5 3.5 4.5 5.5 6.5 7.5\n        * y            (y) float64 51.5 52.5 53.5 54.5 55.5 56.5\n        \"\"\"\n\n        model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore\n        observation, variable = _get_deprecated_obs_var_args(kwargs)  # type: ignore\n        assert kwargs == {}, f\"Unknown keyword arguments: {kwargs}\"\n\n        cmp = self.sel(\n            model=model,\n            observation=observation,\n            quantity=variable,\n            start=start,\n            end=end,\n            area=area,\n        )\n\n        if cmp.n_points == 0:\n            raise ValueError(\"Dataset is empty, no data to compare.\")\n\n        ## ---- end of deprecated code ----\n\n        metrics = _parse_metric(metrics)\n\n        df = cmp._to_long_dataframe()\n        df = _add_spatial_grid_to_df(df=df, bins=bins, binsize=binsize)\n\n        agg_cols = _parse_groupby(by, n_mod=cmp.n_models, n_qnt=cmp.n_quantities)\n        if \"x\" not in agg_cols:\n            agg_cols.insert(0, \"x\")\n        if \"y\" not in agg_cols:\n            agg_cols.insert(0, \"y\")\n\n        df = df.drop(columns=[\"x\", \"y\"]).rename(columns=dict(xBin=\"x\", yBin=\"y\"))\n        res = _groupby_df(df, by=agg_cols, metrics=metrics, n_min=n_min)\n        ds = res.to_xarray().squeeze()\n\n        # change categorial index to coordinates\n        for dim in (\"x\", \"y\"):\n            ds[dim] = ds[dim].astype(float)\n        return SkillGrid(ds)\n\n    def mean_skill(\n        self,\n        *,\n        weights: Optional[Union[str, List[float], Dict[str, float]]] = None,\n        metrics: Optional[list] = None,\n        **kwargs: Any,\n    ) -> SkillTable:\n        \"\"\"Weighted mean of skills\n\n        First, the skill is calculated per observation,\n        the 
weighted mean of the skills is then found.\n\n        Warning: This method is NOT the mean skill of\n        all observational points! (mean_skill_points)\n\n        Parameters\n        ----------\n        weights : str or List(float) or Dict(str, float), optional\n            weighting of observations, by default None\n\n            - None: use observations weight attribute (if assigned, else \"equal\")\n            - \"equal\": giving all observations equal weight,\n            - \"points\": giving all points equal weight,\n            - list of weights e.g. [0.3, 0.3, 0.4] per observation,\n            - dictionary of observations with special weigths, others will be set to 1.0\n        metrics : list, optional\n            list of modelskill.metrics, by default modelskill.options.metrics.list\n\n        Returns\n        -------\n        SkillTable\n            mean skill assessment as a SkillTable object\n\n        See also\n        --------\n        skill\n            skill assessment per observation\n        mean_skill_points\n            skill assessment pooling all observation points together\n\n        Examples\n        --------\n        >>> import modelskill as ms\n        >>> cc = ms.match([HKNA,EPL,c2], mod=HKZN_local)\n        >>> cc.mean_skill().round(2)\n                      n  bias  rmse  urmse   mae    cc    si    r2\n        HKZN_local  564 -0.09  0.31   0.28  0.24  0.97  0.09  0.99\n        >>> sk = cc.mean_skill(weights=\"equal\")\n        >>> sk = cc.mean_skill(weights=\"points\")\n        >>> sk = cc.mean_skill(weights={\"EPL\": 2.0}) # more weight on EPL, others=1.0\n        \"\"\"\n\n        # TODO remove in v1.1\n        model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore\n        observation, variable = _get_deprecated_obs_var_args(kwargs)  # type: ignore\n        assert kwargs == {}, f\"Unknown keyword arguments: {kwargs}\"\n\n        # filter data\n        cc = self.sel(\n            model=model,  # deprecated\n       
     observation=observation,  # deprecated\n            quantity=variable,  # deprecated\n            start=start,  # deprecated\n            end=end,  # deprecated\n            area=area,  # deprecated\n        )\n        if cc.n_points == 0:\n            raise ValueError(\"Dataset is empty, no data to compare.\")\n\n        ## ---- end of deprecated code ----\n\n        df = cc._to_long_dataframe()  # TODO: remove\n        mod_names = cc.mod_names\n        # obs_names = cmp.obs_names  # df.observation.unique()\n        qnt_names = cc.quantity_names\n\n        # skill assessment\n        pmetrics = _parse_metric(metrics)\n        sk = cc.skill(metrics=pmetrics)\n        if sk is None:\n            return None\n        skilldf = sk.to_dataframe()\n\n        # weights\n        weights = cc._parse_weights(weights, sk.obs_names)\n        skilldf[\"weights\"] = (\n            skilldf.n if weights is None else np.tile(weights, len(mod_names))  # type: ignore\n        )\n\n        def weighted_mean(x: Any) -> Any:\n            return np.average(x, weights=skilldf.loc[x.index, \"weights\"])\n\n        # group by\n        by = cc._mean_skill_by(skilldf, mod_names, qnt_names)  # type: ignore\n        agg = {\"n\": \"sum\"}\n        for metric in pmetrics:  # type: ignore\n            agg[metric.__name__] = weighted_mean  # type: ignore\n        res = skilldf.groupby(by, observed=False).agg(agg)\n\n        # TODO is this correct?\n        res.index.name = \"model\"\n\n        # output\n        res = cc._add_as_col_if_not_in_index(df, res, fields=[\"model\", \"quantity\"])  # type: ignore\n        return SkillTable(res.astype({\"n\": int}))\n\n    # def mean_skill_points(\n    #     self,\n    #     *,\n    #     metrics: Optional[list] = None,\n    #     **kwargs,\n    # ) -> Optional[SkillTable]:  # TODO raise error if no data?\n    #     \"\"\"Mean skill of all observational points\n\n    #     All data points are pooled (disregarding which observation they belong to),\n  
  #     the skill is then found (for each model).\n\n    #     .. note::\n    #         No weighting can be applied with this method,\n    #         use mean_skill() if you need to apply weighting\n\n    #     .. warning::\n    #         This method is NOT the mean of skills (mean_skill)\n\n    #     Parameters\n    #     ----------\n    #     metrics : list, optional\n    #         list of modelskill.metrics, by default modelskill.options.metrics.list\n\n    #     Returns\n    #     -------\n    #     SkillTable\n    #         mean skill assessment as a skill object\n\n    #     See also\n    #     --------\n    #     skill\n    #         skill assessment per observation\n    #     mean_skill\n    #         weighted mean of skills (not the same as this method)\n\n    #     Examples\n    #     --------\n    #     >>> import modelskill as ms\n    #     >>> cc = ms.match(obs, mod)\n    #     >>> cc.mean_skill_points()\n    #     \"\"\"\n\n    #     # TODO remove in v1.1\n    #     model, start, end, area = _get_deprecated_args(kwargs)\n    #     observation, variable = _get_deprecated_obs_var_args(kwargs)\n    #     assert kwargs == {}, f\"Unknown keyword arguments: {kwargs}\"\n\n    #     # filter data\n    #     cmp = self.sel(\n    #         model=model,\n    #         observation=observation,\n    #         variable=variable,\n    #         start=start,\n    #         end=end,\n    #         area=area,\n    #     )\n    #     if cmp.n_points == 0:\n    #         warnings.warn(\"No data!\")\n    #         return None\n\n    #     dfall = cmp.to_dataframe()\n    #     dfall[\"observation\"] = \"all\"\n\n    #     # TODO: no longer possible to do this way\n    #     # return self.skill(df=dfall, metrics=metrics)\n    #     return cmp.skill(metrics=metrics)  # NOT CORRECT - SEE ABOVE\n\n    def _mean_skill_by(self, skilldf, mod_names, qnt_names):  # type: ignore\n        by = []\n        if len(mod_names) > 1:\n            by.append(\"model\")\n        if 
len(qnt_names) > 1:\n            by.append(\"quantity\")\n        if len(by) == 0:\n            if (self.n_quantities > 1) and (\"quantity\" in skilldf):\n                by.append(\"quantity\")\n            elif \"model\" in skilldf:\n                by.append(\"model\")\n            else:\n                by = [mod_names[0]] * len(skilldf)\n        return by\n\n    def _parse_weights(self, weights: Any, observations: Any) -> Any:\n        if observations is None:\n            observations = self.obs_names\n        else:\n            observations = [observations] if np.isscalar(observations) else observations\n            observations = [_get_name(o, self.obs_names) for o in observations]\n        n_obs = len(observations)\n\n        if weights is None:\n            # get weights from observation objects\n            # default is equal weight to all\n            weights = [self._comparers[o].weight for o in observations]\n        else:\n            if isinstance(weights, int):\n                weights = np.ones(n_obs)  # equal weight to all\n            elif isinstance(weights, dict):\n                w_dict = weights\n                weights = [w_dict.get(name, 1.0) for name in observations]\n\n            elif isinstance(weights, str):\n                if weights.lower() == \"equal\":\n                    weights = np.ones(n_obs)  # equal weight to all\n                elif \"point\" in weights.lower():\n                    weights = None  # no weight => use n_points\n                else:\n                    raise ValueError(\n                        \"unknown weights argument (None, 'equal', 'points', or list of floats)\"\n                    )\n            elif not np.isscalar(weights):\n                if n_obs == 1:\n                    if len(weights) > 1:\n                        warnings.warn(\n                            \"Cannot apply multiple weights to one observation\"\n                        )\n                    weights = [1.0]\n                
if not len(weights) == n_obs:\n                    raise ValueError(\n                        f\"weights must have same length as observations: {observations}\"\n                    )\n        if weights is not None:\n            assert len(weights) == n_obs\n        return weights\n\n    def score(\n        self,\n        metric: str | Callable = mtr.rmse,\n        **kwargs: Any,\n    ) -> Dict[str, float]:\n        \"\"\"Weighted mean score of model(s) over all observations\n\n        Wrapping mean_skill() with a single metric.\n\n        NOTE: will take simple mean over different quantities!\n\n        Parameters\n        ----------\n        weights : str or List(float) or Dict(str, float), optional\n            weighting of observations, by default None\n\n            - None: use observations weight attribute (if assigned, else \"equal\")\n            - \"equal\": giving all observations equal weight,\n            - \"points\": giving all points equal weight,\n            - list of weights e.g. 
[0.3, 0.3, 0.4] per observation,\n            - dictionary of observations with special weigths, others will be set to 1.0\n        metric : list, optional\n            a single metric from modelskill.metrics, by default rmse\n\n        Returns\n        -------\n        Dict[str, float]\n            mean of skills score as a single number (for each model)\n\n        See also\n        --------\n        skill\n            skill assessment per observation\n        mean_skill\n            weighted mean of skills assessment\n        mean_skill_points\n            skill assessment pooling all observation points together\n\n        Examples\n        --------\n        >>> import modelskill as ms\n        >>> cc = ms.match([o1, o2], mod)\n        >>> cc.score()\n        {'mod': 0.30681206}\n        >>> cc.score(weights=[0.1,0.1,0.8])\n        {'mod': 0.3383011631797379}\n\n        >>> cc.score(weights='points', metric=\"mape\")\n        {'mod': 8.414442957854142}\n        \"\"\"\n\n        weights = kwargs.pop(\"weights\", None)\n\n        metric = _parse_metric(metric)[0]\n\n        if weights is None:\n            weights = {c.name: c.weight for c in self._comparers.values()}\n\n        if not (callable(metric) or isinstance(metric, str)):\n            raise ValueError(\"metric must be a string or a function\")\n\n        model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore\n        observation, variable = _get_deprecated_obs_var_args(kwargs)  # type: ignore\n        assert kwargs == {}, f\"Unknown keyword arguments: {kwargs}\"\n\n        if model is None:\n            models = self.mod_names\n        else:\n            # TODO: these two lines looks familiar, extract to function\n            models = [model] if np.isscalar(model) else model  # type: ignore\n            models = [_get_name(m, self.mod_names) for m in models]  # type: ignore\n\n        cmp = self.sel(\n            model=models,  # deprecated\n            observation=observation,  # 
deprecated\n            quantity=variable,  # deprecated\n            start=start,  # deprecated\n            end=end,  # deprecated\n            area=area,  # deprecated\n        )\n\n        if cmp.n_points == 0:\n            raise ValueError(\"Dataset is empty, no data to compare.\")\n\n        ## ---- end of deprecated code ----\n\n        sk = cmp.mean_skill(weights=weights, metrics=[metric])\n        df = sk.to_dataframe()\n\n        metric_name = metric if isinstance(metric, str) else metric.__name__\n        ser = df[metric_name]\n        score = {str(col): float(value) for col, value in ser.items()}\n\n        return score\n\n    def save(self, filename: Union[str, Path]) -> None:\n        \"\"\"Save the ComparerCollection to a zip file.\n\n        Each comparer is stored as a netcdf file in the zip file.\n\n        Parameters\n        ----------\n        filename : str or Path\n            Filename of the zip file.\n\n        Examples\n        --------\n        >>> cc = ms.match(obs, mod)\n        >>> cc.save(\"my_comparer_collection.msk\")\n        \"\"\"\n\n        files = []\n        no = 0\n        for name, cmp in self._comparers.items():\n            cmp_fn = f\"{no}_{name}.nc\"\n            cmp.save(cmp_fn)\n            files.append(cmp_fn)\n            no += 1\n\n        with zipfile.ZipFile(filename, \"w\") as zip:\n            for f in files:\n                zip.write(f)\n                os.remove(f)\n\n    @staticmethod\n    def load(filename: Union[str, Path]) -> \"ComparerCollection\":\n        \"\"\"Load a ComparerCollection from a zip file.\n\n        Parameters\n        ----------\n        filename : str or Path\n            Filename of the zip file.\n\n        Returns\n        -------\n        ComparerCollection\n            The loaded ComparerCollection.\n\n        Examples\n        --------\n        >>> cc = ms.match(obs, mod)\n        >>> cc.save(\"my_comparer_collection.msk\")\n        >>> cc2 = 
ms.ComparerCollection.load(\"my_comparer_collection.msk\")\n        \"\"\"\n\n        folder = tempfile.TemporaryDirectory().name\n\n        with zipfile.ZipFile(filename, \"r\") as zip:\n            for f in zip.namelist():\n                if f.endswith(\".nc\"):\n                    zip.extract(f, path=folder)\n\n        comparers = [\n            ComparerCollection._load_comparer(folder, f)\n            for f in sorted(os.listdir(folder))\n        ]\n        return ComparerCollection(comparers)\n\n    @staticmethod\n    def _load_comparer(folder: str, f: str) -> Comparer:\n        f = os.path.join(folder, f)\n        cmp = Comparer.load(f)\n        os.remove(f)\n        return cmp\n\n    # =============== Deprecated methods ===============\n\n    def spatial_skill(\n        self,\n        bins=5,\n        binsize=None,\n        by=None,\n        metrics=None,\n        n_min=None,\n        **kwargs,\n    ):\n        warnings.warn(\n            \"spatial_skill is deprecated, use gridded_skill instead\", FutureWarning\n        )\n        return self.gridded_skill(\n            bins=bins,\n            binsize=binsize,\n            by=by,\n            metrics=metrics,\n            n_min=n_min,\n            **kwargs,\n        )\n\n    def scatter(\n        self,\n        *,\n        bins=120,\n        quantiles=None,\n        fit_to_quantiles=False,\n        show_points=None,\n        show_hist=None,\n        show_density=None,\n        backend=\"matplotlib\",\n        figsize=(8, 8),\n        xlim=None,\n        ylim=None,\n        reg_method=\"ols\",\n        title=None,\n        xlabel=None,\n        ylabel=None,\n        skill_table=None,\n        **kwargs,\n    ):\n        warnings.warn(\"scatter is deprecated, use plot.scatter instead\", FutureWarning)\n\n        # TODO remove in v1.1\n        model, start, end, area = _get_deprecated_args(kwargs)\n        observation, variable = _get_deprecated_obs_var_args(kwargs)\n\n        # select model\n        mod_idx = 
_get_idx(model, self.mod_names)\n        mod_name = self.mod_names[mod_idx]\n\n        # select variable\n        qnt_idx = _get_idx(variable, self.quantity_names)\n        qnt_name = self.quantity_names[qnt_idx]\n\n        # filter data\n        cmp = self.sel(\n            model=mod_name,\n            observation=observation,\n            quantity=qnt_name,\n            start=start,\n            end=end,\n            area=area,\n        )\n\n        return cmp.plot.scatter(\n            bins=bins,\n            quantiles=quantiles,\n            fit_to_quantiles=fit_to_quantiles,\n            show_points=show_points,\n            show_hist=show_hist,\n            show_density=show_density,\n            backend=backend,\n            figsize=figsize,\n            xlim=xlim,\n            ylim=ylim,\n            reg_method=reg_method,\n            title=title,\n            xlabel=xlabel,\n            ylabel=ylabel,\n            skill_table=skill_table,\n            **kwargs,\n        )\n\n    def taylor(\n        self,\n        normalize_std=False,\n        aggregate_observations=True,\n        figsize=(7, 7),\n        marker=\"o\",\n        marker_size=6.0,\n        title=\"Taylor diagram\",\n        **kwargs,\n    ):\n        warnings.warn(\"taylor is deprecated, use plot.taylor instead\", FutureWarning)\n\n        model, start, end, area = _get_deprecated_args(kwargs)\n        observation, variable = _get_deprecated_obs_var_args(kwargs)\n        assert kwargs == {}, f\"Unknown keyword arguments: {kwargs}\"\n\n        cmp = self.sel(\n            model=model,\n            observation=observation,\n            quantity=variable,\n            start=start,\n            end=end,\n            area=area,\n        )\n\n        if cmp.n_points == 0:\n            warnings.warn(\"No data!\")\n            return\n\n        if (not aggregate_observations) and (not normalize_std):\n            raise ValueError(\n                \"aggregate_observations=False is only possible if 
normalize_std=True!\"\n            )\n\n        metrics = [mtr._std_obs, mtr._std_mod, mtr.cc]\n        skill_func = cmp.mean_skill if aggregate_observations else cmp.skill\n        sk = skill_func(metrics=metrics)\n\n        df = sk.to_dataframe()\n        ref_std = 1.0 if normalize_std else df.iloc[0][\"_std_obs\"]\n\n        if isinstance(df.index, pd.MultiIndex):\n            df.index = df.index.map(\"_\".join)\n\n        df = df[[\"_std_obs\", \"_std_mod\", \"cc\"]].copy()\n        df.columns = [\"obs_std\", \"std\", \"cc\"]\n        pts = [\n            TaylorPoint(\n                r.Index, r.obs_std, r.std, r.cc, marker=marker, marker_size=marker_size\n            )\n            for r in df.itertuples()\n        ]\n\n        taylor_diagram(\n            obs_std=ref_std,\n            points=pts,\n            figsize=figsize,\n            normalize_std=normalize_std,\n            title=title,\n        )\n\n    def kde(self, ax=None, **kwargs):\n        warnings.warn(\"kde is deprecated, use plot.kde instead\", FutureWarning)\n\n        return self.plot.kde(ax=ax, **kwargs)\n\n    def hist(\n        self,\n        model=None,\n        bins=100,\n        title=None,\n        density=True,\n        alpha=0.5,\n        **kwargs,\n    ):\n        warnings.warn(\"hist is deprecated, use plot.hist instead\", FutureWarning)\n\n        return self.plot.hist(\n            model=model, bins=bins, title=title, density=density, alpha=alpha, **kwargs\n        )\n
"},{"location":"api/comparercollection/#modelskill.ComparerCollection.aux_names","title":"aux_names property","text":"
aux_names\n

List of unique auxiliary names

"},{"location":"api/comparercollection/#modelskill.ComparerCollection.end_time","title":"end_time property","text":"
end_time\n

end timestamp of compared data

"},{"location":"api/comparercollection/#modelskill.ComparerCollection.mod_names","title":"mod_names property","text":"
mod_names\n

List of unique model names

"},{"location":"api/comparercollection/#modelskill.ComparerCollection.n_models","title":"n_models property","text":"
n_models\n

Number of unique models

"},{"location":"api/comparercollection/#modelskill.ComparerCollection.n_observations","title":"n_observations property","text":"
n_observations\n

Number of observations (same as len(cc))

"},{"location":"api/comparercollection/#modelskill.ComparerCollection.n_points","title":"n_points property","text":"
n_points\n

number of compared points

"},{"location":"api/comparercollection/#modelskill.ComparerCollection.n_quantities","title":"n_quantities property","text":"
n_quantities\n

Number of unique quantities

"},{"location":"api/comparercollection/#modelskill.ComparerCollection.obs_names","title":"obs_names property","text":"
obs_names\n

List of observation names

"},{"location":"api/comparercollection/#modelskill.ComparerCollection.plot","title":"plot instance-attribute","text":"
plot = plotter(self)\n

Plot using the ComparerCollectionPlotter

Examples:

>>> cc.plot.scatter()\n>>> cc.plot.kde()\n>>> cc.plot.taylor()\n>>> cc.plot.hist()\n
"},{"location":"api/comparercollection/#modelskill.ComparerCollection.quantity_names","title":"quantity_names property","text":"
quantity_names\n

List of unique quantity names

"},{"location":"api/comparercollection/#modelskill.ComparerCollection.start_time","title":"start_time property","text":"
start_time\n

start timestamp of compared data

"},{"location":"api/comparercollection/#modelskill.ComparerCollection.filter_by_attrs","title":"filter_by_attrs","text":"
filter_by_attrs(**kwargs)\n

Filter by comparer attrs similar to xarray.Dataset.filter_by_attrs

Parameters:

Name Type Description Default **kwargs Any

Filtering by comparer attrs similar to xarray.Dataset.filter_by_attrs e.g. sel(gtype='track') or sel(obs_provider='CMEMS') if at least one comparer has an entry obs_provider with value CMEMS in its attrs container. Multiple kwargs are combined with logical AND.

{}

Returns:

Type Description ComparerCollection

New ComparerCollection with selected data.

Examples:

>>> cc = ms.match([HKNA, EPL, alti], mr)\n>>> cc.filter_by_attrs(gtype='track')\n<ComparerCollection>\nComparer: alti\n
Source code in modelskill/comparison/_collection.py
def filter_by_attrs(self, **kwargs: Any) -> \"ComparerCollection\":\n    \"\"\"Filter by comparer attrs similar to xarray.Dataset.filter_by_attrs\n\n    Parameters\n    ----------\n    **kwargs\n        Filtering by comparer attrs similar to xarray.Dataset.filter_by_attrs\n        e.g. `sel(gtype='track')` or `sel(obs_provider='CMEMS')` if at least\n        one comparer has an entry `obs_provider` with value `CMEMS` in its\n        attrs container. Multiple kwargs are combined with logical AND.\n\n    Returns\n    -------\n    ComparerCollection\n        New ComparerCollection with selected data.\n\n    Examples\n    --------\n    >>> cc = ms.match([HKNA, EPL, alti], mr)\n    >>> cc.filter_by_attrs(gtype='track')\n    <ComparerCollection>\n    Comparer: alti\n    \"\"\"\n    cmps = []\n    for cmp in self._comparers.values():\n        for k, v in kwargs.items():\n            # TODO: should we also filter on cmp.data.Observation.attrs?\n            if cmp.data.attrs.get(k) != v:\n                break\n        else:\n            cmps.append(cmp)\n    return ComparerCollection(cmps)\n
"},{"location":"api/comparercollection/#modelskill.ComparerCollection.gridded_skill","title":"gridded_skill","text":"
gridded_skill(bins=5, binsize=None, by=None, metrics=None, n_min=None, **kwargs)\n

Skill assessment of model(s) on a regular spatial grid.

Parameters:

Name Type Description Default bins int

criteria to bin x and y by, argument bins to pd.cut(), default 5 define different bins for x and y a tuple e.g.: bins = 5, bins = (5,[2,3,5])

5 binsize float

bin size for x and y dimension, overwrites bins creates bins with reference to round(mean(x)), round(mean(y))

None by (str, List[str])

group by, by default [\"model\", \"observation\"]

  • by column name
  • by temporal bin of the DateTimeIndex via the freq-argument (using pandas pd.Grouper(freq)), e.g.: 'freq:M' = monthly; 'freq:D' daily
  • by the dt accessor of the DateTimeIndex (e.g. 'dt.month') using the syntax 'dt:month'. The dt-argument is different from the freq-argument in that it gives month-of-year rather than month-of-data.
None metrics list

list of modelskill.metrics, by default modelskill.options.metrics.list

None n_min int

minimum number of observations in a grid cell; cells with fewer observations get a score of np.nan

None

Returns:

Type Description SkillGrid

skill assessment as a SkillGrid object

See also

skill a method for aggregated skill assessment

Examples:

>>> import modelskill as ms\n>>> cc = ms.match([HKNA,EPL,c2], mr)  # with satellite track measurements\n>>> gs = cc.gridded_skill(metrics='bias')\n>>> gs.data\n<xarray.Dataset>\nDimensions:      (x: 5, y: 5)\nCoordinates:\n    observation   'alti'\n* x            (x) float64 -0.436 1.543 3.517 5.492 7.466\n* y            (y) float64 50.6 51.66 52.7 53.75 54.8\nData variables:\n    n            (x, y) int32 3 0 0 14 37 17 50 36 72 ... 0 0 15 20 0 0 0 28 76\n    bias         (x, y) float64 -0.02626 nan nan ... nan 0.06785 -0.1143\n
>>> gs = cc.gridded_skill(binsize=0.5)\n>>> gs.data.coords\nCoordinates:\n    observation   'alti'\n* x            (x) float64 -1.5 -0.5 0.5 1.5 2.5 3.5 4.5 5.5 6.5 7.5\n* y            (y) float64 51.5 52.5 53.5 54.5 55.5 56.5\n
Source code in modelskill/comparison/_collection.py
def gridded_skill(\n    self,\n    bins: int = 5,\n    binsize: float | None = None,\n    by: str | Iterable[str] | None = None,\n    metrics: Iterable[str] | Iterable[Callable] | str | Callable | None = None,\n    n_min: Optional[int] = None,\n    **kwargs: Any,\n) -> SkillGrid:\n    \"\"\"Skill assessment of model(s) on a regular spatial grid.\n\n    Parameters\n    ----------\n    bins: int, list of scalars, or IntervalIndex, or tuple of, optional\n        criteria to bin x and y by, argument bins to pd.cut(), default 5\n        define different bins for x and y a tuple\n        e.g.: bins = 5, bins = (5,[2,3,5])\n    binsize : float, optional\n        bin size for x and y dimension, overwrites bins\n        creates bins with reference to round(mean(x)), round(mean(y))\n    by : str, List[str], optional\n        group by, by default [\"model\", \"observation\"]\n\n        - by column name\n        - by temporal bin of the DateTimeIndex via the freq-argument\n        (using pandas pd.Grouper(freq)), e.g.: 'freq:M' = monthly; 'freq:D' daily\n        - by the dt accessor of the DateTimeIndex (e.g. 'dt.month') using the\n        syntax 'dt:month'. 
The dt-argument is different from the freq-argument\n        in that it gives month-of-year rather than month-of-data.\n    metrics : list, optional\n        list of modelskill.metrics, by default modelskill.options.metrics.list\n    n_min : int, optional\n        minimum number of observations in a grid cell;\n        cells with fewer observations get a score of `np.nan`\n\n    Returns\n    -------\n    SkillGrid\n        skill assessment as a SkillGrid object\n\n    See also\n    --------\n    skill\n        a method for aggregated skill assessment\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> cc = ms.match([HKNA,EPL,c2], mr)  # with satellite track measurements\n    >>> gs = cc.gridded_skill(metrics='bias')\n    >>> gs.data\n    <xarray.Dataset>\n    Dimensions:      (x: 5, y: 5)\n    Coordinates:\n        observation   'alti'\n    * x            (x) float64 -0.436 1.543 3.517 5.492 7.466\n    * y            (y) float64 50.6 51.66 52.7 53.75 54.8\n    Data variables:\n        n            (x, y) int32 3 0 0 14 37 17 50 36 72 ... 0 0 15 20 0 0 0 28 76\n        bias         (x, y) float64 -0.02626 nan nan ... 
nan 0.06785 -0.1143\n\n    >>> gs = cc.gridded_skill(binsize=0.5)\n    >>> gs.data.coords\n    Coordinates:\n        observation   'alti'\n    * x            (x) float64 -1.5 -0.5 0.5 1.5 2.5 3.5 4.5 5.5 6.5 7.5\n    * y            (y) float64 51.5 52.5 53.5 54.5 55.5 56.5\n    \"\"\"\n\n    model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore\n    observation, variable = _get_deprecated_obs_var_args(kwargs)  # type: ignore\n    assert kwargs == {}, f\"Unknown keyword arguments: {kwargs}\"\n\n    cmp = self.sel(\n        model=model,\n        observation=observation,\n        quantity=variable,\n        start=start,\n        end=end,\n        area=area,\n    )\n\n    if cmp.n_points == 0:\n        raise ValueError(\"Dataset is empty, no data to compare.\")\n\n    ## ---- end of deprecated code ----\n\n    metrics = _parse_metric(metrics)\n\n    df = cmp._to_long_dataframe()\n    df = _add_spatial_grid_to_df(df=df, bins=bins, binsize=binsize)\n\n    agg_cols = _parse_groupby(by, n_mod=cmp.n_models, n_qnt=cmp.n_quantities)\n    if \"x\" not in agg_cols:\n        agg_cols.insert(0, \"x\")\n    if \"y\" not in agg_cols:\n        agg_cols.insert(0, \"y\")\n\n    df = df.drop(columns=[\"x\", \"y\"]).rename(columns=dict(xBin=\"x\", yBin=\"y\"))\n    res = _groupby_df(df, by=agg_cols, metrics=metrics, n_min=n_min)\n    ds = res.to_xarray().squeeze()\n\n    # change categorial index to coordinates\n    for dim in (\"x\", \"y\"):\n        ds[dim] = ds[dim].astype(float)\n    return SkillGrid(ds)\n
"},{"location":"api/comparercollection/#modelskill.ComparerCollection.load","title":"load staticmethod","text":"
load(filename)\n

Load a ComparerCollection from a zip file.

Parameters:

Name Type Description Default filename str or Path

Filename of the zip file.

required

Returns:

Type Description ComparerCollection

The loaded ComparerCollection.

Examples:

>>> cc = ms.match(obs, mod)\n>>> cc.save(\"my_comparer_collection.msk\")\n>>> cc2 = ms.ComparerCollection.load(\"my_comparer_collection.msk\")\n
Source code in modelskill/comparison/_collection.py
@staticmethod\ndef load(filename: Union[str, Path]) -> \"ComparerCollection\":\n    \"\"\"Load a ComparerCollection from a zip file.\n\n    Parameters\n    ----------\n    filename : str or Path\n        Filename of the zip file.\n\n    Returns\n    -------\n    ComparerCollection\n        The loaded ComparerCollection.\n\n    Examples\n    --------\n    >>> cc = ms.match(obs, mod)\n    >>> cc.save(\"my_comparer_collection.msk\")\n    >>> cc2 = ms.ComparerCollection.load(\"my_comparer_collection.msk\")\n    \"\"\"\n\n    folder = tempfile.TemporaryDirectory().name\n\n    with zipfile.ZipFile(filename, \"r\") as zip:\n        for f in zip.namelist():\n            if f.endswith(\".nc\"):\n                zip.extract(f, path=folder)\n\n    comparers = [\n        ComparerCollection._load_comparer(folder, f)\n        for f in sorted(os.listdir(folder))\n    ]\n    return ComparerCollection(comparers)\n
"},{"location":"api/comparercollection/#modelskill.ComparerCollection.mean_skill","title":"mean_skill","text":"
mean_skill(*, weights=None, metrics=None, **kwargs)\n

Weighted mean of skills

First, the skill is calculated per observation, the weighted mean of the skills is then found.

Warning: This method is NOT the mean skill of all observational points! (mean_skill_points)

Parameters:

Name Type Description Default weights str or List(float) or Dict(str, float)

weighting of observations, by default None

  • None: use observations weight attribute (if assigned, else \"equal\")
  • \"equal\": giving all observations equal weight,
  • \"points\": giving all points equal weight,
  • list of weights e.g. [0.3, 0.3, 0.4] per observation,
  • dictionary of observations with special weigths, others will be set to 1.0
None metrics list

list of modelskill.metrics, by default modelskill.options.metrics.list

None

Returns:

Type Description SkillTable

mean skill assessment as a SkillTable object

See also

skill skill assessment per observation mean_skill_points skill assessment pooling all observation points together

Examples:

>>> import modelskill as ms\n>>> cc = ms.match([HKNA,EPL,c2], mod=HKZN_local)\n>>> cc.mean_skill().round(2)\n              n  bias  rmse  urmse   mae    cc    si    r2\nHKZN_local  564 -0.09  0.31   0.28  0.24  0.97  0.09  0.99\n>>> sk = cc.mean_skill(weights=\"equal\")\n>>> sk = cc.mean_skill(weights=\"points\")\n>>> sk = cc.mean_skill(weights={\"EPL\": 2.0}) # more weight on EPL, others=1.0\n
Source code in modelskill/comparison/_collection.py
def mean_skill(\n    self,\n    *,\n    weights: Optional[Union[str, List[float], Dict[str, float]]] = None,\n    metrics: Optional[list] = None,\n    **kwargs: Any,\n) -> SkillTable:\n    \"\"\"Weighted mean of skills\n\n    First, the skill is calculated per observation,\n    the weighted mean of the skills is then found.\n\n    Warning: This method is NOT the mean skill of\n    all observational points! (mean_skill_points)\n\n    Parameters\n    ----------\n    weights : str or List(float) or Dict(str, float), optional\n        weighting of observations, by default None\n\n        - None: use observations weight attribute (if assigned, else \"equal\")\n        - \"equal\": giving all observations equal weight,\n        - \"points\": giving all points equal weight,\n        - list of weights e.g. [0.3, 0.3, 0.4] per observation,\n        - dictionary of observations with special weigths, others will be set to 1.0\n    metrics : list, optional\n        list of modelskill.metrics, by default modelskill.options.metrics.list\n\n    Returns\n    -------\n    SkillTable\n        mean skill assessment as a SkillTable object\n\n    See also\n    --------\n    skill\n        skill assessment per observation\n    mean_skill_points\n        skill assessment pooling all observation points together\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> cc = ms.match([HKNA,EPL,c2], mod=HKZN_local)\n    >>> cc.mean_skill().round(2)\n                  n  bias  rmse  urmse   mae    cc    si    r2\n    HKZN_local  564 -0.09  0.31   0.28  0.24  0.97  0.09  0.99\n    >>> sk = cc.mean_skill(weights=\"equal\")\n    >>> sk = cc.mean_skill(weights=\"points\")\n    >>> sk = cc.mean_skill(weights={\"EPL\": 2.0}) # more weight on EPL, others=1.0\n    \"\"\"\n\n    # TODO remove in v1.1\n    model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore\n    observation, variable = _get_deprecated_obs_var_args(kwargs)  # type: ignore\n    assert kwargs == {}, 
f\"Unknown keyword arguments: {kwargs}\"\n\n    # filter data\n    cc = self.sel(\n        model=model,  # deprecated\n        observation=observation,  # deprecated\n        quantity=variable,  # deprecated\n        start=start,  # deprecated\n        end=end,  # deprecated\n        area=area,  # deprecated\n    )\n    if cc.n_points == 0:\n        raise ValueError(\"Dataset is empty, no data to compare.\")\n\n    ## ---- end of deprecated code ----\n\n    df = cc._to_long_dataframe()  # TODO: remove\n    mod_names = cc.mod_names\n    # obs_names = cmp.obs_names  # df.observation.unique()\n    qnt_names = cc.quantity_names\n\n    # skill assessment\n    pmetrics = _parse_metric(metrics)\n    sk = cc.skill(metrics=pmetrics)\n    if sk is None:\n        return None\n    skilldf = sk.to_dataframe()\n\n    # weights\n    weights = cc._parse_weights(weights, sk.obs_names)\n    skilldf[\"weights\"] = (\n        skilldf.n if weights is None else np.tile(weights, len(mod_names))  # type: ignore\n    )\n\n    def weighted_mean(x: Any) -> Any:\n        return np.average(x, weights=skilldf.loc[x.index, \"weights\"])\n\n    # group by\n    by = cc._mean_skill_by(skilldf, mod_names, qnt_names)  # type: ignore\n    agg = {\"n\": \"sum\"}\n    for metric in pmetrics:  # type: ignore\n        agg[metric.__name__] = weighted_mean  # type: ignore\n    res = skilldf.groupby(by, observed=False).agg(agg)\n\n    # TODO is this correct?\n    res.index.name = \"model\"\n\n    # output\n    res = cc._add_as_col_if_not_in_index(df, res, fields=[\"model\", \"quantity\"])  # type: ignore\n    return SkillTable(res.astype({\"n\": int}))\n
"},{"location":"api/comparercollection/#modelskill.ComparerCollection.query","title":"query","text":"
query(query)\n

Select data based on a query.

Parameters:

Name Type Description Default query str

Query string. See pandas.DataFrame.query() for details.

required

Returns:

Type Description ComparerCollection

New ComparerCollection with selected data.

Source code in modelskill/comparison/_collection.py
def query(self, query: str) -> \"ComparerCollection\":\n    \"\"\"Select data based on a query.\n\n    Parameters\n    ----------\n    query : str\n        Query string. See pandas.DataFrame.query() for details.\n\n    Returns\n    -------\n    ComparerCollection\n        New ComparerCollection with selected data.\n    \"\"\"\n    q_cmps = [cmp.query(query) for cmp in self._comparers.values()]\n    cmps_with_data = [cmp for cmp in q_cmps if cmp.n_points > 0]\n\n    return ComparerCollection(cmps_with_data)\n
"},{"location":"api/comparercollection/#modelskill.ComparerCollection.rename","title":"rename","text":"
rename(mapping)\n

Rename observation, model or auxiliary data variables

Parameters:

Name Type Description Default mapping dict

mapping of old names to new names

required

Returns:

Type Description ComparerCollection

Examples:

>>> cc = ms.match([o1, o2], [mr1, mr2])\n>>> cc.mod_names\n['mr1', 'mr2']\n>>> cc2 = cc.rename({'mr1': 'model1'})\n>>> cc2.mod_names\n['model1', 'mr2']\n
Source code in modelskill/comparison/_collection.py
def rename(self, mapping: Dict[str, str]) -> \"ComparerCollection\":\n    \"\"\"Rename observation, model or auxiliary data variables\n\n    Parameters\n    ----------\n    mapping : dict\n        mapping of old names to new names\n\n    Returns\n    -------\n    ComparerCollection\n\n    Examples\n    --------\n    >>> cc = ms.match([o1, o2], [mr1, mr2])\n    >>> cc.mod_names\n    ['mr1', 'mr2']\n    >>> cc2 = cc.rename({'mr1': 'model1'})\n    >>> cc2.mod_names\n    ['model1', 'mr2']\n    \"\"\"\n    for k in mapping.keys():\n        allowed_keys = self.obs_names + self.mod_names + self.aux_names\n        if k not in allowed_keys:\n            raise KeyError(f\"Unknown key: {k}; must be one of {allowed_keys}\")\n\n    cmps = []\n    for cmp in self._comparers.values():\n        cmps.append(cmp.rename(mapping, errors=\"ignore\"))\n    return ComparerCollection(cmps)\n
"},{"location":"api/comparercollection/#modelskill.ComparerCollection.save","title":"save","text":"
save(filename)\n

Save the ComparerCollection to a zip file.

Each comparer is stored as a netcdf file in the zip file.

Parameters:

Name Type Description Default filename str or Path

Filename of the zip file.

required

Examples:

>>> cc = ms.match(obs, mod)\n>>> cc.save(\"my_comparer_collection.msk\")\n
Source code in modelskill/comparison/_collection.py
def save(self, filename: Union[str, Path]) -> None:\n    \"\"\"Save the ComparerCollection to a zip file.\n\n    Each comparer is stored as a netcdf file in the zip file.\n\n    Parameters\n    ----------\n    filename : str or Path\n        Filename of the zip file.\n\n    Examples\n    --------\n    >>> cc = ms.match(obs, mod)\n    >>> cc.save(\"my_comparer_collection.msk\")\n    \"\"\"\n\n    files = []\n    no = 0\n    for name, cmp in self._comparers.items():\n        cmp_fn = f\"{no}_{name}.nc\"\n        cmp.save(cmp_fn)\n        files.append(cmp_fn)\n        no += 1\n\n    with zipfile.ZipFile(filename, \"w\") as zip:\n        for f in files:\n            zip.write(f)\n            os.remove(f)\n
"},{"location":"api/comparercollection/#modelskill.ComparerCollection.score","title":"score","text":"
score(metric=mtr.rmse, **kwargs)\n

Weighted mean score of model(s) over all observations

Wrapping mean_skill() with a single metric.

NOTE: will take simple mean over different quantities!

Parameters:

Name Type Description Default weights str or List(float) or Dict(str, float)

weighting of observations, by default None

  • None: use observations weight attribute (if assigned, else \"equal\")
  • \"equal\": giving all observations equal weight,
  • \"points\": giving all points equal weight,
  • list of weights e.g. [0.3, 0.3, 0.4] per observation,
  • dictionary of observations with special weigths, others will be set to 1.0
required metric list

a single metric from modelskill.metrics, by default rmse

rmse

Returns:

Type Description Dict[str, float]

mean of skills score as a single number (for each model)

See also

skill skill assessment per observation mean_skill weighted mean of skills assessment mean_skill_points skill assessment pooling all observation points together

Examples:

>>> import modelskill as ms\n>>> cc = ms.match([o1, o2], mod)\n>>> cc.score()\n{'mod': 0.30681206}\n>>> cc.score(weights=[0.1,0.1,0.8])\n{'mod': 0.3383011631797379}\n
>>> cc.score(weights='points', metric=\"mape\")\n{'mod': 8.414442957854142}\n
Source code in modelskill/comparison/_collection.py
def score(\n    self,\n    metric: str | Callable = mtr.rmse,\n    **kwargs: Any,\n) -> Dict[str, float]:\n    \"\"\"Weighted mean score of model(s) over all observations\n\n    Wrapping mean_skill() with a single metric.\n\n    NOTE: will take simple mean over different quantities!\n\n    Parameters\n    ----------\n    weights : str or List(float) or Dict(str, float), optional\n        weighting of observations, by default None\n\n        - None: use observations weight attribute (if assigned, else \"equal\")\n        - \"equal\": giving all observations equal weight,\n        - \"points\": giving all points equal weight,\n        - list of weights e.g. [0.3, 0.3, 0.4] per observation,\n        - dictionary of observations with special weigths, others will be set to 1.0\n    metric : list, optional\n        a single metric from modelskill.metrics, by default rmse\n\n    Returns\n    -------\n    Dict[str, float]\n        mean of skills score as a single number (for each model)\n\n    See also\n    --------\n    skill\n        skill assessment per observation\n    mean_skill\n        weighted mean of skills assessment\n    mean_skill_points\n        skill assessment pooling all observation points together\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> cc = ms.match([o1, o2], mod)\n    >>> cc.score()\n    {'mod': 0.30681206}\n    >>> cc.score(weights=[0.1,0.1,0.8])\n    {'mod': 0.3383011631797379}\n\n    >>> cc.score(weights='points', metric=\"mape\")\n    {'mod': 8.414442957854142}\n    \"\"\"\n\n    weights = kwargs.pop(\"weights\", None)\n\n    metric = _parse_metric(metric)[0]\n\n    if weights is None:\n        weights = {c.name: c.weight for c in self._comparers.values()}\n\n    if not (callable(metric) or isinstance(metric, str)):\n        raise ValueError(\"metric must be a string or a function\")\n\n    model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore\n    observation, variable = 
_get_deprecated_obs_var_args(kwargs)  # type: ignore\n    assert kwargs == {}, f\"Unknown keyword arguments: {kwargs}\"\n\n    if model is None:\n        models = self.mod_names\n    else:\n        # TODO: these two lines looks familiar, extract to function\n        models = [model] if np.isscalar(model) else model  # type: ignore\n        models = [_get_name(m, self.mod_names) for m in models]  # type: ignore\n\n    cmp = self.sel(\n        model=models,  # deprecated\n        observation=observation,  # deprecated\n        quantity=variable,  # deprecated\n        start=start,  # deprecated\n        end=end,  # deprecated\n        area=area,  # deprecated\n    )\n\n    if cmp.n_points == 0:\n        raise ValueError(\"Dataset is empty, no data to compare.\")\n\n    ## ---- end of deprecated code ----\n\n    sk = cmp.mean_skill(weights=weights, metrics=[metric])\n    df = sk.to_dataframe()\n\n    metric_name = metric if isinstance(metric, str) else metric.__name__\n    ser = df[metric_name]\n    score = {str(col): float(value) for col, value in ser.items()}\n\n    return score\n
"},{"location":"api/comparercollection/#modelskill.ComparerCollection.sel","title":"sel","text":"
sel(model=None, observation=None, quantity=None, start=None, end=None, time=None, area=None, variable=None, **kwargs)\n

Select data based on model, time and/or area.

Parameters:

Name Type Description Default model str or int or list of str or list of int

Model name or index. If None, all models are selected.

None observation str or int or list of str or list of int

Observation name or index. If None, all observations are selected.

None quantity str or int or list of str or list of int

Quantity name or index. If None, all quantities are selected.

None start str or datetime

Start time. If None, all times are selected.

None end str or datetime

End time. If None, all times are selected.

None time str or datetime

Time. If None, all times are selected.

None area list of float

bbox: [x0, y0, x1, y1] or Polygon. If None, all areas are selected.

None **kwargs Any

Filtering by comparer attrs similar to xarray.Dataset.filter_by_attrs e.g. sel(gtype='track') or sel(obs_provider='CMEMS') if at least one comparer has an entry obs_provider with value CMEMS in its attrs container. Multiple kwargs are combined with logical AND.

{}

Returns:

Type Description ComparerCollection

New ComparerCollection with selected data.

Source code in modelskill/comparison/_collection.py
def sel(\n    self,\n    model: Optional[IdxOrNameTypes] = None,\n    observation: Optional[IdxOrNameTypes] = None,\n    quantity: Optional[IdxOrNameTypes] = None,\n    start: Optional[TimeTypes] = None,\n    end: Optional[TimeTypes] = None,\n    time: Optional[TimeTypes] = None,\n    area: Optional[List[float]] = None,\n    variable: Optional[IdxOrNameTypes] = None,  # obsolete\n    **kwargs: Any,\n) -> \"ComparerCollection\":\n    \"\"\"Select data based on model, time and/or area.\n\n    Parameters\n    ----------\n    model : str or int or list of str or list of int, optional\n        Model name or index. If None, all models are selected.\n    observation : str or int or list of str or list of int, optional\n        Observation name or index. If None, all observations are selected.\n    quantity : str or int or list of str or list of int, optional\n        Quantity name or index. If None, all quantities are selected.\n    start : str or datetime, optional\n        Start time. If None, all times are selected.\n    end : str or datetime, optional\n        End time. If None, all times are selected.\n    time : str or datetime, optional\n        Time. If None, all times are selected.\n    area : list of float, optional\n        bbox: [x0, y0, x1, y1] or Polygon. If None, all areas are selected.\n    **kwargs\n        Filtering by comparer attrs similar to xarray.Dataset.filter_by_attrs\n        e.g. `sel(gtype='track')` or `sel(obs_provider='CMEMS')` if at least\n        one comparer has an entry `obs_provider` with value `CMEMS` in its\n        attrs container. 
Multiple kwargs are combined with logical AND.\n\n    Returns\n    -------\n    ComparerCollection\n        New ComparerCollection with selected data.\n    \"\"\"\n    if variable is not None:\n        warnings.warn(\n            \"variable is deprecated, use quantity instead\",\n            FutureWarning,\n        )\n        quantity = variable\n    # TODO is this really necessary to do both in ComparerCollection and Comparer?\n    if model is not None:\n        if isinstance(model, (str, int)):\n            models = [model]\n        else:\n            models = list(model)\n        mod_names: List[str] = [_get_name(m, self.mod_names) for m in models]\n    if observation is None:\n        observation = self.obs_names\n    else:\n        observation = [observation] if np.isscalar(observation) else observation  # type: ignore\n        observation = [_get_name(o, self.obs_names) for o in observation]  # type: ignore\n\n    if (quantity is not None) and (self.n_quantities > 1):\n        quantity = [quantity] if np.isscalar(quantity) else quantity  # type: ignore\n        quantity = [_get_name(v, self.quantity_names) for v in quantity]  # type: ignore\n    else:\n        quantity = self.quantity_names\n\n    cmps = []\n    for cmp in self._comparers.values():\n        if cmp.name in observation and cmp.quantity.name in quantity:\n            thismodel = (\n                [m for m in mod_names if m in cmp.mod_names] if model else None\n            )\n            if (thismodel is not None) and (len(thismodel) == 0):\n                continue\n            cmpsel = cmp.sel(\n                model=thismodel,\n                start=start,\n                end=end,\n                time=time,\n                area=area,\n            )\n            if cmpsel is not None:\n                # TODO: check if cmpsel is empty\n                if cmpsel.n_points > 0:\n                    cmps.append(cmpsel)\n    cc = ComparerCollection(cmps)\n\n    if kwargs:\n        cc = 
cc.filter_by_attrs(**kwargs)\n\n    return cc\n
"},{"location":"api/comparercollection/#modelskill.ComparerCollection.skill","title":"skill","text":"
skill(by=None, metrics=None, observed=False, **kwargs)\n

Aggregated skill assessment of model(s)

Parameters:

Name Type Description Default by str or List[str]

group by, by default [\"model\", \"observation\"]

  • by column name
  • by temporal bin of the DateTimeIndex via the freq-argument (using pandas pd.Grouper(freq)), e.g.: 'freq:M' = monthly; 'freq:D' daily
  • by the dt accessor of the DateTimeIndex (e.g. 'dt.month') using the syntax 'dt:month'. The dt-argument is different from the freq-argument in that it gives month-of-year rather than month-of-data.
  • by attributes, stored in the cc.data.attrs container, e.g.: 'attrs:obs_provider' = group by observation provider or 'attrs:gtype' = group by geometry type (track or point)
None metrics list

list of modelskill.metrics (or str), by default modelskill.options.metrics.list

None observed bool

This only applies if any of the groupers are Categoricals.

  • True: only show observed values for categorical groupers.
  • False: show all values for categorical groupers.
False

Returns:

Type Description SkillTable

skill assessment as a SkillTable object

See also

sel a method for filtering/selecting data

Examples:

>>> import modelskill as ms\n>>> cc = ms.match([HKNA,EPL,c2], mr)\n>>> cc.skill().round(2)\n               n  bias  rmse  urmse   mae    cc    si    r2\nobservation\nHKNA         385 -0.20  0.35   0.29  0.25  0.97  0.09  0.99\nEPL           66 -0.08  0.22   0.20  0.18  0.97  0.07  0.99\nc2           113 -0.00  0.35   0.35  0.29  0.97  0.12  0.99\n
>>> cc.sel(observation='c2', start='2017-10-28').skill().round(2)\n               n  bias  rmse  urmse   mae    cc    si    r2\nobservation\nc2            41  0.33  0.41   0.25  0.36  0.96  0.06  0.99\n
>>> cc.skill(by='freq:D').round(2)\n              n  bias  rmse  urmse   mae    cc    si    r2\n2017-10-27  239 -0.15  0.25   0.21  0.20  0.72  0.10  0.98\n2017-10-28  162 -0.07  0.19   0.18  0.16  0.96  0.06  1.00\n2017-10-29  163 -0.21  0.52   0.47  0.42  0.79  0.11  0.99\n
Source code in modelskill/comparison/_collection.py
def skill(\n    self,\n    by: str | Iterable[str] | None = None,\n    metrics: Iterable[str] | Iterable[Callable] | str | Callable | None = None,\n    observed: bool = False,\n    **kwargs: Any,\n) -> SkillTable:\n    \"\"\"Aggregated skill assessment of model(s)\n\n    Parameters\n    ----------\n    by : str or List[str], optional\n        group by, by default [\"model\", \"observation\"]\n\n        - by column name\n        - by temporal bin of the DateTimeIndex via the freq-argument\n        (using pandas pd.Grouper(freq)), e.g.: 'freq:M' = monthly; 'freq:D' daily\n        - by the dt accessor of the DateTimeIndex (e.g. 'dt.month') using the\n        syntax 'dt:month'. The dt-argument is different from the freq-argument\n        in that it gives month-of-year rather than month-of-data.\n        - by attributes, stored in the cc.data.attrs container,\n        e.g.: 'attrs:obs_provider' = group by observation provider or\n        'attrs:gtype' = group by geometry type (track or point)\n    metrics : list, optional\n        list of modelskill.metrics (or str), by default modelskill.options.metrics.list\n    observed: bool, optional\n        This only applies if any of the groupers are Categoricals.\n\n        - True: only show observed values for categorical groupers.\n        - False: show all values for categorical groupers.\n\n    Returns\n    -------\n    SkillTable\n        skill assessment as a SkillTable object\n\n    See also\n    --------\n    sel\n        a method for filtering/selecting data\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> cc = ms.match([HKNA,EPL,c2], mr)\n    >>> cc.skill().round(2)\n                   n  bias  rmse  urmse   mae    cc    si    r2\n    observation\n    HKNA         385 -0.20  0.35   0.29  0.25  0.97  0.09  0.99\n    EPL           66 -0.08  0.22   0.20  0.18  0.97  0.07  0.99\n    c2           113 -0.00  0.35   0.35  0.29  0.97  0.12  0.99\n\n    >>> cc.sel(observation='c2', 
start='2017-10-28').skill().round(2)\n                   n  bias  rmse  urmse   mae    cc    si    r2\n    observation\n    c2            41  0.33  0.41   0.25  0.36  0.96  0.06  0.99\n\n    >>> cc.skill(by='freq:D').round(2)\n                  n  bias  rmse  urmse   mae    cc    si    r2\n    2017-10-27  239 -0.15  0.25   0.21  0.20  0.72  0.10  0.98\n    2017-10-28  162 -0.07  0.19   0.18  0.16  0.96  0.06  1.00\n    2017-10-29  163 -0.21  0.52   0.47  0.42  0.79  0.11  0.99\n    \"\"\"\n\n    # TODO remove in v1.1 ----------\n    model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore\n    observation, variable = _get_deprecated_obs_var_args(kwargs)  # type: ignore\n    assert kwargs == {}, f\"Unknown keyword arguments: {kwargs}\"\n\n    cc = self.sel(\n        model=model,\n        observation=observation,\n        quantity=variable,\n        start=start,\n        end=end,\n        area=area,\n    )\n    if cc.n_points == 0:\n        raise ValueError(\"Dataset is empty, no data to compare.\")\n\n    ## ---- end of deprecated code ----\n\n    pmetrics = _parse_metric(metrics)\n\n    agg_cols = _parse_groupby(by, n_mod=cc.n_models, n_qnt=cc.n_quantities)\n    agg_cols, attrs_keys = self._attrs_keys_in_by(agg_cols)\n\n    df = cc._to_long_dataframe(attrs_keys=attrs_keys, observed=observed)\n\n    res = _groupby_df(df, by=agg_cols, metrics=pmetrics)\n    mtr_cols = [m.__name__ for m in pmetrics]  # type: ignore\n    res = res.dropna(subset=mtr_cols, how=\"all\")  # TODO: ok to remove empty?\n    res = self._append_xy_to_res(res, cc)\n    res = cc._add_as_col_if_not_in_index(df, skilldf=res)  # type: ignore\n    return SkillTable(res)\n
"},{"location":"api/comparercollection/#modelskill.comparison._collection_plotter.ComparerCollectionPlotter","title":"modelskill.comparison._collection_plotter.ComparerCollectionPlotter","text":"

Plotter for ComparerCollection

Examples:

>>> cc.plot.scatter()\n>>> cc.plot.hist()\n>>> cc.plot.kde()\n>>> cc.plot.taylor()\n>>> cc.plot.box()\n
Source code in modelskill/comparison/_collection_plotter.py
class ComparerCollectionPlotter:\n    \"\"\"Plotter for ComparerCollection\n\n    Examples\n    --------\n    >>> cc.plot.scatter()\n    >>> cc.plot.hist()\n    >>> cc.plot.kde()\n    >>> cc.plot.taylor()\n    >>> cc.plot.box()\n    \"\"\"\n\n    def __init__(self, cc: ComparerCollection) -> None:\n        self.cc = cc\n        self.is_directional = False\n\n    def __call__(self, *args: Any, **kwds: Any) -> Axes | list[Axes]:\n        return self.scatter(*args, **kwds)\n\n    def scatter(\n        self,\n        *,\n        model=None,\n        bins: int | float = 120,\n        quantiles: int | Sequence[float] | None = None,\n        fit_to_quantiles: bool = False,\n        show_points: bool | int | float | None = None,\n        show_hist: Optional[bool] = None,\n        show_density: Optional[bool] = None,\n        norm: Optional[colors.Normalize] = None,\n        backend: Literal[\"matplotlib\", \"plotly\"] = \"matplotlib\",\n        figsize: Tuple[float, float] = (8, 8),\n        xlim: Optional[Tuple[float, float]] = None,\n        ylim: Optional[Tuple[float, float]] = None,\n        reg_method: str | bool = \"ols\",\n        title: Optional[str] = None,\n        xlabel: Optional[str] = None,\n        ylabel: Optional[str] = None,\n        skill_table: Optional[Union[str, List[str], bool]] = None,\n        ax: Optional[Axes] = None,\n        **kwargs,\n    ) -> Axes | list[Axes]:\n        \"\"\"Scatter plot showing compared data: observation vs modelled\n        Optionally, with density histogram.\n\n        Parameters\n        ----------\n        bins: (int, float, sequence), optional\n            bins for the 2D histogram on the background. 
By default 20 bins.\n            if int, represents the number of bins of 2D\n            if float, represents the bin size\n            if sequence (list of int or float), represents the bin edges\n        quantiles: (int, sequence), optional\n            number of quantiles for QQ-plot, by default None and will depend\n            on the scatter data length (10, 100 or 1000); if int, this is\n            the number of points; if sequence (list of floats), represents\n            the desired quantiles (from 0 to 1)\n        fit_to_quantiles: bool, optional, by default False\n            by default the regression line is fitted to all data, if True,\n            it is fitted to the quantiles which can be useful to represent\n            the extremes of the distribution, by default False\n        show_points : (bool, int, float), optional\n            Should the scatter points be displayed? None means: show all\n            points if fewer than 1e4, otherwise show 1e4 sample points,\n            by default None. float: fraction of points to show on plot\n            from 0 to 1. e.g. 0.5 shows 50% of the points. int: if 'n' (int)\n            given, then 'n' points will be displayed, randomly selected\n        show_hist : bool, optional\n            show the data density as a a 2d histogram, by default None\n        show_density: bool, optional\n            show the data density as a colormap of the scatter, by default\n            None. If both `show_density` and `show_hist` are None, then\n            `show_density` is used by default. For binning the data, the\n            kword `bins=Float` is used.\n        norm : matplotlib.colors norm\n            colormap normalization. 
If None, defaults to\n            matplotlib.colors.PowerNorm(vmin=1, gamma=0.5)\n        backend : str, optional\n            use \"plotly\" (interactive) or \"matplotlib\" backend,\n            by default \"matplotlib\"\n        figsize : tuple, optional\n            width and height of the figure, by default (8, 8)\n        xlim : tuple, optional\n            plot range for the observation (xmin, xmax), by default None\n        ylim : tuple, optional\n            plot range for the model (ymin, ymax), by default None\n        reg_method : str or bool, optional\n            method for determining the regression line\n            \"ols\" : ordinary least squares regression\n            \"odr\" : orthogonal distance regression,\n            False : no regression line,\n            by default \"ols\"\n        title : str, optional\n            plot title, by default None\n        xlabel : str, optional\n            x-label text on plot, by default None\n        ylabel : str, optional\n            y-label text on plot, by default None\n        skill_table : str, List[str], bool, optional\n            list of modelskill.metrics or boolean, if True then by default modelskill.options.metrics.list.\n            This kword adds a box at the right of the scatter plot,\n            by default False\n        ax : matplotlib axes, optional\n            axes to plot on, by default None\n        **kwargs\n            other keyword arguments to matplotlib.pyplot.scatter()\n\n        Examples\n        ------\n        >>> cc.plot.scatter()\n        >>> cc.plot.scatter(bins=0.2, backend='plotly')\n        >>> cc.plot.scatter(show_points=False, title='no points')\n        >>> cc.plot.scatter(xlabel='all observations', ylabel='my model')\n        >>> cc.sel(model='HKZN_v2').plot.scatter(figsize=(10, 10))\n        >>> cc.sel(observations=['c2','HKNA']).plot.scatter()\n        \"\"\"\n\n        cc = self.cc\n        if model is None:\n            mod_names = cc.mod_names\n        
else:\n            warnings.warn(\n                \"The 'model' keyword is deprecated! Instead, filter comparer before plotting cmp.sel(model=...).plot.scatter()\",\n                FutureWarning,\n            )\n\n            model_list = [model] if isinstance(model, (str, int)) else model\n            mod_names = [\n                self.cc.mod_names[_get_idx(m, self.cc.mod_names)] for m in model_list\n            ]\n\n        axes = []\n        for mod_name in mod_names:\n            ax_mod = self._scatter_one_model(\n                mod_name=mod_name,\n                bins=bins,\n                quantiles=quantiles,\n                fit_to_quantiles=fit_to_quantiles,\n                show_points=show_points,\n                show_hist=show_hist,\n                show_density=show_density,\n                norm=norm,\n                backend=backend,\n                figsize=figsize,\n                xlim=xlim,\n                ylim=ylim,\n                reg_method=reg_method,\n                title=title,\n                xlabel=xlabel,\n                ylabel=ylabel,\n                skill_table=skill_table,\n                ax=ax,\n                **kwargs,\n            )\n            axes.append(ax_mod)\n        return axes[0] if len(axes) == 1 else axes\n\n    def _scatter_one_model(\n        self,\n        *,\n        mod_name: str,\n        bins: int | float,\n        quantiles: int | Sequence[float] | None,\n        fit_to_quantiles: bool,\n        show_points: bool | int | float | None,\n        show_hist: Optional[bool],\n        show_density: Optional[bool],\n        backend: Literal[\"matplotlib\", \"plotly\"],\n        figsize: Tuple[float, float],\n        xlim: Optional[Tuple[float, float]],\n        ylim: Optional[Tuple[float, float]],\n        reg_method: str | bool,\n        title: Optional[str],\n        xlabel: Optional[str],\n        ylabel: Optional[str],\n        skill_table: Optional[Union[str, List[str], bool]],\n        ax,\n        
**kwargs,\n    ):\n        assert (\n            mod_name in self.cc.mod_names\n        ), f\"Model {mod_name} not found in collection {self.cc.mod_names}\"\n\n        cc_sel_mod = self.cc.sel(model=mod_name)\n\n        if cc_sel_mod.n_points == 0:\n            raise ValueError(\"No data found in selection\")\n\n        df = cc_sel_mod._to_long_dataframe()\n        x = df.obs_val.values\n        y = df.mod_val.values\n\n        # TODO why the first?\n        unit_text = self.cc[0]._unit_text\n\n        xlabel = xlabel or f\"Observation, {unit_text}\"\n        ylabel = ylabel or f\"Model, {unit_text}\"\n        title = title or f\"{mod_name} vs {cc_sel_mod._name}\"\n\n        skill = None\n        skill_score_unit = None\n        if skill_table:\n            metrics = None if skill_table is True else skill_table\n\n            # TODO why is this here?\n            if isinstance(self, ComparerCollectionPlotter) and len(cc_sel_mod) == 1:\n                skill = cc_sel_mod.skill(metrics=metrics)  # type: ignore\n            else:\n                skill = cc_sel_mod.mean_skill(metrics=metrics)  # type: ignore\n            # TODO improve this\n            try:\n                skill_score_unit = unit_text.split(\"[\")[1].split(\"]\")[0]\n            except IndexError:\n                skill_score_unit = \"\"  # Dimensionless\n\n        if self.is_directional:\n            # hide quantiles and regression line\n            quantiles = 0\n            reg_method = False\n\n        skill_scores = skill.iloc[0].to_dict() if skill is not None else None\n\n        ax = scatter(\n            x=x,\n            y=y,\n            bins=bins,\n            quantiles=quantiles,\n            fit_to_quantiles=fit_to_quantiles,\n            show_points=show_points,\n            show_hist=show_hist,\n            show_density=show_density,\n            backend=backend,\n            figsize=figsize,\n            xlim=xlim,\n            ylim=ylim,\n            reg_method=reg_method,\n         
   title=title,\n            xlabel=xlabel,\n            ylabel=ylabel,\n            skill_scores=skill_scores,\n            skill_score_unit=skill_score_unit,\n            ax=ax,\n            **kwargs,\n        )\n\n        if backend == \"matplotlib\" and self.is_directional:\n            _xtick_directional(ax, xlim)\n            _ytick_directional(ax, ylim)\n\n        return ax\n\n    def kde(self, *, ax=None, figsize=None, title=None, **kwargs) -> Axes:\n        \"\"\"Plot kernel density estimate of observation and model data.\n\n        Parameters\n        ----------\n        ax : Axes, optional\n            matplotlib axes, by default None\n        figsize : tuple, optional\n            width and height of the figure, by default None\n        title : str, optional\n            plot title, by default None\n        **kwargs\n            passed to pandas.DataFrame.plot.kde()\n\n        Returns\n        -------\n        Axes\n            matplotlib axes\n\n        Examples\n        --------\n        >>> cc.plot.kde()\n        >>> cc.plot.kde(bw_method=0.5)\n        >>> cc.plot.kde(bw_method='silverman')\n\n        \"\"\"\n        _, ax = _get_fig_ax(ax, figsize)\n\n        df = self.cc._to_long_dataframe()\n        ax = df.obs_val.plot.kde(\n            ax=ax, linestyle=\"dashed\", label=\"Observation\", **kwargs\n        )\n\n        for model in self.cc.mod_names:\n            df_model = df[df.model == model]\n            df_model.mod_val.plot.kde(ax=ax, label=model, **kwargs)\n\n        ax.set_xlabel(f\"{self.cc._unit_text}\")\n\n        title = (\n            _default_univarate_title(\"Density plot\", self.cc)\n            if title is None\n            else title\n        )\n        ax.set_title(title)\n        ax.legend()\n\n        # remove y-axis, ticks and label\n        ax.yaxis.set_visible(False)\n        ax.tick_params(axis=\"y\", which=\"both\", length=0)\n        ax.set_ylabel(\"\")\n\n        # remove box around plot\n        
ax.spines[\"top\"].set_visible(False)\n        ax.spines[\"right\"].set_visible(False)\n        ax.spines[\"left\"].set_visible(False)\n\n        if self.is_directional:\n            _xtick_directional(ax)\n\n        return ax\n\n    def hist(\n        self,\n        bins: int | Sequence = 100,\n        *,\n        model: str | int | None = None,\n        title: Optional[str] = None,\n        density: bool = True,\n        alpha: float = 0.5,\n        ax=None,\n        figsize: Optional[Tuple[float, float]] = None,\n        **kwargs,\n    ):\n        \"\"\"Plot histogram of specific model and all observations.\n\n        Wraps pandas.DataFrame hist() method.\n\n        Parameters\n        ----------\n        bins : int, optional\n            number of bins, by default 100\n        title : str, optional\n            plot title, default: observation name\n        density: bool, optional\n            If True, draw and return a probability density, by default True\n        alpha : float, optional\n            alpha transparency fraction, by default 0.5\n        ax : matplotlib axes, optional\n            axes to plot on, by default None\n        figsize : tuple, optional\n            width and height of the figure, by default None\n        **kwargs\n            other keyword arguments to df.hist()\n\n        Returns\n        -------\n        matplotlib axes\n\n        Examples\n        --------\n        >>> cc.plot.hist()\n        >>> cc.plot.hist(bins=100)\n\n        See also\n        --------\n        pandas.Series.hist\n        matplotlib.axes.Axes.hist\n        \"\"\"\n        if model is None:\n            mod_names = self.cc.mod_names\n        else:\n            warnings.warn(\n                \"The 'model' keyword is deprecated! 
Instead, filter comparer before plotting cmp.sel(model=...).plot.hist()\",\n                FutureWarning,\n            )\n            model_list = [model] if isinstance(model, (str, int)) else model\n            mod_names = [\n                self.cc.mod_names[_get_idx(m, self.cc.mod_names)] for m in model_list\n            ]\n\n        axes = []\n        for mod_name in mod_names:\n            ax_mod = self._hist_one_model(\n                mod_name=mod_name,\n                bins=bins,\n                title=title,\n                density=density,\n                alpha=alpha,\n                ax=ax,\n                figsize=figsize,\n                **kwargs,\n            )\n            axes.append(ax_mod)\n        return axes[0] if len(axes) == 1 else axes\n\n    def _hist_one_model(\n        self,\n        *,\n        mod_name: str,\n        bins: int | Sequence,\n        title: Optional[str],\n        density: bool,\n        alpha: float,\n        ax,\n        figsize: Optional[Tuple[float, float]],\n        **kwargs,\n    ):\n        from ._comparison import MOD_COLORS\n\n        _, ax = _get_fig_ax(ax, figsize)\n\n        assert (\n            mod_name in self.cc.mod_names\n        ), f\"Model {mod_name} not found in collection\"\n        mod_idx = _get_idx(mod_name, self.cc.mod_names)\n\n        title = (\n            _default_univarate_title(\"Histogram\", self.cc) if title is None else title\n        )\n\n        cmp = self.cc\n        df = cmp._to_long_dataframe()\n        kwargs[\"alpha\"] = alpha\n        kwargs[\"density\"] = density\n        df.mod_val.hist(bins=bins, color=MOD_COLORS[mod_idx], ax=ax, **kwargs)\n        df.obs_val.hist(\n            bins=bins,\n            color=self.cc[0].data[\"Observation\"].attrs[\"color\"],\n            ax=ax,\n            **kwargs,\n        )\n\n        ax.legend([mod_name, \"observations\"])\n        ax.set_title(title)\n        ax.set_xlabel(f\"{self.cc[df.observation.iloc[0]]._unit_text}\")\n\n        if 
density:\n            ax.set_ylabel(\"density\")\n        else:\n            ax.set_ylabel(\"count\")\n\n        if self.is_directional:\n            _xtick_directional(ax)\n\n        return ax\n\n    def taylor(\n        self,\n        *,\n        normalize_std: bool = False,\n        aggregate_observations: bool = True,\n        figsize: Tuple[float, float] = (7, 7),\n        marker: str = \"o\",\n        marker_size: float = 6.0,\n        title: str = \"Taylor diagram\",\n    ):\n        \"\"\"Taylor diagram showing model std and correlation to observation\n        in a single-quadrant polar plot, with r=std and theta=arccos(cc).\n\n        Parameters\n        ----------\n        normalize_std : bool, optional\n            plot model std normalized with observation std, default False\n        aggregate_observations : bool, optional\n            should multiple observations be aggregated before plotting\n            (or shown individually), default True\n        figsize : tuple, optional\n            width and height of the figure (should be square), by default (7, 7)\n        marker : str, optional\n            marker type e.g. \"x\", \"*\", by default \"o\"\n        marker_size : float, optional\n            size of the marker, by default 6\n        title : str, optional\n            title of the plot, by default \"Taylor diagram\"\n\n        Returns\n        -------\n        matplotlib.figure.Figure\n\n        Examples\n        ------\n        >>> cc.plot.taylor()\n        >>> cc.plot.taylor(observation=\"c2\")\n        >>> cc.plot.taylor(start=\"2017-10-28\", figsize=(5,5))\n\n        References\n        ----------\n        Copin, Y. (2018). 
https://gist.github.com/ycopin/3342888, Yannick Copin <yannick.copin@laposte.net>\n        \"\"\"\n\n        if (not aggregate_observations) and (not normalize_std):\n            raise ValueError(\n                \"aggregate_observations=False is only possible if normalize_std=True!\"\n            )\n\n        metrics = [mtr._std_obs, mtr._std_mod, mtr.cc]\n        skill_func = self.cc.mean_skill if aggregate_observations else self.cc.skill\n        sk = skill_func(\n            metrics=metrics,  # type: ignore\n        )\n        if sk is None:\n            return\n\n        df = sk.to_dataframe()\n        ref_std = 1.0 if normalize_std else df.iloc[0][\"_std_obs\"]\n\n        if isinstance(df.index, pd.MultiIndex):\n            df.index = df.index.map(\"_\".join)\n\n        df = df[[\"_std_obs\", \"_std_mod\", \"cc\"]].copy()\n        df.columns = [\"obs_std\", \"std\", \"cc\"]\n        pts = [\n            TaylorPoint(\n                r.Index, r.obs_std, r.std, r.cc, marker=marker, marker_size=marker_size\n            )\n            for r in df.itertuples()\n        ]\n\n        return taylor_diagram(\n            obs_std=ref_std,\n            points=pts,\n            figsize=figsize,\n            normalize_std=normalize_std,\n            title=title,\n        )\n\n    def box(self, *, ax=None, figsize=None, title=None, **kwargs) -> Axes:\n        \"\"\"Plot box plot of observations and model data.\n\n        Parameters\n        ----------\n        ax : Axes, optional\n            matplotlib axes, by default None\n        figsize : tuple, optional\n            width and height of the figure, by default None\n        title : str, optional\n            plot title, by default None\n        **kwargs\n            passed to pandas.DataFrame.plot.box()\n\n        Returns\n        -------\n        Axes\n            matplotlib axes\n\n        Examples\n        --------\n        >>> cc.plot.box()\n        >>> cc.plot.box(showmeans=True)\n        >>> cc.plot.box(ax=ax, 
title=\"Box plot\")\n        \"\"\"\n        _, ax = _get_fig_ax(ax, figsize)\n\n        df = self.cc._to_long_dataframe()\n\n        unique_obs_cols = [\"time\", \"x\", \"y\", \"observation\"]\n        df = df.set_index(unique_obs_cols)\n        unique_obs_values = df[~df.duplicated()].obs_val.values\n\n        data = {\"Observation\": unique_obs_values}\n        for model in df.model.unique():\n            df_model = df[df.model == model]\n            data[model] = df_model.mod_val.values\n\n        data = {k: pd.Series(v) for k, v in data.items()}\n        df = pd.DataFrame(data)\n\n        if \"grid\" not in kwargs:\n            kwargs[\"grid\"] = True\n\n        ax = df.plot.box(ax=ax, **kwargs)\n\n        ax.set_ylabel(f\"{self.cc._unit_text}\")\n\n        title = (\n            _default_univarate_title(\"Box plot\", self.cc) if title is None else title\n        )\n        ax.set_title(title)\n\n        if self.is_directional:\n            _ytick_directional(ax)\n\n        return ax\n
"},{"location":"api/comparercollection/#modelskill.comparison._collection_plotter.ComparerCollectionPlotter.box","title":"box","text":"
box(*, ax=None, figsize=None, title=None, **kwargs)\n

Plot box plot of observations and model data.

Parameters:

Name Type Description Default ax Axes

matplotlib axes, by default None

None figsize tuple

width and height of the figure, by default None

None title str

plot title, by default None

None **kwargs

passed to pandas.DataFrame.plot.box()

{}

Returns:

Type Description Axes

matplotlib axes

Examples:

>>> cc.plot.box()\n>>> cc.plot.box(showmeans=True)\n>>> cc.plot.box(ax=ax, title=\"Box plot\")\n
Source code in modelskill/comparison/_collection_plotter.py
def box(self, *, ax=None, figsize=None, title=None, **kwargs) -> Axes:\n    \"\"\"Plot box plot of observations and model data.\n\n    Parameters\n    ----------\n    ax : Axes, optional\n        matplotlib axes, by default None\n    figsize : tuple, optional\n        width and height of the figure, by default None\n    title : str, optional\n        plot title, by default None\n    **kwargs\n        passed to pandas.DataFrame.plot.box()\n\n    Returns\n    -------\n    Axes\n        matplotlib axes\n\n    Examples\n    --------\n    >>> cc.plot.box()\n    >>> cc.plot.box(showmeans=True)\n    >>> cc.plot.box(ax=ax, title=\"Box plot\")\n    \"\"\"\n    _, ax = _get_fig_ax(ax, figsize)\n\n    df = self.cc._to_long_dataframe()\n\n    unique_obs_cols = [\"time\", \"x\", \"y\", \"observation\"]\n    df = df.set_index(unique_obs_cols)\n    unique_obs_values = df[~df.duplicated()].obs_val.values\n\n    data = {\"Observation\": unique_obs_values}\n    for model in df.model.unique():\n        df_model = df[df.model == model]\n        data[model] = df_model.mod_val.values\n\n    data = {k: pd.Series(v) for k, v in data.items()}\n    df = pd.DataFrame(data)\n\n    if \"grid\" not in kwargs:\n        kwargs[\"grid\"] = True\n\n    ax = df.plot.box(ax=ax, **kwargs)\n\n    ax.set_ylabel(f\"{self.cc._unit_text}\")\n\n    title = (\n        _default_univarate_title(\"Box plot\", self.cc) if title is None else title\n    )\n    ax.set_title(title)\n\n    if self.is_directional:\n        _ytick_directional(ax)\n\n    return ax\n
"},{"location":"api/comparercollection/#modelskill.comparison._collection_plotter.ComparerCollectionPlotter.hist","title":"hist","text":"
hist(bins=100, *, model=None, title=None, density=True, alpha=0.5, ax=None, figsize=None, **kwargs)\n

Plot histogram of specific model and all observations.

Wraps pandas.DataFrame hist() method.

Parameters:

Name Type Description Default bins int

number of bins, by default 100

100 title str

plot title, default: observation name

None density bool

If True, draw and return a probability density, by default True

True alpha float

alpha transparency fraction, by default 0.5

0.5 ax matplotlib axes

axes to plot on, by default None

None figsize tuple

width and height of the figure, by default None

None **kwargs

other keyword arguments to df.hist()

{}

Returns:

Type Description matplotlib axes

Examples:

>>> cc.plot.hist()\n>>> cc.plot.hist(bins=100)\n
See also

pandas.Series.hist matplotlib.axes.Axes.hist

Source code in modelskill/comparison/_collection_plotter.py
def hist(\n    self,\n    bins: int | Sequence = 100,\n    *,\n    model: str | int | None = None,\n    title: Optional[str] = None,\n    density: bool = True,\n    alpha: float = 0.5,\n    ax=None,\n    figsize: Optional[Tuple[float, float]] = None,\n    **kwargs,\n):\n    \"\"\"Plot histogram of specific model and all observations.\n\n    Wraps pandas.DataFrame hist() method.\n\n    Parameters\n    ----------\n    bins : int, optional\n        number of bins, by default 100\n    title : str, optional\n        plot title, default: observation name\n    density: bool, optional\n        If True, draw and return a probability density, by default True\n    alpha : float, optional\n        alpha transparency fraction, by default 0.5\n    ax : matplotlib axes, optional\n        axes to plot on, by default None\n    figsize : tuple, optional\n        width and height of the figure, by default None\n    **kwargs\n        other keyword arguments to df.hist()\n\n    Returns\n    -------\n    matplotlib axes\n\n    Examples\n    --------\n    >>> cc.plot.hist()\n    >>> cc.plot.hist(bins=100)\n\n    See also\n    --------\n    pandas.Series.hist\n    matplotlib.axes.Axes.hist\n    \"\"\"\n    if model is None:\n        mod_names = self.cc.mod_names\n    else:\n        warnings.warn(\n            \"The 'model' keyword is deprecated! 
Instead, filter comparer before plotting cmp.sel(model=...).plot.hist()\",\n            FutureWarning,\n        )\n        model_list = [model] if isinstance(model, (str, int)) else model\n        mod_names = [\n            self.cc.mod_names[_get_idx(m, self.cc.mod_names)] for m in model_list\n        ]\n\n    axes = []\n    for mod_name in mod_names:\n        ax_mod = self._hist_one_model(\n            mod_name=mod_name,\n            bins=bins,\n            title=title,\n            density=density,\n            alpha=alpha,\n            ax=ax,\n            figsize=figsize,\n            **kwargs,\n        )\n        axes.append(ax_mod)\n    return axes[0] if len(axes) == 1 else axes\n
"},{"location":"api/comparercollection/#modelskill.comparison._collection_plotter.ComparerCollectionPlotter.kde","title":"kde","text":"
kde(*, ax=None, figsize=None, title=None, **kwargs)\n

Plot kernel density estimate of observation and model data.

Parameters:

Name Type Description Default ax Axes

matplotlib axes, by default None

None figsize tuple

width and height of the figure, by default None

None title str

plot title, by default None

None **kwargs

passed to pandas.DataFrame.plot.kde()

{}

Returns:

Type Description Axes

matplotlib axes

Examples:

>>> cc.plot.kde()\n>>> cc.plot.kde(bw_method=0.5)\n>>> cc.plot.kde(bw_method='silverman')\n
Source code in modelskill/comparison/_collection_plotter.py
def kde(self, *, ax=None, figsize=None, title=None, **kwargs) -> Axes:\n    \"\"\"Plot kernel density estimate of observation and model data.\n\n    Parameters\n    ----------\n    ax : Axes, optional\n        matplotlib axes, by default None\n    figsize : tuple, optional\n        width and height of the figure, by default None\n    title : str, optional\n        plot title, by default None\n    **kwargs\n        passed to pandas.DataFrame.plot.kde()\n\n    Returns\n    -------\n    Axes\n        matplotlib axes\n\n    Examples\n    --------\n    >>> cc.plot.kde()\n    >>> cc.plot.kde(bw_method=0.5)\n    >>> cc.plot.kde(bw_method='silverman')\n\n    \"\"\"\n    _, ax = _get_fig_ax(ax, figsize)\n\n    df = self.cc._to_long_dataframe()\n    ax = df.obs_val.plot.kde(\n        ax=ax, linestyle=\"dashed\", label=\"Observation\", **kwargs\n    )\n\n    for model in self.cc.mod_names:\n        df_model = df[df.model == model]\n        df_model.mod_val.plot.kde(ax=ax, label=model, **kwargs)\n\n    ax.set_xlabel(f\"{self.cc._unit_text}\")\n\n    title = (\n        _default_univarate_title(\"Density plot\", self.cc)\n        if title is None\n        else title\n    )\n    ax.set_title(title)\n    ax.legend()\n\n    # remove y-axis, ticks and label\n    ax.yaxis.set_visible(False)\n    ax.tick_params(axis=\"y\", which=\"both\", length=0)\n    ax.set_ylabel(\"\")\n\n    # remove box around plot\n    ax.spines[\"top\"].set_visible(False)\n    ax.spines[\"right\"].set_visible(False)\n    ax.spines[\"left\"].set_visible(False)\n\n    if self.is_directional:\n        _xtick_directional(ax)\n\n    return ax\n
"},{"location":"api/comparercollection/#modelskill.comparison._collection_plotter.ComparerCollectionPlotter.scatter","title":"scatter","text":"
scatter(*, model=None, bins=120, quantiles=None, fit_to_quantiles=False, show_points=None, show_hist=None, show_density=None, norm=None, backend='matplotlib', figsize=(8, 8), xlim=None, ylim=None, reg_method='ols', title=None, xlabel=None, ylabel=None, skill_table=None, ax=None, **kwargs)\n

Scatter plot showing compared data: observation vs modelled Optionally, with density histogram.

Parameters:

Name Type Description Default bins int | float

bins for the 2D histogram on the background. By default 20 bins. if int, represents the number of bins of 2D if float, represents the bin size if sequence (list of int or float), represents the bin edges

120 quantiles int | Sequence[float] | None

number of quantiles for QQ-plot, by default None and will depend on the scatter data length (10, 100 or 1000); if int, this is the number of points; if sequence (list of floats), represents the desired quantiles (from 0 to 1)

None fit_to_quantiles bool

by default the regression line is fitted to all data, if True, it is fitted to the quantiles which can be useful to represent the extremes of the distribution, by default False

False show_points (bool, int, float)

Should the scatter points be displayed? None means: show all points if fewer than 1e4, otherwise show 1e4 sample points, by default None. float: fraction of points to show on plot from 0 to 1. e.g. 0.5 shows 50% of the points. int: if 'n' (int) given, then 'n' points will be displayed, randomly selected

None show_hist bool

show the data density as a a 2d histogram, by default None

None show_density Optional[bool]

show the data density as a colormap of the scatter, by default None. If both show_density and show_hist are None, then show_density is used by default. For binning the data, the kword bins=Float is used.

None norm matplotlib.colors norm

colormap normalization. If None, defaults to matplotlib.colors.PowerNorm(vmin=1, gamma=0.5)

None backend str

use \"plotly\" (interactive) or \"matplotlib\" backend, by default \"matplotlib\"

'matplotlib' figsize tuple

width and height of the figure, by default (8, 8)

(8, 8) xlim tuple

plot range for the observation (xmin, xmax), by default None

None ylim tuple

plot range for the model (ymin, ymax), by default None

None reg_method str or bool

method for determining the regression line \"ols\" : ordinary least squares regression \"odr\" : orthogonal distance regression, False : no regression line, by default \"ols\"

'ols' title str

plot title, by default None

None xlabel str

x-label text on plot, by default None

None ylabel str

y-label text on plot, by default None

None skill_table (str, List[str], bool)

list of modelskill.metrics or boolean, if True then by default modelskill.options.metrics.list. This kword adds a box at the right of the scatter plot, by default False

None ax matplotlib axes

axes to plot on, by default None

None **kwargs

other keyword arguments to matplotlib.pyplot.scatter()

{}

Examples:

>>> cc.plot.scatter()\n>>> cc.plot.scatter(bins=0.2, backend='plotly')\n>>> cc.plot.scatter(show_points=False, title='no points')\n>>> cc.plot.scatter(xlabel='all observations', ylabel='my model')\n>>> cc.sel(model='HKZN_v2').plot.scatter(figsize=(10, 10))\n>>> cc.sel(observations=['c2','HKNA']).plot.scatter()\n
Source code in modelskill/comparison/_collection_plotter.py
def scatter(\n    self,\n    *,\n    model=None,\n    bins: int | float = 120,\n    quantiles: int | Sequence[float] | None = None,\n    fit_to_quantiles: bool = False,\n    show_points: bool | int | float | None = None,\n    show_hist: Optional[bool] = None,\n    show_density: Optional[bool] = None,\n    norm: Optional[colors.Normalize] = None,\n    backend: Literal[\"matplotlib\", \"plotly\"] = \"matplotlib\",\n    figsize: Tuple[float, float] = (8, 8),\n    xlim: Optional[Tuple[float, float]] = None,\n    ylim: Optional[Tuple[float, float]] = None,\n    reg_method: str | bool = \"ols\",\n    title: Optional[str] = None,\n    xlabel: Optional[str] = None,\n    ylabel: Optional[str] = None,\n    skill_table: Optional[Union[str, List[str], bool]] = None,\n    ax: Optional[Axes] = None,\n    **kwargs,\n) -> Axes | list[Axes]:\n    \"\"\"Scatter plot showing compared data: observation vs modelled\n    Optionally, with density histogram.\n\n    Parameters\n    ----------\n    bins: (int, float, sequence), optional\n        bins for the 2D histogram on the background. By default 20 bins.\n        if int, represents the number of bins of 2D\n        if float, represents the bin size\n        if sequence (list of int or float), represents the bin edges\n    quantiles: (int, sequence), optional\n        number of quantiles for QQ-plot, by default None and will depend\n        on the scatter data length (10, 100 or 1000); if int, this is\n        the number of points; if sequence (list of floats), represents\n        the desired quantiles (from 0 to 1)\n    fit_to_quantiles: bool, optional, by default False\n        by default the regression line is fitted to all data, if True,\n        it is fitted to the quantiles which can be useful to represent\n        the extremes of the distribution, by default False\n    show_points : (bool, int, float), optional\n        Should the scatter points be displayed? 
None means: show all\n        points if fewer than 1e4, otherwise show 1e4 sample points,\n        by default None. float: fraction of points to show on plot\n        from 0 to 1. e.g. 0.5 shows 50% of the points. int: if 'n' (int)\n        given, then 'n' points will be displayed, randomly selected\n    show_hist : bool, optional\n        show the data density as a a 2d histogram, by default None\n    show_density: bool, optional\n        show the data density as a colormap of the scatter, by default\n        None. If both `show_density` and `show_hist` are None, then\n        `show_density` is used by default. For binning the data, the\n        kword `bins=Float` is used.\n    norm : matplotlib.colors norm\n        colormap normalization. If None, defaults to\n        matplotlib.colors.PowerNorm(vmin=1, gamma=0.5)\n    backend : str, optional\n        use \"plotly\" (interactive) or \"matplotlib\" backend,\n        by default \"matplotlib\"\n    figsize : tuple, optional\n        width and height of the figure, by default (8, 8)\n    xlim : tuple, optional\n        plot range for the observation (xmin, xmax), by default None\n    ylim : tuple, optional\n        plot range for the model (ymin, ymax), by default None\n    reg_method : str or bool, optional\n        method for determining the regression line\n        \"ols\" : ordinary least squares regression\n        \"odr\" : orthogonal distance regression,\n        False : no regression line,\n        by default \"ols\"\n    title : str, optional\n        plot title, by default None\n    xlabel : str, optional\n        x-label text on plot, by default None\n    ylabel : str, optional\n        y-label text on plot, by default None\n    skill_table : str, List[str], bool, optional\n        list of modelskill.metrics or boolean, if True then by default modelskill.options.metrics.list.\n        This kword adds a box at the right of the scatter plot,\n        by default False\n    ax : matplotlib axes, optional\n     
   axes to plot on, by default None\n    **kwargs\n        other keyword arguments to matplotlib.pyplot.scatter()\n\n    Examples\n    ------\n    >>> cc.plot.scatter()\n    >>> cc.plot.scatter(bins=0.2, backend='plotly')\n    >>> cc.plot.scatter(show_points=False, title='no points')\n    >>> cc.plot.scatter(xlabel='all observations', ylabel='my model')\n    >>> cc.sel(model='HKZN_v2').plot.scatter(figsize=(10, 10))\n    >>> cc.sel(observations=['c2','HKNA']).plot.scatter()\n    \"\"\"\n\n    cc = self.cc\n    if model is None:\n        mod_names = cc.mod_names\n    else:\n        warnings.warn(\n            \"The 'model' keyword is deprecated! Instead, filter comparer before plotting cmp.sel(model=...).plot.scatter()\",\n            FutureWarning,\n        )\n\n        model_list = [model] if isinstance(model, (str, int)) else model\n        mod_names = [\n            self.cc.mod_names[_get_idx(m, self.cc.mod_names)] for m in model_list\n        ]\n\n    axes = []\n    for mod_name in mod_names:\n        ax_mod = self._scatter_one_model(\n            mod_name=mod_name,\n            bins=bins,\n            quantiles=quantiles,\n            fit_to_quantiles=fit_to_quantiles,\n            show_points=show_points,\n            show_hist=show_hist,\n            show_density=show_density,\n            norm=norm,\n            backend=backend,\n            figsize=figsize,\n            xlim=xlim,\n            ylim=ylim,\n            reg_method=reg_method,\n            title=title,\n            xlabel=xlabel,\n            ylabel=ylabel,\n            skill_table=skill_table,\n            ax=ax,\n            **kwargs,\n        )\n        axes.append(ax_mod)\n    return axes[0] if len(axes) == 1 else axes\n
"},{"location":"api/comparercollection/#modelskill.comparison._collection_plotter.ComparerCollectionPlotter.taylor","title":"taylor","text":"
taylor(*, normalize_std=False, aggregate_observations=True, figsize=(7, 7), marker='o', marker_size=6.0, title='Taylor diagram')\n

Taylor diagram showing model std and correlation to observation in a single-quadrant polar plot, with r=std and theta=arccos(cc).

Parameters:

Name Type Description Default normalize_std bool

plot model std normalized with observation std, default False

False aggregate_observations bool

should multiple observations be aggregated before plotting (or shown individually), default True

True figsize tuple

width and height of the figure (should be square), by default (7, 7)

(7, 7) marker str

marker type e.g. \"x\", \"*\", by default \"o\"

'o' marker_size float

size of the marker, by default 6

6.0 title str

title of the plot, by default \"Taylor diagram\"

'Taylor diagram'

Returns:

Type Description Figure

Examples:

>>> cc.plot.taylor()\n>>> cc.plot.taylor(observation=\"c2\")\n>>> cc.plot.taylor(start=\"2017-10-28\", figsize=(5,5))\n
References

Copin, Y. (2018). https://gist.github.com/ycopin/3342888, Yannick Copin yannick.copin@laposte.net

Source code in modelskill/comparison/_collection_plotter.py
def taylor(\n    self,\n    *,\n    normalize_std: bool = False,\n    aggregate_observations: bool = True,\n    figsize: Tuple[float, float] = (7, 7),\n    marker: str = \"o\",\n    marker_size: float = 6.0,\n    title: str = \"Taylor diagram\",\n):\n    \"\"\"Taylor diagram showing model std and correlation to observation\n    in a single-quadrant polar plot, with r=std and theta=arccos(cc).\n\n    Parameters\n    ----------\n    normalize_std : bool, optional\n        plot model std normalized with observation std, default False\n    aggregate_observations : bool, optional\n        should multiple observations be aggregated before plotting\n        (or shown individually), default True\n    figsize : tuple, optional\n        width and height of the figure (should be square), by default (7, 7)\n    marker : str, optional\n        marker type e.g. \"x\", \"*\", by default \"o\"\n    marker_size : float, optional\n        size of the marker, by default 6\n    title : str, optional\n        title of the plot, by default \"Taylor diagram\"\n\n    Returns\n    -------\n    matplotlib.figure.Figure\n\n    Examples\n    ------\n    >>> cc.plot.taylor()\n    >>> cc.plot.taylor(observation=\"c2\")\n    >>> cc.plot.taylor(start=\"2017-10-28\", figsize=(5,5))\n\n    References\n    ----------\n    Copin, Y. (2018). 
https://gist.github.com/ycopin/3342888, Yannick Copin <yannick.copin@laposte.net>\n    \"\"\"\n\n    if (not aggregate_observations) and (not normalize_std):\n        raise ValueError(\n            \"aggregate_observations=False is only possible if normalize_std=True!\"\n        )\n\n    metrics = [mtr._std_obs, mtr._std_mod, mtr.cc]\n    skill_func = self.cc.mean_skill if aggregate_observations else self.cc.skill\n    sk = skill_func(\n        metrics=metrics,  # type: ignore\n    )\n    if sk is None:\n        return\n\n    df = sk.to_dataframe()\n    ref_std = 1.0 if normalize_std else df.iloc[0][\"_std_obs\"]\n\n    if isinstance(df.index, pd.MultiIndex):\n        df.index = df.index.map(\"_\".join)\n\n    df = df[[\"_std_obs\", \"_std_mod\", \"cc\"]].copy()\n    df.columns = [\"obs_std\", \"std\", \"cc\"]\n    pts = [\n        TaylorPoint(\n            r.Index, r.obs_std, r.std, r.cc, marker=marker, marker_size=marker_size\n        )\n        for r in df.itertuples()\n    ]\n\n    return taylor_diagram(\n        obs_std=ref_std,\n        points=pts,\n        figsize=figsize,\n        normalize_std=normalize_std,\n        title=title,\n    )\n
"},{"location":"api/gridded_skill/","title":"Gridded Skill","text":""},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGrid","title":"modelskill.skill_grid.SkillGrid","text":"

Bases: SkillGridMixin

Gridded skill object for analysis and visualization of spatially gridded skill data. The object wraps the xr.DataSet class which can be accessed from the attribute data.

The object contains one or more \"arrays\" of skill metrics, each corresponding to a single metric (e.g. bias, rmse, r2). The arrays are indexed by the metric name, e.g. ss[\"bias\"] or ss.bias.

Examples:

>>> gs = cc.gridded_skill()\n>>> gs.metrics\n['n', 'bias', 'rmse', 'urmse', 'mae', 'cc', 'si', 'r2']\n
>>> gs.mod_names\n['SW_1', 'SW_2']\n
>>> gs.sel(model='SW_1').rmse.plot()\n
Source code in modelskill/skill_grid.py
class SkillGrid(SkillGridMixin):\n    \"\"\"\n    Gridded skill object for analysis and visualization of spatially\n    gridded skill data. The object wraps the xr.DataSet class\n    which can be accessed from the attribute data.\n\n    The object contains one or more \"arrays\" of skill metrics, each\n    corresponding to a single metric (e.g. bias, rmse, r2). The arrays\n    are indexed by the metric name, e.g. `ss[\"bias\"]` or `ss.bias`.\n\n    Examples\n    --------\n    >>> gs = cc.gridded_skill()\n    >>> gs.metrics\n    ['n', 'bias', 'rmse', 'urmse', 'mae', 'cc', 'si', 'r2']\n\n    >>> gs.mod_names\n    ['SW_1', 'SW_2']\n\n    >>> gs.sel(model='SW_1').rmse.plot()\n    \"\"\"\n\n    def __init__(self, data: xr.Dataset) -> None:\n        # TODO: add type and unit info; add domain to plot outline on map\n        self.data = data\n        self._set_attrs()\n\n    @property\n    def metrics(self) -> list[str]:\n        \"\"\"List of metrics (=data vars)\"\"\"\n        return list(self.data.data_vars)\n\n    def __repr__(self) -> str:\n        out = [\n            \"<SkillGrid>\",\n            f\"Dimensions: (x: {len(self.x)}, y: {len(self.y)})\",\n        ]\n        return \"\\n\".join(out)\n\n    @overload\n    def __getitem__(self, key: Hashable) -> SkillGridArray: ...\n\n    @overload\n    def __getitem__(self, key: Iterable[Hashable]) -> SkillGrid: ...\n\n    def __getitem__(\n        self, key: Hashable | Iterable[Hashable]\n    ) -> SkillGridArray | SkillGrid:\n        result = self.data[key]\n        if isinstance(result, xr.DataArray):\n            return SkillGridArray(result)\n        elif isinstance(result, xr.Dataset):\n            return SkillGrid(result)\n        else:\n            return result\n\n    def __getattr__(self, item: str, *args, **kwargs) -> Any:\n        if item in self.data.data_vars:\n            return self[item]  # Redirects to __getitem__\n        else:\n            # return getattr(self.data, item, *args, **kwargs)\n            
raise AttributeError(\n                f\"\"\"\n                    SkillGrid has no attribute {item}; Maybe you are\n                    looking for the corresponding xr.Dataset attribute?\n                    Access SkillGrid's Dataset with '.data'.\n                \"\"\"\n            )\n\n    def _set_attrs(self) -> None:\n        # TODO: use type and unit to give better long name\n        # self.ds[\"bias\"].attrs = dict(long_name=\"Bias of Hm0\", units=\"m\")\n\n        self.data[\"n\"].attrs = dict(long_name=\"Number of observations\", units=\"-\")\n        if self._has_geographical_coords():\n            self.data[\"x\"].attrs = dict(long_name=\"Longitude\", units=\"degrees east\")\n            self.data[\"y\"].attrs = dict(long_name=\"Latitude\", units=\"degrees north\")\n        else:\n            self.data[\"x\"].attrs = dict(long_name=\"Easting\", units=\"meter\")\n            self.data[\"y\"].attrs = dict(long_name=\"Northing\", units=\"meter\")\n\n    def _has_geographical_coords(self) -> bool:\n        is_geo = True\n        if (self.x.min() < -180.0) or (self.x.max() > 360.0):\n            is_geo = False\n        if (self.y.min() < -90.0) or (self.y.max() > 90.0):\n            is_geo = False\n        return is_geo\n\n    def sel(self, model: str) -> SkillGrid:\n        \"\"\"Select a model from the SkillGrid\n\n        Parameters\n        ----------\n        model : str\n            Name of model to select\n\n        Returns\n        -------\n        SkillGrid\n            SkillGrid with only the selected model\n        \"\"\"\n        sel_data = self.data.sel(model=model)\n        assert isinstance(sel_data, xr.Dataset)\n        return SkillGrid(sel_data)\n\n    def plot(self, metric: str, model: str | None = None, **kwargs: Any) -> Axes:\n        warnings.warn(\n            \"plot() is deprecated and will be removed in a future version. 
\",\n            FutureWarning,\n        )\n        if metric not in self.metrics:\n            raise ValueError(f\"metric {metric} not found in {self.metrics}\")\n        return self[metric].plot(model=model, **kwargs)\n\n    def to_dataframe(self) -> pd.DataFrame:\n        \"\"\"Convert gridded skill data to pandas DataFrame\n\n        Returns\n        -------\n        pd.DataFrame\n            data as a pandas DataFrame\n        \"\"\"\n        return self.data.to_dataframe()\n
"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGrid.coords","title":"coords property","text":"
coords\n

Coordinates (same as xr.DataSet.coords)

"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGrid.metrics","title":"metrics property","text":"
metrics\n

List of metrics (=data vars)

"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGrid.mod_names","title":"mod_names property","text":"
mod_names\n

List of model names

"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGrid.obs_names","title":"obs_names property","text":"
obs_names\n

List of observation names

"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGrid.x","title":"x property","text":"
x\n

x-coordinate values

"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGrid.y","title":"y property","text":"
y\n

y-coordinate values

"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGrid.sel","title":"sel","text":"
sel(model)\n

Select a model from the SkillGrid

Parameters:

Name Type Description Default model str

Name of model to select

required

Returns:

Type Description SkillGrid

SkillGrid with only the selected model

Source code in modelskill/skill_grid.py
def sel(self, model: str) -> SkillGrid:\n    \"\"\"Select a model from the SkillGrid\n\n    Parameters\n    ----------\n    model : str\n        Name of model to select\n\n    Returns\n    -------\n    SkillGrid\n        SkillGrid with only the selected model\n    \"\"\"\n    sel_data = self.data.sel(model=model)\n    assert isinstance(sel_data, xr.Dataset)\n    return SkillGrid(sel_data)\n
"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGrid.to_dataframe","title":"to_dataframe","text":"
to_dataframe()\n

Convert gridded skill data to pandas DataFrame

Returns:

Type Description DataFrame

data as a pandas DataFrame

Source code in modelskill/skill_grid.py
def to_dataframe(self) -> pd.DataFrame:\n    \"\"\"Convert gridded skill data to pandas DataFrame\n\n    Returns\n    -------\n    pd.DataFrame\n        data as a pandas DataFrame\n    \"\"\"\n    return self.data.to_dataframe()\n
"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGridArray","title":"modelskill.skill_grid.SkillGridArray","text":"

Bases: SkillGridMixin

A SkillGridArray is a single metric-SkillGrid, corresponding to a \"column\" in a SkillGrid

Typically created by indexing a SkillGrid object, e.g. ss[\"bias\"].

Examples:

>>> gs = cc.gridded_skill()\n>>> gs[\"bias\"].plot()\n
Source code in modelskill/skill_grid.py
class SkillGridArray(SkillGridMixin):\n    \"\"\"A SkillGridArray is a single metric-SkillGrid, corresponding to a \"column\" in a SkillGrid\n\n    Typically created by indexing a SkillGrid object, e.g. `ss[\"bias\"]`.\n\n    Examples\n    --------\n    >>> gs = cc.gridded_skill()\n    >>> gs[\"bias\"].plot()\n    \"\"\"\n\n    def __init__(self, data: xr.DataArray) -> None:\n        assert isinstance(data, xr.DataArray)\n        self.data = data\n\n    def __repr__(self) -> str:\n        out = [\n            \"<SkillGridArray>\",\n            f\"Dimensions: (x: {len(self.x)}, y: {len(self.y)})\",\n        ]\n        return \"\\n\".join(out)\n\n    def plot(self, model: str | None = None, **kwargs: Any) -> Axes:\n        \"\"\"wrapper for xArray DataArray plot function\n\n        Parameters\n        ----------\n        model : str, optional\n            Name of model to plot, by default all models\n        **kwargs\n            keyword arguments passed to xr.DataArray plot()\n            e.g. figsize\n\n        Examples\n        --------\n        >>> gs = cc.gridded_skill()\n        >>> gs[\"bias\"].plot()\n        >>> gs.rmse.plot(model='SW_1')\n        >>> gs.r2.plot(cmap='YlOrRd', figsize=(10,10))\n        \"\"\"\n        if model is None:\n            da = self.data\n        else:\n            warnings.warn(\n                \"model argument is deprecated, use sel(model=...)\",\n                FutureWarning,\n            )\n            if model not in self.mod_names:\n                raise ValueError(f\"model {model} not in model list ({self.mod_names})\")\n            da = self.data.sel({\"model\": model})\n\n        extra_dims = [d for d in da.coords.dims if d not in [\"x\", \"y\"]]\n        if len(extra_dims) == 2:\n            ax = da.plot(col=extra_dims[0], row=extra_dims[1], **kwargs)\n        elif len(extra_dims) == 1:\n            ax = da.plot(col=extra_dims[0], **kwargs)\n        else:\n            ax = da.plot(**kwargs)\n        return ax\n
"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGridArray.coords","title":"coords property","text":"
coords\n

Coordinates (same as xr.DataSet.coords)

"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGridArray.mod_names","title":"mod_names property","text":"
mod_names\n

List of model names

"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGridArray.obs_names","title":"obs_names property","text":"
obs_names\n

List of observation names

"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGridArray.x","title":"x property","text":"
x\n

x-coordinate values

"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGridArray.y","title":"y property","text":"
y\n

y-coordinate values

"},{"location":"api/gridded_skill/#modelskill.skill_grid.SkillGridArray.plot","title":"plot","text":"
plot(model=None, **kwargs)\n

wrapper for xArray DataArray plot function

Parameters:

Name Type Description Default model str

Name of model to plot, by default all models

None **kwargs Any

keyword arguments passed to xr.DataArray plot() e.g. figsize

{}

Examples:

>>> gs = cc.gridded_skill()\n>>> gs[\"bias\"].plot()\n>>> gs.rmse.plot(model='SW_1')\n>>> gs.r2.plot(cmap='YlOrRd', figsize=(10,10))\n
Source code in modelskill/skill_grid.py
def plot(self, model: str | None = None, **kwargs: Any) -> Axes:\n    \"\"\"wrapper for xArray DataArray plot function\n\n    Parameters\n    ----------\n    model : str, optional\n        Name of model to plot, by default all models\n    **kwargs\n        keyword arguments passed to xr.DataArray plot()\n        e.g. figsize\n\n    Examples\n    --------\n    >>> gs = cc.gridded_skill()\n    >>> gs[\"bias\"].plot()\n    >>> gs.rmse.plot(model='SW_1')\n    >>> gs.r2.plot(cmap='YlOrRd', figsize=(10,10))\n    \"\"\"\n    if model is None:\n        da = self.data\n    else:\n        warnings.warn(\n            \"model argument is deprecated, use sel(model=...)\",\n            FutureWarning,\n        )\n        if model not in self.mod_names:\n            raise ValueError(f\"model {model} not in model list ({self.mod_names})\")\n        da = self.data.sel({\"model\": model})\n\n    extra_dims = [d for d in da.coords.dims if d not in [\"x\", \"y\"]]\n    if len(extra_dims) == 2:\n        ax = da.plot(col=extra_dims[0], row=extra_dims[1], **kwargs)\n    elif len(extra_dims) == 1:\n        ax = da.plot(col=extra_dims[0], **kwargs)\n    else:\n        ax = da.plot(**kwargs)\n    return ax\n
"},{"location":"api/matching/","title":"Matching","text":"

A Comparer/ComparerCollection can be created in one of the following ways:

  • match() - match observations and model results
  • from_matched() - create a Comparer from matched data
  • from_config() - create a ComparerCollection from a config file
"},{"location":"api/matching/#modelskill.match","title":"modelskill.match","text":"
match(obs, mod, *, obs_item=None, mod_item=None, gtype=None, max_model_gap=None, spatial_method=None)\n

Match observation and model result data in space and time

NOTE: In case of multiple model results with different time coverage, only the overlapping time period will be used! (intersection)

NOTE: In case of multiple observations, multiple models can only be matched if they are all of SpatialField type, e.g. DfsuModelResult or GridModelResult.

Parameters:

Name Type Description Default obs (str, Path, DataFrame, Observation, Sequence[Observation])

Observation(s) to be compared

required mod (str, Path, DataFrame, ModelResult, Sequence[ModelResult])

Model result(s) to be compared

required obs_item int or str

observation item if obs is a file/dataframe, by default None

None mod_item (int, str)

model item if mod is a file/dataframe, by default None

None gtype (str, optional)

Geometry type of the model result (if mod is a file/dataframe). If not specified, it will be guessed.

None max_model_gap (float, optional)

Maximum time gap (s) in the model result (e.g. for event-based model results), by default None

None spatial_method str

For Dfsu- and GridModelResult, spatial interpolation/selection method.

  • For DfsuModelResult, one of: 'contained' (=isel), 'nearest', 'inverse_distance' (with 5 nearest points), by default \"inverse_distance\".
  • For GridModelResult, passed to xarray.interp() as method argument, by default 'linear'.
None

Returns:

Type Description Comparer

In case of a single observation

ComparerCollection

In case of multiple observations

See Also

from_matched Create a Comparer from observation and model results that are already matched

Source code in modelskill/matching.py
def match(\n    obs,\n    mod,\n    *,\n    obs_item=None,\n    mod_item=None,\n    gtype=None,\n    max_model_gap=None,\n    spatial_method: Optional[str] = None,\n):\n    \"\"\"Match observation and model result data in space and time\n\n    NOTE: In case of multiple model results with different time coverage,\n    only the _overlapping_ time period will be used! (intersection)\n\n    NOTE: In case of multiple observations, multiple models can _only_\n    be matched if they are _all_ of SpatialField type, e.g. DfsuModelResult\n    or GridModelResult.\n\n    Parameters\n    ----------\n    obs : (str, Path, pd.DataFrame, Observation, Sequence[Observation])\n        Observation(s) to be compared\n    mod : (str, Path, pd.DataFrame, ModelResult, Sequence[ModelResult])\n        Model result(s) to be compared\n    obs_item : int or str, optional\n        observation item if obs is a file/dataframe, by default None\n    mod_item : (int, str), optional\n        model item if mod is a file/dataframe, by default None\n    gtype : (str, optional)\n        Geometry type of the model result (if mod is a file/dataframe).\n        If not specified, it will be guessed.\n    max_model_gap : (float, optional)\n        Maximum time gap (s) in the model result (e.g. 
for event-based\n        model results), by default None\n    spatial_method : str, optional\n        For Dfsu- and GridModelResult, spatial interpolation/selection method.\n\n        - For DfsuModelResult, one of: 'contained' (=isel), 'nearest',\n        'inverse_distance' (with 5 nearest points), by default \"inverse_distance\".\n        - For GridModelResult, passed to xarray.interp() as method argument,\n        by default 'linear'.\n\n    Returns\n    -------\n    Comparer\n        In case of a single observation\n    ComparerCollection\n        In case of multiple observations\n\n    See Also\n    --------\n    [from_matched][modelskill.from_matched]\n        Create a Comparer from observation and model results that are already matched\n    \"\"\"\n    if isinstance(obs, get_args(ObsInputType)):\n        return _single_obs_compare(\n            obs,\n            mod,\n            obs_item=obs_item,\n            mod_item=mod_item,\n            gtype=gtype,\n            max_model_gap=max_model_gap,\n            spatial_method=spatial_method,\n        )\n\n    if isinstance(obs, Collection):\n        assert all(isinstance(o, get_args(ObsInputType)) for o in obs)\n    else:\n        raise TypeError(\n            f\"Obs is not the correct type: it is {type(obs)}. Check the order of the arguments (obs, mod).\"\n        )\n\n    if len(obs) > 1 and isinstance(mod, Collection) and len(mod) > 1:\n        if not all(isinstance(m, (DfsuModelResult, GridModelResult)) for m in mod):\n            raise ValueError(\n                \"\"\"\n                In case of multiple observations, multiple models can _only_ \n                be matched if they are _all_ of SpatialField type, e.g. DfsuModelResult \n                or GridModelResult. 
\n\n                If you want match multiple point observations with multiple point model results, \n                please match one observation at a time and then create a collection of these \n                using modelskill.ComparerCollection(cmp_list) afterwards. The same applies to track data.\n                \"\"\"\n            )\n\n    clist = [\n        _single_obs_compare(\n            o,\n            mod,\n            obs_item=obs_item,\n            mod_item=mod_item,\n            gtype=gtype,\n            max_model_gap=max_model_gap,\n            spatial_method=spatial_method,\n        )\n        for o in obs\n    ]\n\n    return ComparerCollection(clist)\n
"},{"location":"api/matching/#modelskill.from_matched","title":"modelskill.from_matched","text":"
from_matched(data, *, obs_item=0, mod_items=None, aux_items=None, quantity=None, name=None, weight=1.0, x=None, y=None, z=None, x_item=None, y_item=None)\n

Create a Comparer from observation and model results that are already matched (aligned)

Parameters:

Name Type Description Default data [DataFrame, str, Path, Dfs0, Dataset]

DataFrame (or object that can be converted to a DataFrame e.g. dfs0) with columns obs_item, mod_items, aux_items

required obs_item [str, int]

Name or index of observation item, by default first item

0 mod_items Iterable[str, int]

Names or indicies of model items, if None all remaining columns are model items, by default None

None aux_items Iterable[str, int]

Names or indicies of auxiliary items, by default None

None quantity Quantity

Quantity of the observation and model results, by default Quantity(name=\"Undefined\", unit=\"Undefined\")

None name str

Name of the comparer, by default None (will be set to obs_item)

None x float

x-coordinate of observation, by default None

None y float

y-coordinate of observation, by default None

None z float

z-coordinate of observation, by default None

None x_item str | int | None

Name of x item, only relevant for track data

None y_item str | int | None

Name of y item, only relevant for track data

None

Examples:

>>> import pandas as pd\n>>> import modelskill as ms\n>>> df = pd.DataFrame({'stn_a': [1,2,3], 'local': [1.1,2.1,3.1]}, index=pd.date_range('2010-01-01', periods=3))\n>>> cmp = ms.from_matched(df, obs_item='stn_a') # remaining columns are model results\n>>> cmp\n<Comparer>\nQuantity: Undefined [Undefined]\nObservation: stn_a, n_points=3\n Model: local, rmse=0.100\n>>> df = pd.DataFrame({'stn_a': [1,2,3], 'local': [1.1,2.1,3.1], 'global': [1.2,2.2,3.2], 'nonsense':[1,2,3]}, index=pd.date_range('2010-01-01', periods=3))\n>>> cmp = ms.from_matched(df, obs_item='stn_a', mod_items=['local', 'global'])\n>>> cmp\n<Comparer>\nQuantity: Undefined [Undefined]\nObservation: stn_a, n_points=3\n    Model: local, rmse=0.100\n    Model: global, rmse=0.200\n
Source code in modelskill/matching.py
def from_matched(\n    data: Union[str, Path, pd.DataFrame, mikeio.Dfs0, mikeio.Dataset],\n    *,\n    obs_item: str | int | None = 0,\n    mod_items: Optional[Iterable[str | int]] = None,\n    aux_items: Optional[Iterable[str | int]] = None,\n    quantity: Optional[Quantity] = None,\n    name: Optional[str] = None,\n    weight: float = 1.0,\n    x: Optional[float] = None,\n    y: Optional[float] = None,\n    z: Optional[float] = None,\n    x_item: str | int | None = None,\n    y_item: str | int | None = None,\n) -> Comparer:\n    \"\"\"Create a Comparer from observation and model results that are already matched (aligned)\n\n    Parameters\n    ----------\n    data : [pd.DataFrame, str, Path, mikeio.Dfs0, mikeio.Dataset]\n        DataFrame (or object that can be converted to a DataFrame e.g. dfs0)\n        with columns obs_item, mod_items, aux_items\n    obs_item : [str, int], optional\n        Name or index of observation item, by default first item\n    mod_items : Iterable[str, int], optional\n        Names or indicies of model items, if None all remaining columns are model items, by default None\n    aux_items : Iterable[str, int], optional\n        Names or indicies of auxiliary items, by default None\n    quantity : Quantity, optional\n        Quantity of the observation and model results, by default Quantity(name=\"Undefined\", unit=\"Undefined\")\n    name : str, optional\n        Name of the comparer, by default None (will be set to obs_item)\n    x : float, optional\n        x-coordinate of observation, by default None\n    y : float, optional\n        y-coordinate of observation, by default None\n    z : float, optional\n        z-coordinate of observation, by default None\n    x_item: [str, int], optional,\n        Name of x item, only relevant for track data\n    y_item: [str, int], optional\n        Name of y item, only relevant for track data\n\n    Examples\n    --------\n    >>> import pandas as pd\n    >>> import modelskill as ms\n    >>> df = 
pd.DataFrame({'stn_a': [1,2,3], 'local': [1.1,2.1,3.1]}, index=pd.date_range('2010-01-01', periods=3))\n    >>> cmp = ms.from_matched(df, obs_item='stn_a') # remaining columns are model results\n    >>> cmp\n    <Comparer>\n    Quantity: Undefined [Undefined]\n    Observation: stn_a, n_points=3\n     Model: local, rmse=0.100\n    >>> df = pd.DataFrame({'stn_a': [1,2,3], 'local': [1.1,2.1,3.1], 'global': [1.2,2.2,3.2], 'nonsense':[1,2,3]}, index=pd.date_range('2010-01-01', periods=3))\n    >>> cmp = ms.from_matched(df, obs_item='stn_a', mod_items=['local', 'global'])\n    >>> cmp\n    <Comparer>\n    Quantity: Undefined [Undefined]\n    Observation: stn_a, n_points=3\n        Model: local, rmse=0.100\n        Model: global, rmse=0.200\n\n    \"\"\"\n    # pre-process if dfs0, or mikeio.Dataset\n    if isinstance(data, (str, Path)):\n        if Path(data).suffix != \".dfs0\":\n            raise ValueError(f\"File must be a dfs0 file, not {Path(data).suffix}\")\n        data = mikeio.read(data)  # now mikeio.Dataset\n    elif isinstance(data, mikeio.Dfs0):\n        data = data.read()  # now mikeio.Dataset\n    if isinstance(data, mikeio.Dataset):\n        assert len(data.shape) == 1, \"Only 0-dimensional data are supported\"\n        if quantity is None:\n            quantity = Quantity.from_mikeio_iteminfo(data[obs_item].item)\n        data = data.to_dataframe()\n\n    cmp = Comparer.from_matched_data(\n        data,\n        obs_item=obs_item,\n        mod_items=mod_items,\n        aux_items=aux_items,\n        name=name,\n        weight=weight,\n        x=x,\n        y=y,\n        z=z,\n        x_item=x_item,\n        y_item=y_item,\n        quantity=quantity,\n    )\n\n    return cmp\n
"},{"location":"api/matching/#modelskill.from_config","title":"modelskill.from_config","text":"
from_config(conf, *, relative_path=True)\n

Load ComparerCollection from a config file (or dict)

Parameters:

Name Type Description Default conf Union[str, Path, dict]

path to config file or dict with configuration

required relative_path

True: file paths are relative to configuration file, False: file paths are absolute (relative to the current directory), by default True

True

Returns:

Type Description ComparerCollection

A ComparerCollection object from the given configuration

Examples:

>>> import modelskill as ms\n>>> cc = ms.from_config('Oresund.yml')\n
Source code in modelskill/configuration.py
def from_config(\n    conf: Union[dict, str, Path], *, relative_path=True\n) -> ComparerCollection:\n    \"\"\"Load ComparerCollection from a config file (or dict)\n\n    Parameters\n    ----------\n    conf : Union[str, Path, dict]\n        path to config file or dict with configuration\n    relative_path: bool, optional\n        True: file paths are relative to configuration file,\n        False: file paths are absolute (relative to the current directory),\n        by default True\n\n    Returns\n    -------\n    ComparerCollection\n        A ComparerCollection object from the given configuration\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> cc = ms.from_config('Oresund.yml')\n    \"\"\"\n    if isinstance(conf, (str, Path)):\n        p = Path(conf)\n        ext = p.suffix\n        dirname = Path(str(p.parents[0]))\n        if (ext == \".yml\") or (ext == \".yaml\") or (ext == \".conf\"):\n            conf = _yaml_to_dict(p)\n        elif \"xls\" in ext:\n            conf = _excel_to_dict(p)\n        else:\n            raise ValueError(\"Filename extension not supported! Use .yml or .xlsx\")\n    else:\n        dirname = Path(\".\")\n\n    assert isinstance(conf, dict)\n    modelresults = []\n    for name, mr_dict in conf[\"modelresults\"].items():\n        if not mr_dict.get(\"include\", True):\n            continue\n        fp = Path(mr_dict[\"filename\"])\n        if relative_path:\n            fp = dirname / fp\n\n        item = mr_dict.get(\"item\")\n        mr = model_result(fp, name=name, item=item)\n        modelresults.append(mr)\n\n    observations = []\n    for name, data in conf[\"observations\"].items():\n        if data.pop(\"include\", True):\n            data[\"name\"] = name\n            observations.append(_obs_from_dict(name, data, dirname, relative_path))\n\n    return match(obs=observations, mod=modelresults)\n
"},{"location":"api/metrics/","title":"Metrics","text":""},{"location":"api/metrics/#modelskill.metrics","title":"modelskill.metrics","text":"

The metrics module contains different skill metrics for evaluating the difference between a model and an observation.

  • bias
  • max_error
  • root_mean_squared_error (rmse)
  • urmse
  • mean_absolute_error (mae)
  • mean_absolute_percentage_error (mape)
  • kling_gupta_efficiency (kge)
  • nash_sutcliffe_efficiency (nse)
  • r2 (r2=nse)
  • model_efficiency_factor (mef)
  • wilmott
  • scatter_index (si)
  • scatter_index2
  • corrcoef (cc)
  • spearmanr (rho)
  • lin_slope
  • hit_ratio
  • explained_variance (ev)
  • peak_ratio (pr)

Circular metrics (for directional data with units in degrees):

  • c_bias
  • c_max_error
  • c_mean_absolute_error (c_mae)
  • c_root_mean_squared_error (c_rmse)
  • c_unbiased_root_mean_squared_error (c_urmse)

The names in parentheses are shorthand aliases for the different metrics.

Examples:

>>> obs = np.array([0.3, 2.1, -1.0])\n>>> mod = np.array([0.0, 2.3, 1.0])\n>>> bias(obs, mod)\nnp.float64(0.6333333333333332)\n>>> max_error(obs, mod)\nnp.float64(2.0)\n>>> rmse(obs, mod)\nnp.float64(1.173314393786536)\n>>> urmse(obs, mod)\nnp.float64(0.9877021593352702)\n>>> mae(obs, mod)\nnp.float64(0.8333333333333331)\n>>> mape(obs, mod)\nnp.float64(103.17460317460316)\n>>> nse(obs, mod)\nnp.float64(0.14786795048143053)\n>>> r2(obs, mod)\nnp.float64(0.14786795048143053)\n>>> mef(obs, mod)\nnp.float64(0.9231099877688299)\n>>> si(obs, mod)\nnp.float64(0.8715019052958266)\n>>> spearmanr(obs, mod)\nnp.float64(0.5)\n>>> willmott(obs, mod)\nnp.float64(0.7484604452865941)\n>>> hit_ratio(obs, mod, a=0.5)\nnp.float64(0.6666666666666666)\n>>> ev(obs, mod)\nnp.float64(0.39614855570839064)\n
"},{"location":"api/metrics/#modelskill.metrics.add_metric","title":"add_metric","text":"
add_metric(metric, has_units=False)\n

Adds a metric to the metric list. Useful for custom metrics.

Some metrics are dimensionless, others have the same dimension as the observations.

Parameters:

Name Type Description Default metric str or callable

Metric name or function

required has_units bool

True if metric has a dimension, False otherwise. Default:False

False

Returns:

Type Description None

Examples:

>>> add_metric(hit_ratio)\n>>> add_metric(rmse,True)\n
Source code in modelskill/metrics.py
def add_metric(metric: Callable, has_units: bool = False) -> None:\n    \"\"\"Adds a metric to the metric list. Useful for custom metrics.\n\n    Some metrics are dimensionless, others have the same dimension as the observations.\n\n    Parameters\n    ----------\n    metric : str or callable\n        Metric name or function\n    has_units : bool\n        True if metric has a dimension, False otherwise. Default:False\n\n    Returns\n    -------\n    None\n\n    Examples\n    --------\n    >>> add_metric(hit_ratio)\n    >>> add_metric(rmse,True)\n    \"\"\"\n    defined_metrics.add(metric.__name__)\n    if has_units:\n        METRICS_WITH_DIMENSION.add(metric.__name__)\n\n    # add the function to the module\n    setattr(sys.modules[__name__], metric.__name__, metric)\n
"},{"location":"api/metrics/#modelskill.metrics.bias","title":"bias","text":"
bias(obs, model)\n

Bias (mean error)

\\[ bias=\\frac{1}{n}\\sum_{i=1}^n (model_i - obs_i) \\]

Range: \\((-\\infty, \\infty)\\); Best: 0

Source code in modelskill/metrics.py
def bias(obs, model) -> Any:\n    r\"\"\"Bias (mean error)\n\n    $$\n    bias=\\frac{1}{n}\\sum_{i=1}^n (model_i - obs_i)\n    $$\n\n    Range: $(-\\infty, \\infty)$; Best: 0\n    \"\"\"\n\n    assert obs.size == model.size\n    return np.mean(model - obs)\n
"},{"location":"api/metrics/#modelskill.metrics.c_bias","title":"c_bias","text":"
c_bias(obs, model)\n

Circular bias (mean error)

Parameters:

Name Type Description Default obs ndarray

Observation in degrees (0, 360)

required model ndarray

Model in degrees (0, 360)

required Range required

Returns:

Type Description float

Circular bias

Examples:

>>> obs = np.array([10., 355., 170.])\n>>> mod = np.array([20., 5., -180.])\n>>> c_bias(obs, mod)\nnp.float64(10.0)\n
Source code in modelskill/metrics.py
def c_bias(obs: np.ndarray, model: np.ndarray) -> Any:\n    \"\"\"Circular bias (mean error)\n\n    Parameters\n    ----------\n    obs : np.ndarray\n        Observation in degrees (0, 360)\n    model : np.ndarray\n        Model in degrees (0, 360)\n\n    Range: [-180., 180.]; Best: 0.\n\n    Returns\n    -------\n    float\n        Circular bias\n\n    Examples\n    --------\n    >>> obs = np.array([10., 355., 170.])\n    >>> mod = np.array([20., 5., -180.])\n    >>> c_bias(obs, mod)\n    np.float64(10.0)\n    \"\"\"\n    from scipy.stats import circmean\n\n    resi = _c_residual(obs, model)\n    return circmean(resi, low=-180.0, high=180.0)\n
"},{"location":"api/metrics/#modelskill.metrics.c_mae","title":"c_mae","text":"
c_mae(obs, model, weights=None)\n

alias for circular mean absolute error

Source code in modelskill/metrics.py
def c_mae(\n    obs: np.ndarray,\n    model: np.ndarray,\n    weights: Optional[np.ndarray] = None,\n) -> Any:\n    \"\"\"alias for circular mean absolute error\"\"\"\n    return c_mean_absolute_error(obs, model, weights)\n
"},{"location":"api/metrics/#modelskill.metrics.c_max_error","title":"c_max_error","text":"
c_max_error(obs, model)\n

Circular max error

Parameters:

Name Type Description Default obs ndarray

Observation in degrees (0, 360)

required model ndarray

Model in degrees (0, 360)

required Range required

Returns:

Type Description float

Circular max error

Examples:

>>> obs = np.array([10., 350., 10.])\n>>> mod = np.array([20., 10., 350.])\n>>> c_max_error(obs, mod)\nnp.float64(20.0)\n
Source code in modelskill/metrics.py
def c_max_error(obs: np.ndarray, model: np.ndarray) -> Any:\n    \"\"\"Circular max error\n\n    Parameters\n    ----------\n    obs : np.ndarray\n        Observation in degrees (0, 360)\n    model : np.ndarray\n        Model in degrees (0, 360)\n\n    Range: :math:`[0, \\\\infty)`; Best: 0\n\n    Returns\n    -------\n    float\n        Circular max error\n\n    Examples\n    --------\n    >>> obs = np.array([10., 350., 10.])\n    >>> mod = np.array([20., 10., 350.])\n    >>> c_max_error(obs, mod)\n    np.float64(20.0)\n    \"\"\"\n\n    resi = _c_residual(obs, model)\n\n    # Compute the absolute differences and then\n    # find the shortest distance between angles\n    abs_diffs = np.abs(resi)\n    circular_diffs = np.minimum(abs_diffs, 360 - abs_diffs)\n    return np.max(circular_diffs)\n
"},{"location":"api/metrics/#modelskill.metrics.c_mean_absolute_error","title":"c_mean_absolute_error","text":"
c_mean_absolute_error(obs, model, weights=None)\n

Circular mean absolute error

Parameters:

Name Type Description Default obs ndarray

Observation in degrees (0, 360)

required model ndarray

Model in degrees (0, 360)

required weights ndarray

Weights, by default None

None Range required

Returns:

Type Description float

Circular mean absolute error

Source code in modelskill/metrics.py
def c_mean_absolute_error(\n    obs: np.ndarray,\n    model: np.ndarray,\n    weights: Optional[np.ndarray] = None,\n) -> Any:\n    \"\"\"Circular mean absolute error\n\n    Parameters\n    ----------\n    obs : np.ndarray\n        Observation in degrees (0, 360)\n    model : np.ndarray\n        Model in degrees (0, 360)\n    weights : np.ndarray, optional\n        Weights, by default None\n\n    Range: [0, 180]; Best: 0\n\n    Returns\n    -------\n    float\n        Circular mean absolute error\n    \"\"\"\n\n    resi = _c_residual(obs, model)\n    return np.average(np.abs(resi), weights=weights)\n
"},{"location":"api/metrics/#modelskill.metrics.c_rmse","title":"c_rmse","text":"
c_rmse(obs, model, weights=None)\n

alias for circular root mean squared error

Source code in modelskill/metrics.py
def c_rmse(\n    obs: np.ndarray,\n    model: np.ndarray,\n    weights: Optional[np.ndarray] = None,\n) -> Any:\n    \"\"\"alias for circular root mean squared error\"\"\"\n    return c_root_mean_squared_error(obs, model, weights)\n
"},{"location":"api/metrics/#modelskill.metrics.c_root_mean_squared_error","title":"c_root_mean_squared_error","text":"
c_root_mean_squared_error(obs, model, weights=None)\n

Circular root mean squared error

Parameters:

Name Type Description Default obs ndarray

Observation in degrees (0, 360)

required model ndarray

Model in degrees (0, 360)

required weights ndarray

Weights, by default None

None Range required

Returns:

Type Description float

Circular root mean squared error

Source code in modelskill/metrics.py
def c_root_mean_squared_error(\n    obs: np.ndarray,\n    model: np.ndarray,\n    weights: Optional[np.ndarray] = None,\n) -> Any:\n    \"\"\"Circular root mean squared error\n\n    Parameters\n    ----------\n    obs : np.ndarray\n        Observation in degrees (0, 360)\n    model : np.ndarray\n        Model in degrees (0, 360)\n    weights : np.ndarray, optional\n        Weights, by default None\n\n    Range: [0, 180]; Best: 0\n\n    Returns\n    -------\n    float\n        Circular root mean squared error\n    \"\"\"\n    residual = _c_residual(obs, model)\n    return np.sqrt(np.average(residual**2, weights=weights))\n
"},{"location":"api/metrics/#modelskill.metrics.c_unbiased_root_mean_squared_error","title":"c_unbiased_root_mean_squared_error","text":"
c_unbiased_root_mean_squared_error(obs, model, weights=None)\n

Circular unbiased root mean squared error

Parameters:

Name Type Description Default obs ndarray

Observation in degrees (0, 360)

required model ndarray

Model in degrees (0, 360)

required weights ndarray

Weights, by default None

None Range required

Returns:

Type Description float

Circular unbiased root mean squared error

Source code in modelskill/metrics.py
def c_unbiased_root_mean_squared_error(\n    obs: np.ndarray,\n    model: np.ndarray,\n    weights: Optional[np.ndarray] = None,\n) -> Any:\n    \"\"\"Circular unbiased root mean squared error\n\n    Parameters\n    ----------\n    obs : np.ndarray\n        Observation in degrees (0, 360)\n    model : np.ndarray\n        Model in degrees (0, 360)\n    weights : np.ndarray, optional\n        Weights, by default None\n\n    Range: [0, 180]; Best: 0\n\n    Returns\n    -------\n    float\n        Circular unbiased root mean squared error\n    \"\"\"\n    from scipy.stats import circmean\n\n    residual = _c_residual(obs, model)\n    residual = residual - circmean(residual, low=-180.0, high=180.0)\n    return np.sqrt(np.average(residual**2, weights=weights))\n
"},{"location":"api/metrics/#modelskill.metrics.c_urmse","title":"c_urmse","text":"
c_urmse(obs, model, weights=None)\n

alias for circular unbiased root mean squared error

Source code in modelskill/metrics.py
def c_urmse(\n    obs: np.ndarray,\n    model: np.ndarray,\n    weights: Optional[np.ndarray] = None,\n) -> Any:\n    \"\"\"alias for circular unbiased root mean squared error\"\"\"\n    return c_unbiased_root_mean_squared_error(obs, model, weights)\n
"},{"location":"api/metrics/#modelskill.metrics.cc","title":"cc","text":"
cc(obs, model, weights=None)\n

alias for corrcoef

Source code in modelskill/metrics.py
def cc(obs: np.ndarray, model: np.ndarray, weights=None) -> Any:\n    \"\"\"alias for corrcoef\"\"\"\n    return corrcoef(obs, model, weights)\n
"},{"location":"api/metrics/#modelskill.metrics.corrcoef","title":"corrcoef","text":"
corrcoef(obs, model, weights=None)\n

Pearson\u2019s Correlation coefficient (CC)

\\[ CC = \\frac{\\sum_{i=1}^n (model_i - \\overline{model})(obs_i - \\overline{obs}) } {\\sqrt{\\sum_{i=1}^n (model_i - \\overline{model})^2} \\sqrt{\\sum_{i=1}^n (obs_i - \\overline{obs})^2} } \\]

Range: [-1, 1]; Best: 1

See Also

spearmanr np.corrcoef

Source code in modelskill/metrics.py
def corrcoef(obs, model, weights=None) -> Any:\n    r\"\"\"Pearson\u2019s Correlation coefficient (CC)\n\n    $$\n    CC = \\frac{\\sum_{i=1}^n (model_i - \\overline{model})(obs_i - \\overline{obs}) }\n                   {\\sqrt{\\sum_{i=1}^n (model_i - \\overline{model})^2}\n                    \\sqrt{\\sum_{i=1}^n (obs_i - \\overline{obs})^2} }\n    $$\n\n    Range: [-1, 1]; Best: 1\n\n    See Also\n    --------\n    spearmanr\n    np.corrcoef\n    \"\"\"\n    assert obs.size == model.size\n    if len(obs) <= 1:\n        return np.nan\n\n    if weights is None:\n        return np.corrcoef(obs, model)[0, 1]\n    else:\n        C = np.cov(obs, model, fweights=weights)\n        return C[0, 1] / np.sqrt(C[0, 0] * C[1, 1])\n
"},{"location":"api/metrics/#modelskill.metrics.ev","title":"ev","text":"
ev(obs, model)\n

alias for explained_variance

Source code in modelskill/metrics.py
def ev(obs: np.ndarray, model: np.ndarray) -> Any:\n    \"\"\"alias for explained_variance\"\"\"\n    assert obs.size == model.size\n    return explained_variance(obs, model)\n
"},{"location":"api/metrics/#modelskill.metrics.explained_variance","title":"explained_variance","text":"
explained_variance(obs, model)\n

EV: Explained variance

EV is the explained variance and measures the proportion [0 - 1] to which the model accounts for the variation (dispersion) of the observations.

In cases with no bias, EV is equal to r2

\\[ \\frac{ \\sum_{i=1}^n (obs_i - \\overline{obs})^2 - \\sum_{i=1}^n \\left( (obs_i - \\overline{obs}) - (model_i - \\overline{model}) \\right)^2}{\\sum_{i=1}^n (obs_i - \\overline{obs})^2} \\]

Range: [0, 1]; Best: 1

See Also

r2

Source code in modelskill/metrics.py
def explained_variance(obs: np.ndarray, model: np.ndarray) -> Any:\n    r\"\"\"EV: Explained variance\n\n     EV is the explained variance and measures the proportion\n     [0 - 1] to which the model accounts for the variation\n     (dispersion) of the observations.\n\n     In cases with no bias, EV is equal to r2\n\n    $$\n    \\frac{ \\sum_{i=1}^n (obs_i - \\overline{obs})^2 -\n    \\sum_{i=1}^n \\left( (obs_i - \\overline{obs}) -\n    (model_i - \\overline{model}) \\right)^2}{\\sum_{i=1}^n\n    (obs_i - \\overline{obs})^2}\n    $$\n\n    Range: [0, 1]; Best: 1\n\n    See Also\n    --------\n    r2\n    \"\"\"\n\n    assert obs.size == model.size\n    if len(obs) == 0:\n        return np.nan\n\n    nominator: Any = np.sum((obs - obs.mean()) ** 2) - np.sum(  # type: ignore\n        ((obs - obs.mean()) - (model - model.mean())) ** 2\n    )\n    denominator: Any = np.sum((obs - obs.mean()) ** 2)\n\n    return nominator / denominator\n
"},{"location":"api/metrics/#modelskill.metrics.hit_ratio","title":"hit_ratio","text":"
hit_ratio(obs, model, a=0.1)\n

Fraction within obs \u00b1 acceptable deviation

\\[ HR = \\frac{1}{n}\\sum_{i=1}^n I_{|(model_i - obs_i)|} < a \\]

Range: [0, 1]; Best: 1

Examples:

>>> obs = np.array([1.0, 1.1, 1.2, 1.3, 1.4, 1.4, 1.3])\n>>> model = np.array([1.02, 1.16, 1.3, 1.38, 1.49, 1.45, 1.32])\n>>> hit_ratio(obs, model, a=0.05)\nnp.float64(0.2857142857142857)\n>>> hit_ratio(obs, model, a=0.1)\nnp.float64(0.8571428571428571)\n>>> hit_ratio(obs, model, a=0.15)\nnp.float64(1.0)\n
Source code in modelskill/metrics.py
def hit_ratio(obs: np.ndarray, model: np.ndarray, a=0.1) -> Any:\n    r\"\"\"Fraction within obs \u00b1 acceptable deviation\n\n    $$\n    HR = \\frac{1}{n}\\sum_{i=1}^n I_{|(model_i - obs_i)|} < a\n    $$\n\n    Range: [0, 1]; Best: 1\n\n    Examples\n    --------\n    >>> obs = np.array([1.0, 1.1, 1.2, 1.3, 1.4, 1.4, 1.3])\n    >>> model = np.array([1.02, 1.16, 1.3, 1.38, 1.49, 1.45, 1.32])\n    >>> hit_ratio(obs, model, a=0.05)\n    np.float64(0.2857142857142857)\n    >>> hit_ratio(obs, model, a=0.1)\n    np.float64(0.8571428571428571)\n    >>> hit_ratio(obs, model, a=0.15)\n    np.float64(1.0)\n    \"\"\"\n    assert obs.size == model.size\n\n    return np.mean(np.abs(obs - model) < a)\n
"},{"location":"api/metrics/#modelskill.metrics.kge","title":"kge","text":"
kge(obs, model)\n

alias for kling_gupta_efficiency

Source code in modelskill/metrics.py
def kge(obs: np.ndarray, model: np.ndarray) -> Any:\n    \"\"\"alias for kling_gupta_efficiency\"\"\"\n    return kling_gupta_efficiency(obs, model)\n
"},{"location":"api/metrics/#modelskill.metrics.kling_gupta_efficiency","title":"kling_gupta_efficiency","text":"
kling_gupta_efficiency(obs, model)\n

Kling-Gupta Efficiency (KGE)

\\[ KGE = 1 - \\sqrt{(r-1)^2 + \\left(\\frac{\\sigma_{mod}}{\\sigma_{obs}} - 1\\right)^2 + \\left(\\frac{\\mu_{mod}}{\\mu_{obs}} - 1\\right)^2 } \\]

where \\(r\\) is the pearson correlation coefficient, \\(\\mu_{obs},\\mu_{mod}\\) and \\(\\sigma_{obs},\\sigma_{mod}\\) is the mean and standard deviation of observations and model.

Range: \\((-\\infty, 1]\\); Best: 1

References

Gupta, H. V., Kling, H., Yilmaz, K. K. and Martinez, G. F., (2009), Decomposition of the mean squared error and NSE performance criteria: Implications for improving hydrological modelling, J. Hydrol., 377(1-2), 80-91 https://doi.org/10.1016/j.jhydrol.2009.08.003

Knoben, W. J. M., Freer, J. E., and Woods, R. A. (2019) Technical note: Inherent benchmark or not? Comparing Nash\u2013Sutcliffe and Kling\u2013Gupta efficiency scores, Hydrol. Earth Syst. Sci., 23, 4323-4331 https://doi.org/10.5194/hess-23-4323-2019

Source code in modelskill/metrics.py
def kling_gupta_efficiency(obs: np.ndarray, model: np.ndarray) -> Any:\n    r\"\"\"\n    Kling-Gupta Efficiency (KGE)\n\n    $$\n    KGE = 1 - \\sqrt{(r-1)^2 + \\left(\\frac{\\sigma_{mod}}{\\sigma_{obs}} - 1\\right)^2 +\n                                \\left(\\frac{\\mu_{mod}}{\\mu_{obs}} - 1\\right)^2 }\n    $$\n\n    where $r$ is the pearson correlation coefficient, $\\mu_{obs},\\mu_{mod}$ and $\\sigma_{obs},\\sigma_{mod}$ is the mean and standard deviation of observations and model.\n\n    Range: $(-\\infty, 1]$; Best: 1\n\n    References\n    ----------\n    Gupta, H. V., Kling, H., Yilmaz, K. K. and Martinez, G. F., (2009), Decomposition of the mean squared error and NSE performance criteria: Implications for improving hydrological modelling, J. Hydrol., 377(1-2), 80-91 <https://doi.org/10.1016/j.jhydrol.2009.08.003>\n\n    Knoben, W. J. M., Freer, J. E., and Woods, R. A. (2019) Technical note: Inherent benchmark or not? Comparing Nash\u2013Sutcliffe and Kling\u2013Gupta efficiency scores, Hydrol. Earth Syst. Sci., 23, 4323-4331 <https://doi.org/10.5194/hess-23-4323-2019>\n    \"\"\"\n    assert obs.size == model.size\n\n    if len(obs) == 0 or obs.std() == 0.0:\n        return np.nan\n\n    if model.std() > 1e-12:\n        r = corrcoef(obs, model)\n        if np.isnan(r):\n            r = 0.0\n    else:\n        r = 0.0\n\n    res = 1 - np.sqrt(\n        (r - 1) ** 2\n        + (model.std() / obs.std() - 1.0) ** 2\n        + (model.mean() / obs.mean() - 1.0) ** 2\n    )\n\n    return res\n
"},{"location":"api/metrics/#modelskill.metrics.lin_slope","title":"lin_slope","text":"
lin_slope(obs, model, reg_method='ols')\n

Slope of the regression line.

\\[ slope = \\frac{\\sum_{i=1}^n (model_i - \\overline {model})(obs_i - \\overline {obs})} {\\sum_{i=1}^n (obs_i - \\overline {obs})^2} \\]

Range: \\((-\\infty, \\infty )\\); Best: 1

Source code in modelskill/metrics.py
def lin_slope(obs: np.ndarray, model: np.ndarray, reg_method=\"ols\") -> Any:\n    r\"\"\"Slope of the regression line.\n\n    $$\n    slope = \\frac{\\sum_{i=1}^n (model_i - \\overline {model})(obs_i - \\overline {obs})}\n                    {\\sum_{i=1}^n (obs_i - \\overline {obs})^2}\n    $$\n\n    Range: $(-\\infty, \\infty )$; Best: 1\n    \"\"\"\n    assert obs.size == model.size\n    return _linear_regression(obs, model, reg_method)[0]\n
"},{"location":"api/metrics/#modelskill.metrics.mae","title":"mae","text":"
mae(obs, model, weights=None)\n

alias for mean_absolute_error

Source code in modelskill/metrics.py
def mae(\n    obs: np.ndarray, model: np.ndarray, weights: Optional[np.ndarray] = None\n) -> Any:\n    \"\"\"alias for mean_absolute_error\"\"\"\n    assert obs.size == model.size\n    return mean_absolute_error(obs, model, weights)\n
"},{"location":"api/metrics/#modelskill.metrics.mape","title":"mape","text":"
mape(obs, model)\n

alias for mean_absolute_percentage_error

Source code in modelskill/metrics.py
def mape(obs: np.ndarray, model: np.ndarray) -> Any:\n    \"\"\"alias for mean_absolute_percentage_error\"\"\"\n    return mean_absolute_percentage_error(obs, model)\n
"},{"location":"api/metrics/#modelskill.metrics.max_error","title":"max_error","text":"
max_error(obs, model)\n

Max (absolute) error

\\[ max_{error} = max(|model_i - obs_i|) \\]

Range: \\([0, \\infty)\\); Best: 0

Source code in modelskill/metrics.py
def max_error(obs, model) -> Any:\n    r\"\"\"Max (absolute) error\n\n    $$\n    max_{error} = max(|model_i - obs_i|)\n    $$\n\n    Range: $[0, \\infty)$; Best: 0\n    \"\"\"\n\n    assert obs.size == model.size\n    return np.max(np.abs(model - obs))\n
"},{"location":"api/metrics/#modelskill.metrics.mean_absolute_error","title":"mean_absolute_error","text":"
mean_absolute_error(obs, model, weights=None)\n

Mean Absolute Error (MAE)

\\[ MAE=\\frac{1}{n}\\sum_{i=1}^n|model_i - obs_i| \\]

Range: \\([0, \\infty)\\); Best: 0

Source code in modelskill/metrics.py
def mean_absolute_error(\n    obs: np.ndarray, model: np.ndarray, weights: Optional[np.ndarray] = None\n) -> Any:\n    r\"\"\"Mean Absolute Error (MAE)\n\n    $$\n    MAE=\\frac{1}{n}\\sum_{i=1}^n|model_i - obs_i|\n    $$\n\n    Range: $[0, \\infty)$; Best: 0\n    \"\"\"\n    assert obs.size == model.size\n\n    error = np.average(np.abs(model - obs), weights=weights)\n\n    return error\n
"},{"location":"api/metrics/#modelskill.metrics.mean_absolute_percentage_error","title":"mean_absolute_percentage_error","text":"
mean_absolute_percentage_error(obs, model)\n

Mean Absolute Percentage Error (MAPE)

\\[ MAPE=\\frac{1}{n}\\sum_{i=1}^n\\frac{|model_i - obs_i|}{obs_i}*100 \\]

Range: \\([0, \\infty)\\); Best: 0

Source code in modelskill/metrics.py
def mean_absolute_percentage_error(obs: np.ndarray, model: np.ndarray) -> Any:\n    r\"\"\"Mean Absolute Percentage Error (MAPE)\n\n    $$\n    MAPE=\\frac{1}{n}\\sum_{i=1}^n\\frac{|model_i - obs_i|}{obs_i}*100\n    $$\n\n    Range: $[0, \\infty)$; Best: 0\n    \"\"\"\n\n    assert obs.size == model.size\n\n    if len(obs) == 0:\n        return np.nan\n    if np.any(obs == 0.0):\n        warnings.warn(\"Observation is zero, consider to use another metric than MAPE\")\n        return np.nan  # TODO is it better to return a large value +inf than NaN?\n\n    return np.mean(np.abs((obs - model) / obs)) * 100\n
"},{"location":"api/metrics/#modelskill.metrics.mef","title":"mef","text":"
mef(obs, model)\n

alias for model_efficiency_factor

Source code in modelskill/metrics.py
def mef(obs: np.ndarray, model: np.ndarray) -> Any:\n    \"\"\"alias for model_efficiency_factor\"\"\"\n    return model_efficiency_factor(obs, model)\n
"},{"location":"api/metrics/#modelskill.metrics.metric_has_units","title":"metric_has_units","text":"
metric_has_units(metric)\n

Check if a metric has units (dimension).

Some metrics are dimensionless, others have the same dimension as the observations.

Parameters:

Name Type Description Default metric str or callable

Metric name or function

required

Returns:

Type Description bool

True if metric has a dimension, False otherwise

Examples:

>>> metric_has_units(\"rmse\")\nTrue\n>>> metric_has_units(\"kge\")\nFalse\n
Source code in modelskill/metrics.py
def metric_has_units(metric: Union[str, Callable]) -> bool:\n    \"\"\"Check if a metric has units (dimension).\n\n    Some metrics are dimensionless, others have the same dimension as the observations.\n\n    Parameters\n    ----------\n    metric : str or callable\n        Metric name or function\n\n    Returns\n    -------\n    bool\n        True if metric has a dimension, False otherwise\n\n    Examples\n    --------\n    >>> metric_has_units(\"rmse\")\n    True\n    >>> metric_has_units(\"kge\")\n    False\n    \"\"\"\n    if hasattr(metric, \"__name__\"):\n        name = metric.__name__\n    else:\n        name = metric\n\n    if name not in defined_metrics:\n        raise ValueError(f\"Metric {name} not defined. Choose from {defined_metrics}\")\n\n    return name in METRICS_WITH_DIMENSION\n
"},{"location":"api/metrics/#modelskill.metrics.model_efficiency_factor","title":"model_efficiency_factor","text":"
model_efficiency_factor(obs, model)\n

Model Efficiency Factor (MEF)

Scale independent RMSE, standardized by Stdev of observations

\\[ MEF = \\frac{RMSE}{STDEV}=\\frac{\\sqrt{\\frac{1}{n} \\sum_{i=1}^n(model_i - obs_i)^2}} {\\sqrt{\\frac{1}{n} \\sum_{i=1}^n(obs_i - \\overline{obs})^2}}=\\sqrt{1-NSE} \\]

Range: \\([0, \\infty)\\); Best: 0

See Also

nash_sutcliffe_efficiency root_mean_squared_error

Source code in modelskill/metrics.py
def model_efficiency_factor(obs: np.ndarray, model: np.ndarray) -> Any:\n    r\"\"\"Model Efficiency Factor (MEF)\n\n    Scale independent RMSE, standardized by Stdev of observations\n\n    $$\n    MEF = \\frac{RMSE}{STDEV}=\\frac{\\sqrt{\\frac{1}{n} \\sum_{i=1}^n(model_i - obs_i)^2}}\n                                    {\\sqrt{\\frac{1}{n} \\sum_{i=1}^n(obs_i - \\overline{obs})^2}}=\\sqrt{1-NSE}\n    $$\n\n    Range: $[0, \\infty)$; Best: 0\n\n    See Also\n    --------\n    nash_sutcliffe_efficiency\n    root_mean_squared_error\n\n    \"\"\"\n    assert obs.size == model.size\n\n    return rmse(obs, model) / obs.std()\n
"},{"location":"api/metrics/#modelskill.metrics.nash_sutcliffe_efficiency","title":"nash_sutcliffe_efficiency","text":"
nash_sutcliffe_efficiency(obs, model)\n

Nash-Sutcliffe Efficiency (NSE)

\\[ NSE = 1 - \\frac {\\sum _{i=1}^{n}\\left(model_{i} - obs_{i}\\right)^{2}} {\\sum_{i=1}^{n}\\left(obs_{i} - {\\overline{obs}}\\right)^{2}} \\]

Range: \\((-\\infty, 1]\\); Best: 1

Note

r2 = nash_sutcliffe_efficiency(nse)

References

Nash, J. E.; Sutcliffe, J. V. (1970). \"River flow forecasting through conceptual models part I \u2014 A discussion of principles\". Journal of Hydrology. 10 (3): 282\u2013290. https://doi.org/10.1016/0022-1694(70)90255-6

Source code in modelskill/metrics.py
def nash_sutcliffe_efficiency(obs: np.ndarray, model: np.ndarray) -> Any:\n    r\"\"\"Nash-Sutcliffe Efficiency (NSE)\n\n    $$\n    NSE = 1 - \\frac {\\sum _{i=1}^{n}\\left(model_{i} - obs_{i}\\right)^{2}}\n                    {\\sum_{i=1}^{n}\\left(obs_{i} - {\\overline{obs}}\\right)^{2}}\n    $$\n\n    Range: $(-\\infty, 1]$; Best: 1\n\n    Note\n    ----\n    r2 = nash_sutcliffe_efficiency(nse)\n\n    References\n    ----------\n    Nash, J. E.; Sutcliffe, J. V. (1970). \"River flow forecasting through conceptual models part I \u2014 A discussion of principles\". Journal of Hydrology. 10 (3): 282\u2013290. <https://doi.org/10.1016/0022-1694(70)90255-6>\n    \"\"\"\n    assert obs.size == model.size\n\n    if len(obs) == 0:\n        return np.nan\n    error = 1 - (np.sum((obs - model) ** 2) / np.sum((obs - np.mean(obs)) ** 2))  # type: ignore\n\n    return error\n
"},{"location":"api/metrics/#modelskill.metrics.nse","title":"nse","text":"
nse(obs, model)\n

alias for nash_sutcliffe_efficiency

Source code in modelskill/metrics.py
def nse(obs: np.ndarray, model: np.ndarray) -> Any:\n    \"\"\"alias for nash_sutcliffe_efficiency\"\"\"\n    return nash_sutcliffe_efficiency(obs, model)\n
"},{"location":"api/metrics/#modelskill.metrics.peak_ratio","title":"peak_ratio","text":"
peak_ratio(obs, model, inter_event_level=0.7, AAP=2, inter_event_time='36h')\n

Peak Ratio

PR is the mean of the largest-N individual ratios of identified peaks in the model / identified peaks in the measurements (N number of events defined by AAP). PR is calculated only for the joint-events, i.e., events that occur simultaneously within a window +/- 0.5*inter_event_time.

Parameters:

Name Type Description Default inter_event_level float

Inter-event level threshold (default: 0.7).

0.7 AAP Union[int, float]

Average Annual Peaks (ie, Number of peaks per year, on average). (default: 2)

2 inter_event_time str
Maximum time interval between peaks (default: 36 hours).\n
'36h' Range required Source code in modelskill/metrics.py
def peak_ratio(\n    obs: pd.Series,\n    model: np.ndarray,\n    inter_event_level: float = 0.7,\n    AAP: Union[int, float] = 2,\n    inter_event_time: str = \"36h\",\n) -> Any:\n    r\"\"\"Peak Ratio\n\n    PR is the mean of the largest-N individual ratios of identified peaks in the\n    model / identified peaks in the measurements (N number of events defined by AAP). PR is calculated only for the joint-events,\n    ie, events that ocurr simulateneously within a window +/- 0.5*inter_event_time.\n\n    Parameters\n    ----------\n    inter_event_level (float, optional)\n        Inter-event level threshold (default: 0.7).\n    AAP (int or float, optional)\n        Average Annual Peaks (ie, Number of peaks per year, on average). (default: 2)\n    inter_event_time (str, optional)\n            Maximum time interval between peaks (default: 36 hours).\n\n    $$\n    \\frac{\\sum_{i=1}^{N_{joint-peaks}} (\\frac{Peak_{model_i}}{Peak_{obs_i}} )}{N_{joint-peaks}}\n    $$\n\n    Range: $[0, \\infty)$; Best: 1.0\n    \"\"\"\n\n    assert obs.size == model.size\n    if len(obs) == 0:\n        return np.nan\n    assert isinstance(obs.index, pd.DatetimeIndex)\n    time = obs.index\n\n    # Calculate number of years\n    dt_int = time[1:].values - time[0:-1].values\n    dt_int_mode = float(stats.mode(dt_int, keepdims=False)[0]) / 1e9  # in seconds\n    N_years = dt_int_mode / 24 / 3600 / 365.25 * len(time)\n    peak_index, AAP_ = _partial_duration_series(\n        time,\n        obs,\n        inter_event_level=inter_event_level,\n        AAP=AAP,\n        inter_event_time=inter_event_time,\n    )\n    peaks = obs[peak_index]\n    found_peaks_obs = peaks.sort_values(ascending=False)\n\n    peak_index, _ = _partial_duration_series(\n        time,\n        model,\n        inter_event_level=inter_event_level,\n        AAP=AAP,\n        inter_event_time=inter_event_time,\n    )\n    peaks = model[peak_index]\n    found_peaks_mod = peaks.sort_values(ascending=False)\n\n    top_n_peaks 
= max(1, min(round(AAP_ * N_years), np.sum(peaks)))\n    # Resample~ish, find peaks spread maximum Half the inter event time (if inter event =36, select data paired +/- 18h) (or inter_event) and then select\n    indices_mod = (\n        abs(found_peaks_obs.index.values[:, None] - found_peaks_mod.index.values)\n        < pd.Timedelta(inter_event_time) / 2\n    ).any(axis=0)\n    indices_obs = (\n        abs(found_peaks_mod.index.values[:, None] - found_peaks_obs.index.values)\n        < pd.Timedelta(inter_event_time) / 2\n    ).any(axis=0)\n    # Find intersection (co-existing peaks, still a large number, O(1000s))\n    obs_joint = found_peaks_obs.loc[indices_obs]\n    mod_joint = found_peaks_mod.loc[indices_mod]\n    # Now we forget about time index, as peaks have been paired already.\n    df_filter = pd.DataFrame(\n        data={\n            \"model\": mod_joint.sort_index().values,\n            \"observation\": obs_joint.sort_index().values,\n        }\n    )\n    df_filter[\"Maximum\"] = df_filter.max(axis=1)\n    df_filter.sort_values(by=\"Maximum\", ascending=False, inplace=True)\n    # Finally we do the selection of the N- largest peaks from either model or measured\n    df_filter = df_filter.iloc[0:top_n_peaks, :]\n    # Rename to avoid further refactoring\n    obs_joint = df_filter.loc[:, \"observation\"]\n    mod_joint = df_filter.loc[:, \"model\"]\n\n    if len(obs_joint) == 0 or len(mod_joint) == 0:\n        return np.nan\n    res = np.mean(mod_joint.values / obs_joint.values)\n    return res\n
"},{"location":"api/metrics/#modelskill.metrics.pr","title":"pr","text":"
pr(obs, model, inter_event_level=0.7, AAP=2, inter_event_time='36h')\n

alias for peak_ratio

Source code in modelskill/metrics.py
def pr(\n    obs: pd.Series,\n    model: np.ndarray,\n    inter_event_level: float = 0.7,\n    AAP: Union[int, float] = 2,\n    inter_event_time: str = \"36h\",\n) -> Any:\n    \"\"\"alias for peak_ratio\"\"\"\n    assert obs.size == model.size\n    return peak_ratio(obs, model, inter_event_level, AAP, inter_event_time)\n
"},{"location":"api/metrics/#modelskill.metrics.r2","title":"r2","text":"
r2(obs, model)\n

Coefficient of determination (R2)

Pronounced 'R-squared'; the proportion of the variation in the dependent variable that is predictable from the independent variable(s), i.e. the proportion of explained variance.

\\[ R^2 = 1 - \\frac{\\sum_{i=1}^n (model_i - obs_i)^2} {\\sum_{i=1}^n (obs_i - \\overline {obs})^2} \\]

Range: \\((-\\infty, 1]\\); Best: 1

Note

r2 = nash_sutcliffe_efficiency(nse)

Examples:

>>> obs = np.array([1.0,1.1,1.2,1.3,1.4])\n>>> model = np.array([1.09, 1.16, 1.3 , 1.38, 1.49])\n>>> r2(obs,model)\nnp.float64(0.6379999999999998)\n
Source code in modelskill/metrics.py
def r2(obs: np.ndarray, model: np.ndarray) -> Any:\n    r\"\"\"Coefficient of determination (R2)\n\n    Pronounced 'R-squared'; the proportion of the variation in the dependent variable that is predictable from the independent variable(s), i.e. the proportion of explained variance.\n\n    $$\n    R^2 = 1 - \\frac{\\sum_{i=1}^n (model_i - obs_i)^2}\n                    {\\sum_{i=1}^n (obs_i - \\overline {obs})^2}\n    $$\n\n    Range: $(-\\infty, 1]$; Best: 1\n\n    Note\n    ----\n    r2 = nash_sutcliffe_efficiency(nse)\n\n    Examples\n    --------\n    >>> obs = np.array([1.0,1.1,1.2,1.3,1.4])\n    >>> model = np.array([1.09, 1.16, 1.3 , 1.38, 1.49])\n    >>> r2(obs,model)\n    np.float64(0.6379999999999998)\n    \"\"\"\n    assert obs.size == model.size\n    if len(obs) == 0:\n        return np.nan\n\n    residual = model - obs\n    SSr: Any = np.sum(residual**2)\n    SSt: Any = np.sum((obs - obs.mean()) ** 2)\n\n    return 1 - SSr / SSt\n
"},{"location":"api/metrics/#modelskill.metrics.rho","title":"rho","text":"
rho(obs, model)\n

alias for spearmanr

Source code in modelskill/metrics.py
def rho(obs: np.ndarray, model: np.ndarray) -> Any:\n    \"\"\"alias for spearmanr\"\"\"\n    return spearmanr(obs, model)\n
"},{"location":"api/metrics/#modelskill.metrics.rmse","title":"rmse","text":"
rmse(obs, model, weights=None, unbiased=False)\n

alias for root_mean_squared_error

Source code in modelskill/metrics.py
def rmse(\n    obs: np.ndarray,\n    model: np.ndarray,\n    weights: Optional[np.ndarray] = None,\n    unbiased: bool = False,\n) -> Any:\n    \"\"\"alias for root_mean_squared_error\"\"\"\n    return root_mean_squared_error(obs, model, weights, unbiased)\n
"},{"location":"api/metrics/#modelskill.metrics.root_mean_squared_error","title":"root_mean_squared_error","text":"
root_mean_squared_error(obs, model, weights=None, unbiased=False)\n

Root Mean Squared Error (RMSE)

\\[ res_i = model_i - obs_i \\] \\[ RMSE=\\sqrt{\\frac{1}{n} \\sum_{i=1}^n res_i^2} \\]

Unbiased version:

\\[ res_{u,i} = res_i - \\overline {res} \\] \\[ uRMSE=\\sqrt{\\frac{1}{n} \\sum_{i=1}^n res_{u,i}^2} \\]

Range: \\([0, \\infty)\\); Best: 0

Source code in modelskill/metrics.py
def root_mean_squared_error(\n    obs: np.ndarray,\n    model: np.ndarray,\n    weights: Optional[np.ndarray] = None,\n    unbiased: bool = False,\n) -> Any:\n    r\"\"\"Root Mean Squared Error (RMSE)\n\n    $$\n    res_i = model_i - obs_i\n    $$\n\n    $$\n    RMSE=\\sqrt{\\frac{1}{n} \\sum_{i=1}^n res_i^2}\n    $$\n\n    Unbiased version:\n\n    $$\n    res_{u,i} = res_i - \\overline {res}\n    $$\n\n    $$\n    uRMSE=\\sqrt{\\frac{1}{n} \\sum_{i=1}^n res_{u,i}^2}\n    $$\n\n    Range: $[0, \\infty)$; Best: 0\n\n    \"\"\"\n    assert obs.size == model.size\n\n    residual = obs - model\n    if unbiased:\n        residual = residual - residual.mean()\n    error = np.sqrt(np.average(residual**2, weights=weights))\n\n    return error\n
"},{"location":"api/metrics/#modelskill.metrics.scatter_index","title":"scatter_index","text":"
scatter_index(obs, model)\n

Scatter index (SI)

This is the same as the unbiased RMSE normalized by the absolute mean of the observations.

\\[ \\frac{ \\sqrt{ \\frac{1}{n} \\sum_{i=1}^n \\left( (model_i - \\overline {model}) - (obs_i - \\overline {obs}) \\right)^2} } {\\frac{1}{n} \\sum_{i=1}^n | obs_i | } \\]

Range: \\([0, \\infty)\\); Best: 0

Source code in modelskill/metrics.py
def scatter_index(obs: np.ndarray, model: np.ndarray) -> Any:\n    r\"\"\"Scatter index (SI)\n\n    Which is the same as the unbiased-RMSE normalized by the absolute mean of the observations.\n\n    $$\n    \\frac{ \\sqrt{ \\frac{1}{n} \\sum_{i=1}^n \\left( (model_i - \\overline {model}) - (obs_i - \\overline {obs}) \\right)^2} }\n    {\\frac{1}{n} \\sum_{i=1}^n | obs_i | }\n    $$\n\n    Range: $[0, \\infty)$; Best: 0\n    \"\"\"\n    assert obs.size == model.size\n    if len(obs) == 0:\n        return np.nan\n\n    residual = obs - model\n    residual = residual - residual.mean()  # unbiased\n    return np.sqrt(np.mean(residual**2)) / np.mean(np.abs(obs))\n
"},{"location":"api/metrics/#modelskill.metrics.scatter_index2","title":"scatter_index2","text":"
scatter_index2(obs, model)\n

Alternative formulation of the scatter index (SI)

\\[ \\sqrt {\\frac{\\sum_{i=1}^n \\left( (model_i - \\overline {model}) - (obs_i - \\overline {obs}) \\right)^2} {\\sum_{i=1}^n obs_i^2}} \\]

Range: [0, 100]; Best: 0

Source code in modelskill/metrics.py
def scatter_index2(obs: np.ndarray, model: np.ndarray) -> Any:\n    r\"\"\"Alternative formulation of the scatter index (SI)\n\n    $$\n    \\sqrt {\\frac{\\sum_{i=1}^n \\left( (model_i - \\overline {model}) - (obs_i - \\overline {obs}) \\right)^2}\n    {\\sum_{i=1}^n obs_i^2}}\n    $$\n\n    Range: [0, 100]; Best: 0\n    \"\"\"\n    assert obs.size == model.size\n    if len(obs) == 0:\n        return np.nan\n\n    return np.sqrt(\n        np.sum(((model - model.mean()) - (obs - obs.mean())) ** 2) / np.sum(obs**2)  # type: ignore\n    )\n
"},{"location":"api/metrics/#modelskill.metrics.si","title":"si","text":"
si(obs, model)\n

alias for scatter_index

Source code in modelskill/metrics.py
def si(obs: np.ndarray, model: np.ndarray) -> Any:\n    \"\"\"alias for scatter_index\"\"\"\n    return scatter_index(obs, model)\n
"},{"location":"api/metrics/#modelskill.metrics.spearmanr","title":"spearmanr","text":"
spearmanr(obs, model)\n

Spearman rank correlation coefficient

The rank correlation coefficient is similar to the Pearson correlation coefficient but applied to ranked quantities and is useful to quantify a monotonic relationship

\\[ \\rho = \\frac{\\sum_{i=1}^n (rmodel_i - \\overline{rmodel})(robs_i - \\overline{robs}) } {\\sqrt{\\sum_{i=1}^n (rmodel_i - \\overline{rmodel})^2} \\sqrt{\\sum_{i=1}^n (robs_i - \\overline{robs})^2} } \\]

Range: [-1, 1]; Best: 1

Examples:

>>> obs = np.linspace(-20, 20, 100)\n>>> mod = np.tanh(obs)\n>>> rho(obs, mod)\nnp.float64(0.9999759973116955)\n>>> spearmanr(obs, mod)\nnp.float64(0.9999759973116955)\n
See Also

corrcoef

Source code in modelskill/metrics.py
def spearmanr(obs: np.ndarray, model: np.ndarray) -> Any:\n    r\"\"\"Spearman rank correlation coefficient\n\n    The rank correlation coefficient is similar to the Pearson correlation coefficient but\n    applied to ranked quantities and is useful to quantify a monotonous relationship\n\n    $$\n    \\rho = \\frac{\\sum_{i=1}^n (rmodel_i - \\overline{rmodel})(robs_i - \\overline{robs}) }\n                    {\\sqrt{\\sum_{i=1}^n (rmodel_i - \\overline{rmodel})^2}\n                    \\sqrt{\\sum_{i=1}^n (robs_i - \\overline{robs})^2} }\n    $$\n\n    Range: [-1, 1]; Best: 1\n\n    Examples\n    --------\n    >>> obs = np.linspace(-20, 20, 100)\n    >>> mod = np.tanh(obs)\n    >>> rho(obs, mod)\n    np.float64(0.9999759973116955)\n    >>> spearmanr(obs, mod)\n    np.float64(0.9999759973116955)\n\n    See Also\n    --------\n    corrcoef\n    \"\"\"\n    import scipy.stats\n\n    return scipy.stats.spearmanr(obs, model)[0]\n
"},{"location":"api/metrics/#modelskill.metrics.urmse","title":"urmse","text":"
urmse(obs, model, weights=None)\n

Unbiased Root Mean Squared Error (uRMSE)

\\[ res_i = model_i - obs_i \\] \\[ res_{u,i} = res_i - \\overline {res} \\] \\[ uRMSE = \\sqrt{\\frac{1}{n} \\sum_{i=1}^n res_{u,i}^2} \\]

Range: \\([0, \\infty)\\); Best: 0

See Also

root_mean_squared_error

Source code in modelskill/metrics.py
def urmse(\n    obs: np.ndarray, model: np.ndarray, weights: Optional[np.ndarray] = None\n) -> Any:\n    r\"\"\"Unbiased Root Mean Squared Error (uRMSE)\n\n    $$\n    res_i = model_i - obs_i\n    $$\n\n    $$\n    res_{u,i} = res_i - \\overline {res}\n    $$\n\n    $$\n    uRMSE = \\sqrt{\\frac{1}{n} \\sum_{i=1}^n res_{u,i}^2}\n    $$\n\n    Range: $[0, \\infty)$; Best: 0\n\n    See Also\n    --------\n    root_mean_squared_error\n    \"\"\"\n    return root_mean_squared_error(obs, model, weights, unbiased=True)\n
"},{"location":"api/metrics/#modelskill.metrics.willmott","title":"willmott","text":"
willmott(obs, model)\n

Willmott's Index of Agreement

A scaled representation of the predictive accuracy of the model against observations. A value of 1 indicates a perfect match, and 0 indicates no agreement at all.

\\[ willmott = 1 - \\frac{\\frac{1}{n} \\sum_{i=1}^n(model_i - obs_i)^2} {\\frac{1}{n} \\sum_{i=1}^n(|model_i - \\overline{obs}| + |obs_i - \\overline{obs}|)^2} \\]

Range: [0, 1]; Best: 1

Examples:

>>> obs = np.array([1.0, 1.1, 1.2, 1.3, 1.4, 1.4, 1.3])\n>>> model = np.array([1.02, 1.16, 1.3, 1.38, 1.49, 1.45, 1.32])\n>>> willmott(obs, model)\nnp.float64(0.9501403174479723)\n
References

Willmott, C. J. 1981. \"On the validation of models\". Physical Geography, 2, 184\u2013194.

Source code in modelskill/metrics.py
def willmott(obs: np.ndarray, model: np.ndarray) -> Any:\n    r\"\"\"Willmott's Index of Agreement\n\n    A scaled representation of the predictive accuracy of the model against observations. A value of 1 indicates a perfect match, and 0 indicates no agreement at all.\n\n    $$\n    willmott = 1 - \\frac{\\frac{1}{n} \\sum_{i=1}^n(model_i - obs_i)^2}\n                        {\\frac{1}{n} \\sum_{i=1}^n(|model_i - \\overline{obs}| + |obs_i - \\overline{obs}|)^2}\n    $$\n\n    Range: [0, 1]; Best: 1\n\n    Examples\n    --------\n    >>> obs = np.array([1.0, 1.1, 1.2, 1.3, 1.4, 1.4, 1.3])\n    >>> model = np.array([1.02, 1.16, 1.3, 1.38, 1.49, 1.45, 1.32])\n    >>> willmott(obs, model)\n    np.float64(0.9501403174479723)\n\n    References\n    ----------\n    Willmott, C. J. 1981. \"On the validation of models\". Physical Geography, 2, 184\u2013194.\n    \"\"\"\n\n    assert obs.size == model.size\n    if len(obs) == 0:\n        return np.nan\n\n    residual = model - obs\n    nominator: Any = np.sum(residual**2)\n    denominator: Any = np.sum(\n        (np.abs(model - obs.mean()) + np.abs(obs - obs.mean())) ** 2\n    )\n\n    return 1 - nominator / denominator\n
"},{"location":"api/plotting/","title":"Plotting","text":""},{"location":"api/plotting/#modelskill.plotting","title":"modelskill.plotting","text":"

The plotting module provides functions useful for skill assessment that can be used independently of the comparison module.

  • scatter is a function that can be used to plot a scatter suitable for skill assessment, with a 1:1 line and a linear regression line.
  • wind_rose is a function that can be used to plot a dual wind rose to compare two datasets of magnitudes and directions.
  • spatial_overview is a function that can be used to plot a spatial overview of two datasets.
  • temporal_coverage is a function that can be used to plot the temporal coverage of two datasets.
"},{"location":"api/plotting/#modelskill.plotting.scatter","title":"scatter","text":"
scatter(x, y, *, bins=120, quantiles=None, fit_to_quantiles=False, show_points=None, show_hist=None, show_density=None, norm=None, backend='matplotlib', figsize=(8, 8), xlim=None, ylim=None, reg_method='ols', title='', xlabel='', ylabel='', skill_table=False, skill_scores=None, skill_score_unit='', ax=None, **kwargs)\n

Scatter plot showing compared data: observation vs modelled Optionally, with density histogram.

Parameters:

Name Type Description Default x ndarray

X values e.g model values, must be same length as y

required y ndarray

Y values e.g observation values, must be same length as x

required bins int | float

bins for the 2D histogram on the background. By default 120 bins. if int, represents the number of bins of 2D if float, represents the bin size if sequence (list of int or float), represents the bin edges

120 quantiles int | Sequence[float] | None

number of quantiles for QQ-plot, by default None and will depend on the scatter data length (10, 100 or 1000) if int, this is the number of points if sequence (list of floats), represents the desired quantiles (from 0 to 1)

None fit_to_quantiles bool

by default the regression line is fitted to all data, if True, it is fitted to the quantiles which can be useful to represent the extremes of the distribution, by default False

False show_points (bool, int, float)

Should the scatter points be displayed? None means: show all points if fewer than 1e4, otherwise show 1e4 sample points, by default None. float: fraction of points to show on plot from 0 to 1. eg 0.5 shows 50% of the points. int: if 'n' (int) given, then 'n' points will be displayed, randomly selected.

None show_hist bool

show the data density as a 2d histogram, by default None

None show_density Optional[bool]

show the data density as a colormap of the scatter, by default None. If both show_density and show_hist are None, then show_density is used by default. for binning the data, the previous kword bins=Float is used

None norm Normalize

colormap normalization If None, defaults to matplotlib.colors.PowerNorm(vmin=1,gamma=0.5)

None backend str

use \"plotly\" (interactive) or \"matplotlib\" backend, by default \"matplotlib\"

'matplotlib' figsize tuple

width and height of the figure, by default (8, 8)

(8, 8) xlim tuple

plot range for the observation (xmin, xmax), by default None

None ylim tuple

plot range for the model (ymin, ymax), by default None

None reg_method str or bool

method for determining the regression line \"ols\" : ordinary least squares regression \"odr\" : orthogonal distance regression, False : no regression line by default \"ols\"

'ols' title str

plot title, by default None

'' xlabel str

x-label text on plot, by default None

'' ylabel str

y-label text on plot, by default None

'' skill_table Optional[str | Sequence[str] | bool]

calculate skill scores and show in box next to the plot, True will show default metrics, list of metrics will show these skill scores, by default False, Note: cannot be used together with skill_scores argument

False skill_scores dict[str, float]

dictionary with skill scores to be shown in box next to the plot, by default None Note: cannot be used together with skill_table argument

None skill_score_unit str

unit for skill_scores, by default None

'' ax Axes

axes to plot on, by default None

None **kwargs {}

Returns:

Type Description Axes

The axes on which the scatter plot was drawn.

Source code in modelskill/plotting/_scatter.py
def scatter(\n    x: np.ndarray,\n    y: np.ndarray,\n    *,\n    bins: int | float = 120,\n    quantiles: int | Sequence[float] | None = None,\n    fit_to_quantiles: bool = False,\n    show_points: bool | int | float | None = None,\n    show_hist: Optional[bool] = None,\n    show_density: Optional[bool] = None,\n    norm: Optional[colors.Normalize] = None,\n    backend: Literal[\"matplotlib\", \"plotly\"] = \"matplotlib\",\n    figsize: Tuple[float, float] = (8, 8),\n    xlim: Optional[Tuple[float, float]] = None,\n    ylim: Optional[Tuple[float, float]] = None,\n    reg_method: str | bool = \"ols\",\n    title: str = \"\",\n    xlabel: str = \"\",\n    ylabel: str = \"\",\n    skill_table: Optional[str | Sequence[str] | bool] = False,\n    skill_scores: Mapping[str, float] | None = None,\n    skill_score_unit: Optional[str] = \"\",\n    ax: Optional[Axes] = None,\n    **kwargs,\n) -> Axes:\n    \"\"\"Scatter plot showing compared data: observation vs modelled\n    Optionally, with density histogram.\n\n    Parameters\n    ----------\n    x: np.array\n        X values e.g model values, must be same length as y\n    y: np.array\n        Y values e.g observation values, must be same length as x\n    bins: (int, float, sequence), optional\n        bins for the 2D histogram on the background. 
By default 120 bins.\n        if int, represents the number of bins of 2D\n        if float, represents the bin size\n        if sequence (list of int or float), represents the bin edges\n    quantiles: (int, sequence), optional\n        number of quantiles for QQ-plot, by default None and will depend on the scatter data length (10, 100 or 1000)\n        if int, this is the number of points\n        if sequence (list of floats), represents the desired quantiles (from 0 to 1)\n    fit_to_quantiles: bool, optional\n        by default the regression line is fitted to all data, if True, it is fitted to the quantiles\n        which can be useful to represent the extremes of the distribution, by default False\n    show_points : (bool, int, float), optional\n        Should the scatter points be displayed?\n        None means: show all points if fewer than 1e4, otherwise show 1e4 sample points, by default None.\n        float: fraction of points to show on plot from 0 to 1. eg 0.5 shows 50% of the points.\n        int: if 'n' (int) given, then 'n' points will be displayed, randomly selected.\n    show_hist : bool, optional\n        show the data density as a 2d histogram, by default None\n    show_density: bool, optional\n        show the data density as a colormap of the scatter, by default None. 
If both `show_density` and `show_hist`\n        are None, then `show_density` is used by default.\n        for binning the data, the previous kword `bins=Float` is used\n    norm : matplotlib.colors.Normalize\n        colormap normalization\n        If None, defaults to matplotlib.colors.PowerNorm(vmin=1,gamma=0.5)\n    backend : str, optional\n        use \"plotly\" (interactive) or \"matplotlib\" backend, by default \"matplotlib\"\n    figsize : tuple, optional\n        width and height of the figure, by default (8, 8)\n    xlim : tuple, optional\n        plot range for the observation (xmin, xmax), by default None\n    ylim : tuple, optional\n        plot range for the model (ymin, ymax), by default None\n    reg_method : str or bool, optional\n        method for determining the regression line\n        \"ols\" : ordinary least squares regression\n        \"odr\" : orthogonal distance regression,\n        False : no regression line\n        by default \"ols\"\n    title : str, optional\n        plot title, by default None\n    xlabel : str, optional\n        x-label text on plot, by default None\n    ylabel : str, optional\n        y-label text on plot, by default None\n    skill_table: str, List[str], bool, optional\n        calculate skill scores and show in box next to the plot,\n        True will show default metrics, list of metrics will show\n        these skill scores, by default False,\n        Note: cannot be used together with skill_scores argument\n    skill_scores : dict[str, float], optional\n        dictionary with skill scores to be shown in box next to\n        the plot, by default None\n        Note: cannot be used together with skill_table argument\n    skill_score_unit : str, optional\n        unit for skill_scores, by default None\n    ax : matplotlib.axes.Axes, optional\n        axes to plot on, by default None\n    **kwargs\n\n    Returns\n    -------\n    matplotlib.axes.Axes\n        The axes on which the scatter plot was drawn.\n    
\"\"\"\n    if \"skill_df\" in kwargs:\n        warnings.warn(\n            \"The `skill_df` keyword argument is deprecated. Use `skill_scores` instead.\",\n            FutureWarning,\n        )\n        skill_scores = kwargs.pop(\"skill_df\").to_dict(\"records\")[0]\n\n    if show_hist is None and show_density is None:\n        # Default: points density\n        show_density = True\n\n    if len(x) != len(y):\n        raise ValueError(\"x & y are not of equal length\")\n\n    if norm is None:\n        norm = colors.PowerNorm(vmin=1, gamma=0.5)\n\n    x_sample, y_sample = sample_points(x, y, show_points)\n    xq, yq = quantiles_xy(x, y, quantiles)\n\n    xmin, xmax = x.min(), x.max()\n    ymin, ymax = y.min(), y.max()\n    xymin = min([xmin, ymin])\n    xymax = max([xmax, ymax])\n\n    nbins_hist, binsize = _get_bins(bins, xymin=xymin, xymax=xymax)\n\n    if xlim is None:\n        xlim = (xymin - binsize, xymax + binsize)\n\n    if ylim is None:\n        ylim = (xymin - binsize, xymax + binsize)\n\n    x_trend = np.array([xlim[0], xlim[1]])\n\n    if show_hist and show_density:\n        raise TypeError(\n            \"if `show_hist=True` then `show_density` must be either `False` or `None`\"\n        )\n\n    z = None\n    if show_density and len(x_sample) > 0:\n        if not isinstance(bins, (float, int)):\n            raise TypeError(\n                \"if `show_density=True` then bins must be either float or int\"\n            )\n\n        # calculate density data\n        z = __scatter_density(x_sample, y_sample, binsize=binsize)\n        idx = z.argsort()\n        # Sort data by colormaps\n        x_sample, y_sample, z = x_sample[idx], y_sample[idx], z[idx]\n        # scale Z by sample size\n        z = z * len(x) / len(x_sample)\n\n    PLOTTING_BACKENDS: dict[str, Callable] = {\n        \"matplotlib\": _scatter_matplotlib,\n        \"plotly\": _scatter_plotly,\n    }\n\n    if backend not in PLOTTING_BACKENDS:\n        raise ValueError(f\"backend must be one 
of {list(PLOTTING_BACKENDS.keys())}\")\n\n    if skill_table:\n        from modelskill import from_matched\n\n        if skill_scores is not None:\n            raise ValueError(\n                \"Cannot pass skill_scores and skill_table at the same time\"\n            )\n        df = pd.DataFrame({\"obs\": x, \"model\": y})\n        cmp = from_matched(df)\n        metrics = None if skill_table is True else skill_table\n        skill = cmp.skill(metrics=metrics)\n        skill_scores = skill.to_dict(\"records\")[0]\n\n    return PLOTTING_BACKENDS[backend](\n        x=x,\n        y=y,\n        x_sample=x_sample,\n        y_sample=y_sample,\n        z=z,\n        xq=xq,\n        yq=yq,\n        x_trend=x_trend,\n        show_density=show_density,\n        norm=norm,\n        show_points=show_points,\n        show_hist=show_hist,\n        nbins_hist=nbins_hist,\n        reg_method=reg_method,\n        xlabel=xlabel,\n        ylabel=ylabel,\n        figsize=figsize,\n        xlim=xlim,\n        ylim=ylim,\n        title=title,\n        skill_scores=skill_scores,\n        skill_score_unit=skill_score_unit,\n        fit_to_quantiles=fit_to_quantiles,\n        ax=ax,\n        **kwargs,\n    )\n
"},{"location":"api/plotting/#modelskill.plotting.spatial_overview","title":"spatial_overview","text":"
spatial_overview(obs, mod=None, ax=None, figsize=None, title=None)\n

Plot observation points on a map showing the model domain

Parameters:

Name Type Description Default obs List[Observation]

List of observations to be shown on map

required mod Union[ModelResult, GeometryFM]

Model domain to be shown as outline

None ax

Adding to existing axis, instead of creating new fig

None figsize (float, float)

figure size, by default None

None title Optional[str]

plot title, default empty

None See Also

temporal_coverage

Returns:

Type Description Axes

The matplotlib axes object

Examples:

>>> import modelskill as ms\n>>> o1 = ms.PointObservation('HKNA_Hm0.dfs0', item=0, x=4.2420, y=52.6887, name=\"HKNA\")\n>>> o2 = ms.TrackObservation(\"Alti_c2_Dutch.dfs0\", item=3, name=\"c2\")\n>>> mr1 = ms.DfsuModelResult('HKZN_local_2017_DutchCoast.dfsu', name='SW_1', item=0)\n>>> mr2 = ms.DfsuModelResult('HKZN_local_2017_DutchCoast_v2.dfsu', name='SW_2', item=0)\n>>> ms.plotting.spatial_overview([o1, o2], [mr1, mr2])\n
Source code in modelskill/plotting/_spatial_overview.py
def spatial_overview(\n    obs: List[Observation],\n    mod=None,\n    ax=None,\n    figsize: Optional[Tuple] = None,\n    title: Optional[str] = None,\n) -> matplotlib.axes.Axes:\n    \"\"\"Plot observation points on a map showing the model domain\n\n    Parameters\n    ----------\n    obs: list[Observation]\n        List of observations to be shown on map\n    mod : Union[ModelResult, mikeio.GeometryFM], optional\n        Model domain to be shown as outline\n    ax: matplotlib.axes, optional\n        Adding to existing axis, instead of creating new fig\n    figsize : (float, float), optional\n        figure size, by default None\n    title: str, optional\n        plot title, default empty\n\n    See Also\n    --------\n    temporal_coverage\n\n    Returns\n    -------\n    matplotlib.axes.Axes\n        The matplotlib axes object\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> o1 = ms.PointObservation('HKNA_Hm0.dfs0', item=0, x=4.2420, y=52.6887, name=\"HKNA\")\n    >>> o2 = ms.TrackObservation(\"Alti_c2_Dutch.dfs0\", item=3, name=\"c2\")\n    >>> mr1 = ms.DfsuModelResult('HKZN_local_2017_DutchCoast.dfsu', name='SW_1', item=0)\n    >>> mr2 = ms.DfsuModelResult('HKZN_local_2017_DutchCoast_v2.dfsu', name='SW_2', item=0)\n    >>> ms.plotting.spatial_overview([o1, o2], [mr1, mr2])\n    \"\"\"\n    obs = [] if obs is None else list(obs) if isinstance(obs, Sequence) else [obs]  # type: ignore\n    mod = [] if mod is None else list(mod) if isinstance(mod, Sequence) else [mod]  # type: ignore\n\n    ax = _get_ax(ax=ax, figsize=figsize)\n    offset_x = 1  # TODO: better default\n\n    for m in mod:\n        # TODO: support Gridded ModelResults\n        if isinstance(m, (PointModelResult, TrackModelResult)):\n            raise ValueError(\n                f\"Model type {type(m)} not supported. 
Only DfsuModelResult and mikeio.GeometryFM supported!\"\n            )\n        if hasattr(m, \"data\") and hasattr(m.data, \"geometry\"):\n            # mod_name = m.name  # TODO: better support for multiple models\n            m = m.data.geometry\n        if hasattr(m, \"node_coordinates\"):\n            xn = m.node_coordinates[:, 0]\n            offset_x = 0.02 * (max(xn) - min(xn))\n        m.plot.outline(ax=ax)\n\n    for o in obs:\n        if isinstance(o, PointObservation):\n            ax.scatter(x=o.x, y=o.y, marker=\"x\")\n            ax.annotate(o.name, (o.x + offset_x, o.y))  # type: ignore\n        elif isinstance(o, TrackObservation):\n            if o.n_points < 10000:\n                ax.scatter(x=o.x, y=o.y, c=o.values, marker=\".\", cmap=\"Reds\")\n            else:\n                print(f\"{o.name}: Too many points to plot\")\n                # TODO: group by lonlat bin or sample randomly\n        else:\n            raise ValueError(\n                f\"Could not show observation {o}. Only PointObservation and TrackObservation supported.\"\n            )\n\n    if not title:\n        title = \"Spatial coverage\"\n    ax.set_title(title)\n\n    return ax\n
"},{"location":"api/plotting/#modelskill.plotting.taylor_diagram","title":"taylor_diagram","text":"
taylor_diagram(obs_std, points, figsize=(7, 7), obs_text='Observations', normalize_std=False, ax=None, title='Taylor diagram')\n

Plot a Taylor diagram using the given observations and points.

Parameters:

Name Type Description Default obs_std float

Standard deviation of the observations.

required points list of TaylorPoint objects or a single TaylorPoint object

Points to plot on the Taylor diagram.

required figsize tuple

Figure size in inches. Default is (7, 7).

(7, 7) obs_text str

Label for the observations. Default is \"Observations\".

'Observations' normalize_std bool

Whether to normalize the standard deviation of the points by the standard deviation of the observations. Default is False.

False title str

Title of the plot. Default is \"Taylor diagram\".

'Taylor diagram'

Returns:

Type Description Figure

The matplotlib figure object

Source code in modelskill/plotting/_taylor_diagram.py
def taylor_diagram(\n    obs_std,\n    points,\n    figsize=(7, 7),\n    obs_text=\"Observations\",\n    normalize_std=False,\n    ax=None,\n    title=\"Taylor diagram\",\n) -> matplotlib.figure.Figure:\n    \"\"\"\n    Plot a Taylor diagram using the given observations and points.\n\n    Parameters\n    -----------\n    obs_std : float\n        Standard deviation of the observations.\n    points : list of TaylorPoint objects or a single TaylorPoint object\n        Points to plot on the Taylor diagram.\n    figsize : tuple, optional\n        Figure size in inches. Default is (7, 7).\n    obs_text : str, optional\n        Label for the observations. Default is \"Observations\".\n    normalize_std : bool, optional\n        Whether to normalize the standard deviation of the points by the standard deviation of the observations. Default is False.\n    title : str, optional\n        Title of the plot. Default is \"Taylor diagram\".\n\n    Returns\n    --------\n    matplotlib.figure.Figure\n            The matplotlib figure object\n    \"\"\"\n\n    if np.isscalar(figsize):\n        figsize = (figsize, figsize)\n    elif figsize[0] != figsize[1]:\n        warnings.warn(\n            \"It is strongly recommended that the aspect ratio is 1:1 for Taylor diagrams\"\n        )\n    fig = plt.figure(figsize=figsize)\n\n    # srange=(0, 1.5),\n    if len(obs_text) > 30:\n        obs_text = obs_text[:25] + \"...\"\n\n    td = TaylorDiagram(\n        obs_std, fig=fig, rect=111, label=obs_text, normalize_std=normalize_std\n    )\n    contours = td.add_contours(levels=8, colors=\"0.5\", linestyles=\"dotted\")\n    plt.clabel(contours, inline=1, fontsize=10, fmt=\"%.2f\")\n\n    if isinstance(points, TaylorPoint):\n        points = [points]\n    for p in points:\n        assert isinstance(p, TaylorPoint)\n        m = \"o\" if p.marker is None else p.marker\n        ms = \"6\" if p.marker_size is None else p.marker_size\n        std = p.std / p.obs_std if normalize_std else p.std\n   
     td.add_sample(std, p.cc, marker=m, ms=ms, ls=\"\", label=p.name)\n        # marker=f\"${1}$\",\n        # td.add_sample(0.2, 0.8, marker=\"+\", ms=15, mew=1.2, ls=\"\", label=\"m2\")\n    td.add_grid()\n    fig.legend(\n        td.samplePoints,\n        [p.get_label() for p in td.samplePoints],\n        numpoints=1,\n        prop=dict(size=\"medium\"),\n        loc=\"upper right\",\n    )\n    fig.suptitle(title, size=\"x-large\")\n\n    # prevent the plot from being displayed, since it is also displayed by the returned object\n    plt.close()\n    return fig\n
"},{"location":"api/plotting/#modelskill.plotting.temporal_coverage","title":"temporal_coverage","text":"
temporal_coverage(obs=None, mod=None, *, limit_to_model_period=True, marker='_', ax=None, figsize=None, title=None)\n

Plot graph showing temporal coverage for all observations and models

Parameters:

Name Type Description Default obs List[Observation]

Show observation(s) as separate lines on plot

None mod List[ModelResult]

Show model(s) as separate lines on plot, by default None

None limit_to_model_period bool

Show temporal coverage only for period covered by the model, by default True

True marker str

plot marker for observations, by default \"_\"

'_' ax

Adding to existing axis, instead of creating new fig

None figsize Tuple(float, float)

size of figure, by default (7, 0.45*n_lines)

None title

plot title, default empty

None See Also

spatial_overview

Returns:

Type Description Axes

The matplotlib axes object

Examples:

>>> import modelskill as ms\n>>> o1 = ms.PointObservation('HKNA_Hm0.dfs0', item=0, x=4.2420, y=52.6887, name=\"HKNA\")\n>>> o2 = ms.TrackObservation(\"Alti_c2_Dutch.dfs0\", item=3, name=\"c2\")\n>>> mr1 = ms.DfsuModelResult('HKZN_local_2017_DutchCoast.dfsu', name='SW_1', item=0)\n>>> mr2 = ms.DfsuModelResult('HKZN_local_2017_DutchCoast_v2.dfsu', name='SW_2', item=0)\n>>> ms.plotting.temporal_coverage([o1, o2], [mr1, mr2])\n>>> ms.plotting.temporal_coverage([o1, o2], mr2, limit_to_model_period=False)\n>>> ms.plotting.temporal_coverage(o2, [mr1, mr2], marker=\".\")\n>>> ms.plotting.temporal_coverage(mod=[mr1, mr2], figsize=(5,3))\n
Source code in modelskill/plotting/_temporal_coverage.py
def temporal_coverage(\n    obs=None,\n    mod=None,\n    *,\n    limit_to_model_period=True,\n    marker=\"_\",\n    ax=None,\n    figsize=None,\n    title=None,\n) -> matplotlib.axes.Axes:\n    \"\"\"Plot graph showing temporal coverage for all observations and models\n\n    Parameters\n    ----------\n    obs : List[Observation], optional\n        Show observation(s) as separate lines on plot\n    mod : List[ModelResult], optional\n        Show model(s) as separate lines on plot, by default None\n    limit_to_model_period : bool, optional\n        Show temporal coverage only for period covered\n        by the model, by default True\n    marker : str, optional\n        plot marker for observations, by default \"_\"\n    ax: matplotlib.axes, optional\n        Adding to existing axis, instead of creating new fig\n    figsize : Tuple(float, float), optional\n        size of figure, by default (7, 0.45*n_lines)\n    title: str, optional\n        plot title, default empty\n\n    See Also\n    --------\n    spatial_overview\n\n    Returns\n    -------\n    matplotlib.axes.Axes\n        The matplotlib axes object\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> o1 = ms.PointObservation('HKNA_Hm0.dfs0', item=0, x=4.2420, y=52.6887, name=\"HKNA\")\n    >>> o2 = ms.TrackObservation(\"Alti_c2_Dutch.dfs0\", item=3, name=\"c2\")\n    >>> mr1 = ms.DfsuModelResult('HKZN_local_2017_DutchCoast.dfsu', name='SW_1', item=0)\n    >>> mr2 = ms.DfsuModelResult('HKZN_local_2017_DutchCoast_v2.dfsu', name='SW_2', item=0)\n    >>> ms.plotting.temporal_coverage([o1, o2], [mr1, mr2])\n    >>> ms.plotting.temporal_coverage([o1, o2], mr2, limit_to_model_period=False)\n    >>> ms.plotting.temporal_coverage(o2, [mr1, mr2], marker=\".\")\n    >>> ms.plotting.temporal_coverage(mod=[mr1, mr2], figsize=(5,3))\n    \"\"\"\n    obs = [] if obs is None else list(obs) if isinstance(obs, Sequence) else [obs]\n    mod = [] if mod is None else list(mod) if isinstance(mod, Sequence) 
else [mod]\n\n    n_lines = len(obs) + len(mod)\n    if figsize is None:\n        ysize = max(2.0, 0.45 * n_lines)\n        figsize = (7, ysize)\n\n    fig, ax = _get_fig_ax(ax=ax, figsize=figsize)\n    y = np.repeat(0.0, 2)\n    labels = []\n\n    if len(mod) > 0:\n        for mr in mod:\n            y += 1.0\n            plt.plot([mr.time[0], mr.time[-1]], y)\n            labels.append(mr.name)\n\n    for o in obs:\n        y += 1.0\n        plt.plot(o.time, y[0] * np.ones(len(o.time)), marker, markersize=5)\n        labels.append(o.name)\n\n    if len(mod) > 0 and limit_to_model_period:\n        mr = mod[0]  # take first model\n        plt.xlim([mr.time[0], mr.time[-1]])\n\n    plt.yticks(np.arange(n_lines) + 1, labels)\n    if len(mod) > 0:\n        for j in range(len(mod)):\n            ax.get_yticklabels()[j].set_fontstyle(\"italic\")\n            ax.get_yticklabels()[j].set_weight(\"bold\")\n            # set_color(\"#004165\")\n    fig.autofmt_xdate()\n\n    if title:\n        ax.set_title(title)\n    return ax\n
"},{"location":"api/plotting/#modelskill.plotting.wind_rose","title":"wind_rose","text":"
wind_rose(data, *, labels=('Measurement', 'Model'), mag_step=None, n_sectors=16, calm_threshold=None, calm_size=None, calm_text='Calm', r_step=0.1, r_max=None, legend=True, cmap1='viridis', cmap2='Greys', mag_bins=None, max_bin=None, n_dir_labels=None, secondary_dir_step_factor=2.0, figsize=(8, 8), ax=None, title=None)\n

Plots a (dual) wind (wave or current) roses with calms.

The size of the calm is determined by the primary (measurement) data.

Parameters:

Name Type Description Default data

array with 2 or 4 columns (magnitude, direction, magnitude2, direction2)

required labels

labels for the legend(s)

('Measurement', 'Model') mag_step Optional[float]

discretization for magnitude (delta_r, in radial direction )

None n_sectors int

number of directional sectors

16 calm_threshold Optional[float]

minimum value for data being counted as valid (i.e. below this is calm)

None calm_text str

text to display in calm.

'Calm' r_step float

radial axis discretization. By default 0.1 i.e. every 10%.

0.1 r_max Optional[float]

maximum radius (%) of plot, e.g. if 50% wanted then r_max=0.5

None max_bin Optional[float]

max value to truncate the data, e.g., max_bin=1.0 if hm0=1m is the desired final bin.

None mag_bins array of floats (optional) Default = None

force bins to array of values, e.g. when specifying non-equidistant bins.

None legend bool

show legend

True cmap1 string. Default= 'viridis'

colormap for main axis

'viridis' cmap2 string. Default= 'Greys'

colormap for secondary axis

'Greys' n_dir_labels int. Default= 4

number of labels in the polar plot, choose between 4, 8 or 16, default is to use the same as n_sectors

None secondary_dir_step_factor float. Default= 2.0

reduce width of secondary axis by this factor

2.0 figsize Tuple[float, float]

figure size

(8, 8) ax

Matplotlib axis to plot on defined as polar, it can be done using \"subplot_kw = dict(projection = 'polar')\". Default = None, new axis created.

None title

title of the plot

None

Returns:

Type Description Axes

Matplotlib axis with the plot

Source code in modelskill/plotting/_wind_rose.py
def wind_rose(\n    data,\n    *,\n    labels=(\"Measurement\", \"Model\"),\n    mag_step: Optional[float] = None,\n    n_sectors: int = 16,\n    calm_threshold: Optional[float] = None,  # TODO rename to vmin?\n    calm_size: Optional[float] = None,\n    calm_text: str = \"Calm\",\n    r_step: float = 0.1,\n    r_max: Optional[float] = None,\n    legend: bool = True,\n    cmap1: str = \"viridis\",\n    cmap2: str = \"Greys\",\n    mag_bins: Optional[List[float]] = None,\n    max_bin: Optional[float] = None,  # TODO rename to vmax?\n    n_dir_labels: Optional[int] = None,\n    secondary_dir_step_factor: float = 2.0,\n    figsize: Tuple[float, float] = (8, 8),\n    ax=None,\n    title=None,\n) -> matplotlib.axes.Axes:\n    \"\"\"Plots a (dual) wind (wave or current) roses with calms.\n\n    The size of the calm is determined by the primary (measurement) data.\n\n    Parameters\n    ----------\n    data: array-like\n        array with 2 or 4 columns (magnitude, direction, magnitude2, direction2)\n    labels: tuple of strings. Default= (\"Measurement\", \"Model\")\n        labels for the legend(s)\n    mag_step: float, (optional) Default= None\n        discretization for magnitude (delta_r, in radial direction )\n    n_sectors: int (optional) Default= 16\n        number of directional sectors\n    calm_threshold: float (optional) Default= None (auto calculated)\n        minimum value for data being counted as valid (i.e. below this is calm)\n    calm_text: str (optional) Default: 'Calm'\n        text to display in calm.\n    r_step: float (optional) Default= 0.1\n        radial axis discretization. By default 0.1 i.e. every 10%.\n    r_max: float (optional) Default= None\n        maximum radius (%) of plot, e.g. 
if 50% wanted then r_max=0.5\n    max_bin:  float (optional) Default= None\n        max value to truncate the data, e.g.,  max_bin=1.0 if hm0=1m is the desired final bin.\n    mag_bins : array of floats (optional) Default = None\n        force bins to array of values, e.g. when specifying non-equidistant bins.\n    legend: boolean. Default= True\n        show legend\n    cmap1 : string. Default= 'viridis'\n        colormap for main axis\n    cmap2 : string. Default= 'Greys'\n        colormap for secondary axis\n    n_dir_labels : int. Default= 4\n        number of labels in the polar plot, choose between 4, 8 or 16, default is to use the same as n_sectors\n    secondary_dir_step_factor : float. Default= 2.0\n        reduce width of secondary axis by this factor\n    figsize: tuple(float,float)\n        figure size\n    ax: Matplotlib axis Default= None\n        Matplotlib axis to plot on defined as polar, it can be done using \"subplot_kw = dict(projection = 'polar')\". Default = None, new axis created.\n    title: str Default= None\n        title of the plot\n\n    Returns\n    -------\n    matplotlib.axes.Axes\n        Matplotlib axis with the plot\n    \"\"\"\n    if hasattr(data, \"to_numpy\"):\n        data = data.to_numpy()\n\n    # check that data is array_like\n    assert hasattr(data, \"__array__\"), \"data must be array_like\"\n\n    data_1 = data[:, 0:2]  # primary magnitude and direction\n    magmax = data_1[:, 0].max()\n\n    ncols = data.shape[1]\n    assert ncols in [2, 4], \"data must have 2 or 4 columns\"\n    dual = ncols == 4\n\n    if dual:\n        data_2 = data[:, 2:4]  # secondary magnitude and direction\n        magmax = max(magmax, data_2[:, 0].max())\n        assert len(labels) == 2, \"labels must have 2 elements\"\n\n    # magnitude bins\n    ui, vmin, vmax = pretty_intervals(\n        magmax,\n        mag_bins,\n        mag_step,\n        calm_threshold,\n        max_bin,\n    )\n\n    dir_step = 360 // n_sectors\n\n    if n_dir_labels 
is None:\n        if n_sectors in (4, 8, 16):\n            n_dir_labels = n_sectors\n        else:\n            # Directional labels are not identical to the number of sectors, use a sane default\n            n_dir_labels = 16\n\n    dh = _dirhist2d(data_1, ui=ui, dir_step=dir_step)\n    calm = dh.calm\n\n    if dual:\n        assert len(data_1) == len(data_2), \"data_1 and data_2 must have same length\"\n        dh2 = _dirhist2d(data_2, ui=ui, dir_step=dir_step)\n        assert dh.density.shape == dh2.density.shape\n\n    ri, rmax = _calc_radial_ticks(counts=dh.density, step=r_step, stop=r_max)\n\n    # Resize calm\n    # TODO this overwrites the calm value calculated above\n    if calm_size is not None:\n        calm = calm_size\n\n    cmap = _get_cmap(cmap1)\n\n    if ax is None:\n        _, ax = plt.subplots(figsize=figsize, subplot_kw=dict(projection=\"polar\"))\n\n    ax.set_title(title)\n    ax.set_theta_zero_location(\"N\")\n    ax.set_theta_direction(-1)\n\n    dir_labels = directional_labels(n_dir_labels)\n    grid = np.linspace(0, 360, n_dir_labels + 1)[:-1]\n    ax.set_thetagrids(grid, dir_labels)\n\n    # ax.tick_params(pad=-24)\n\n    ax.set_ylim(0, calm + rmax)\n    ax.set_yticks(ri + calm)\n    tick_labels = [f\"{tick * 100 :.0f}%\" for tick in ri]\n    ax.set_yticklabels(tick_labels)\n    ax.set_rlabel_position(5)\n\n    if vmin > 0:\n        _add_calms_to_ax(ax, threshold=calm, text=calm_text)\n\n    # primary histogram (model)\n    p = _create_patch(\n        thetac=dh.dir_centers,\n        dir_step=dir_step,\n        calm=calm,\n        ui=ui,\n        counts=dh.density,\n        cmap=cmap,\n        vmax=vmax,\n    )\n    ax.add_collection(p)\n\n    if legend:\n        _add_legend_to_ax(\n            ax,\n            cmap=cmap,\n            vmax=vmax,\n            ui=ui,\n            calm=calm,\n            counts=dh.density,\n            label=labels[0],\n            primary=True,\n            dual=dual,\n        )\n\n    if dual:\n        # 
add second histogram (observation)\n        cmap = _get_cmap(cmap2)\n\n        # TODO should this be calm2?\n        p = _create_patch(\n            thetac=dh.dir_centers,\n            dir_step=dir_step,\n            calm=calm,\n            ui=ui,\n            counts=dh2.density,\n            cmap=cmap,\n            vmax=vmax,\n            dir_step_factor=secondary_dir_step_factor,\n        )\n        ax.add_collection(p)\n\n        if legend:\n            _add_legend_to_ax(\n                ax,\n                cmap=cmap,\n                vmax=vmax,\n                ui=ui,\n                calm=dh2.calm,\n                counts=dh2.density,\n                label=labels[1],\n                primary=False,\n                dual=dual,\n            )\n\n    return ax\n
"},{"location":"api/quantity/","title":"Quantity","text":""},{"location":"api/quantity/#modelskill.quantity.Quantity","title":"modelskill.quantity.Quantity dataclass","text":"

Quantity of data

Parameters:

Name Type Description Default name str

Name of the quantity

required unit str

Unit of the quantity

required is_directional bool

Whether the quantity is directional (e.g. Wind Direction), by default False

False

Examples:

>>> wl = Quantity(name=\"Water Level\", unit=\"meter\")\n>>> wl\nQuantity(name='Water Level', unit='meter')\n>>> wl.name\n'Water Level'\n>>> wl.unit\n'meter'\n>>> wl.is_compatible(wl)\nTrue\n>>> ws = Quantity(name=\"Wind Direction\", unit=\"degree\", is_directional=True)\n>>> ws\nQuantity(name='Wind Direction', unit='degree', is_directional=True)\n
Source code in modelskill/quantity.py
@dataclass(frozen=True)\nclass Quantity:\n    \"\"\"Quantity of data\n\n    Parameters\n    ----------\n    name : str\n        Name of the quantity\n    unit : str\n        Unit of the quantity\n    is_directional : bool, optional\n        Whether the quantity is directional (e.g. Wind Direction), by default False\n\n    Examples\n    --------\n    >>> wl = Quantity(name=\"Water Level\", unit=\"meter\")\n    >>> wl\n    Quantity(name='Water Level', unit='meter')\n    >>> wl.name\n    'Water Level'\n    >>> wl.unit\n    'meter'\n    >>> wl.is_compatible(wl)\n    True\n    >>> ws = Quantity(name=\"Wind Direction\", unit=\"degree\", is_directional=True)\n    >>> ws\n    Quantity(name='Wind Direction', unit='degree', is_directional=True)\n    \"\"\"\n\n    name: str\n    unit: str\n    is_directional: bool = False\n\n    def __str__(self):\n        return f\"{self.name} [{self.unit}]\"\n\n    def __repr__(self):\n        if self.is_directional:\n            return (\n                f\"Quantity(name='{self.name}', unit='{self.unit}', is_directional=True)\"\n            )\n        else:\n            # hide is_directional if False to avoid clutter\n            return f\"Quantity(name='{self.name}', unit='{self.unit}')\"\n\n    def is_compatible(self, other) -> bool:\n        \"\"\"Check if the quantity is compatible with another quantity\n\n        Examples\n        --------\n        >>> wl = Quantity(name=\"Water Level\", unit=\"meter\")\n        >>> ws = Quantity(name=\"Wind Speed\", unit=\"meter per second\")\n        >>> wl.is_compatible(ws)\n        False\n        >>> uq = Quantity(name=\"Undefined\", unit=\"Undefined\")\n        >>> wl.is_compatible(uq)\n        True\n        \"\"\"\n\n        if self == other:\n            return True\n\n        if (self.name == \"Undefined\") or (other.name == \"Undefined\"):\n            return True\n\n        return False\n\n    @staticmethod\n    def undefined() -> \"Quantity\":\n        return Quantity(name=\"\", 
unit=\"\")\n\n    def to_dict(self) -> Dict[str, str]:\n        return {\"name\": self.name, \"unit\": self.unit}\n\n    @staticmethod\n    def from_cf_attrs(attrs: Mapping[str, str]) -> \"Quantity\":\n        \"\"\"Create a Quantity from a CF compliant attributes dictionary\n\n        If units is \"degree\", \"degrees\" or \"Degree true\", the quantity is assumed\n        to be directional. Based on https://codes.ecmwf.int/grib/param-db/ and\n        https://cfconventions.org/Data/cf-standard-names/current/build/cf-standard-name-table.html\n\n        Parameters\n        ----------\n        attrs : Mapping[str, str]\n            Attributes dictionary\n\n        Examples\n        --------\n        >>> Quantity.from_cf_attrs({'long_name': 'Water Level', 'units': 'meter'})\n        Quantity(name='Water Level', unit='meter')\n        >>> Quantity.from_cf_attrs({'long_name': 'Wind direction', 'units': 'degree'})\n        Quantity(name='Wind direction', unit='degree', is_directional=True)\n\n        \"\"\"\n        quantity = Quantity.undefined()\n        if long_name := attrs.get(\"long_name\"):\n            if units := attrs.get(\"units\"):\n                is_directional = units in [\"degree\", \"degrees\", \"Degree true\"]\n                quantity = Quantity(\n                    name=long_name,\n                    unit=units,\n                    is_directional=is_directional,\n                )\n        return quantity\n\n    @staticmethod\n    def from_mikeio_iteminfo(iteminfo: mikeio.ItemInfo) -> \"Quantity\":\n        \"\"\"Create a Quantity from mikeio ItemInfo\n\n        If the unit is \"degree\", the quantity is assumed to be directional.\n        \"\"\"\n\n        unit = iteminfo.unit.short_name\n        is_directional = unit == \"degree\"\n        return Quantity(\n            name=repr(iteminfo.type), unit=unit, is_directional=is_directional\n        )\n\n    @staticmethod\n    def from_mikeio_eum_name(type_name: str) -> \"Quantity\":\n        
\"\"\"Create a Quantity from a name recognized by mikeio\n\n        Parameters\n        ----------\n        type_name : str\n            Name of the quantity\n\n        Examples\n        --------\n        >>> Quantity.from_mikeio_eum_name(\"Water Level\")\n        Quantity(name='Water Level', unit='meter')\n        \"\"\"\n        try:\n            etype = mikeio.EUMType[type_name]\n        except KeyError:\n            name_underscore = type_name.replace(\" \", \"_\")\n            try:\n                etype = mikeio.EUMType[name_underscore]\n            except KeyError:\n                raise ValueError(\n                    f\"{type_name=} is not recognized as a known type. Please create a Quantity(name='{type_name}' unit='<FILL IN UNIT>')\"\n                )\n        unit = etype.units[0].name\n        is_directional = unit == \"degree\"\n        warnings.warn(f\"{unit=} was automatically set for {type_name=}\")\n        return Quantity(name=type_name, unit=unit, is_directional=is_directional)\n
"},{"location":"api/quantity/#modelskill.quantity.Quantity.from_cf_attrs","title":"from_cf_attrs staticmethod","text":"
from_cf_attrs(attrs)\n

Create a Quantity from a CF compliant attributes dictionary

If units is \"degree\", \"degrees\" or \"Degree true\", the quantity is assumed to be directional. Based on https://codes.ecmwf.int/grib/param-db/ and https://cfconventions.org/Data/cf-standard-names/current/build/cf-standard-name-table.html

Parameters:

Name Type Description Default attrs Mapping[str, str]

Attributes dictionary

required

Examples:

>>> Quantity.from_cf_attrs({'long_name': 'Water Level', 'units': 'meter'})\nQuantity(name='Water Level', unit='meter')\n>>> Quantity.from_cf_attrs({'long_name': 'Wind direction', 'units': 'degree'})\nQuantity(name='Wind direction', unit='degree', is_directional=True)\n
Source code in modelskill/quantity.py
@staticmethod\ndef from_cf_attrs(attrs: Mapping[str, str]) -> \"Quantity\":\n    \"\"\"Create a Quantity from a CF compliant attributes dictionary\n\n    If units is \"degree\", \"degrees\" or \"Degree true\", the quantity is assumed\n    to be directional. Based on https://codes.ecmwf.int/grib/param-db/ and\n    https://cfconventions.org/Data/cf-standard-names/current/build/cf-standard-name-table.html\n\n    Parameters\n    ----------\n    attrs : Mapping[str, str]\n        Attributes dictionary\n\n    Examples\n    --------\n    >>> Quantity.from_cf_attrs({'long_name': 'Water Level', 'units': 'meter'})\n    Quantity(name='Water Level', unit='meter')\n    >>> Quantity.from_cf_attrs({'long_name': 'Wind direction', 'units': 'degree'})\n    Quantity(name='Wind direction', unit='degree', is_directional=True)\n\n    \"\"\"\n    quantity = Quantity.undefined()\n    if long_name := attrs.get(\"long_name\"):\n        if units := attrs.get(\"units\"):\n            is_directional = units in [\"degree\", \"degrees\", \"Degree true\"]\n            quantity = Quantity(\n                name=long_name,\n                unit=units,\n                is_directional=is_directional,\n            )\n    return quantity\n
"},{"location":"api/quantity/#modelskill.quantity.Quantity.from_mikeio_eum_name","title":"from_mikeio_eum_name staticmethod","text":"
from_mikeio_eum_name(type_name)\n

Create a Quantity from a name recognized by mikeio

Parameters:

Name Type Description Default type_name str

Name of the quantity

required

Examples:

>>> Quantity.from_mikeio_eum_name(\"Water Level\")\nQuantity(name='Water Level', unit='meter')\n
Source code in modelskill/quantity.py
@staticmethod\ndef from_mikeio_eum_name(type_name: str) -> \"Quantity\":\n    \"\"\"Create a Quantity from a name recognized by mikeio\n\n    Parameters\n    ----------\n    type_name : str\n        Name of the quantity\n\n    Examples\n    --------\n    >>> Quantity.from_mikeio_eum_name(\"Water Level\")\n    Quantity(name='Water Level', unit='meter')\n    \"\"\"\n    try:\n        etype = mikeio.EUMType[type_name]\n    except KeyError:\n        name_underscore = type_name.replace(\" \", \"_\")\n        try:\n            etype = mikeio.EUMType[name_underscore]\n        except KeyError:\n            raise ValueError(\n                f\"{type_name=} is not recognized as a known type. Please create a Quantity(name='{type_name}' unit='<FILL IN UNIT>')\"\n            )\n    unit = etype.units[0].name\n    is_directional = unit == \"degree\"\n    warnings.warn(f\"{unit=} was automatically set for {type_name=}\")\n    return Quantity(name=type_name, unit=unit, is_directional=is_directional)\n
"},{"location":"api/quantity/#modelskill.quantity.Quantity.from_mikeio_iteminfo","title":"from_mikeio_iteminfo staticmethod","text":"
from_mikeio_iteminfo(iteminfo)\n

Create a Quantity from mikeio ItemInfo

If the unit is \"degree\", the quantity is assumed to be directional.

Source code in modelskill/quantity.py
@staticmethod\ndef from_mikeio_iteminfo(iteminfo: mikeio.ItemInfo) -> \"Quantity\":\n    \"\"\"Create a Quantity from mikeio ItemInfo\n\n    If the unit is \"degree\", the quantity is assumed to be directional.\n    \"\"\"\n\n    unit = iteminfo.unit.short_name\n    is_directional = unit == \"degree\"\n    return Quantity(\n        name=repr(iteminfo.type), unit=unit, is_directional=is_directional\n    )\n
"},{"location":"api/quantity/#modelskill.quantity.Quantity.is_compatible","title":"is_compatible","text":"
is_compatible(other)\n

Check if the quantity is compatible with another quantity

Examples:

>>> wl = Quantity(name=\"Water Level\", unit=\"meter\")\n>>> ws = Quantity(name=\"Wind Speed\", unit=\"meter per second\")\n>>> wl.is_compatible(ws)\nFalse\n>>> uq = Quantity(name=\"Undefined\", unit=\"Undefined\")\n>>> wl.is_compatible(uq)\nTrue\n
Source code in modelskill/quantity.py
def is_compatible(self, other) -> bool:\n    \"\"\"Check if the quantity is compatible with another quantity\n\n    Examples\n    --------\n    >>> wl = Quantity(name=\"Water Level\", unit=\"meter\")\n    >>> ws = Quantity(name=\"Wind Speed\", unit=\"meter per second\")\n    >>> wl.is_compatible(ws)\n    False\n    >>> uq = Quantity(name=\"Undefined\", unit=\"Undefined\")\n    >>> wl.is_compatible(uq)\n    True\n    \"\"\"\n\n    if self == other:\n        return True\n\n    if (self.name == \"Undefined\") or (other.name == \"Undefined\"):\n        return True\n\n    return False\n
"},{"location":"api/settings/","title":"Settings","text":""},{"location":"api/settings/#modelskill.settings","title":"modelskill.settings","text":"

The settings module holds package-wide configurables and provides a uniform API for working with them.

This module is inspired by pandas config module.

Overview

This module supports the following requirements:

  • options are referenced using keys in dot.notation, e.g. \"x.y.option - z\".
  • keys are case-insensitive.
  • functions should accept partial/regex keys, when unambiguous.
  • options can be registered by modules at import time.
  • options have a default value, and (optionally) a description and validation function associated with them.
  • options can be reset to their default value.
  • all option can be reset to their default value at once.
  • all options in a certain sub - namespace can be reset at once.
  • the user can set / get / reset or ask for the description of an option.
  • a developer can register an option.
Implementation
  • Data is stored using nested dictionaries, and should be accessed through the provided API.
  • \"Registered options\" have metadata associated with them, which are stored in auxiliary dictionaries keyed on the fully-qualified key, e.g. \"x.y.z.option\".

Examples:

>>> import modelskill as ms\n>>> ms.options\nmetrics.list : [<function bias at 0x0000029D614A2DD0>, (...)]\nplot.rcparams : {}\nplot.scatter.legend.bbox : {'facecolor': 'white', (...)}\nplot.scatter.legend.fontsize : 12\nplot.scatter.legend.kwargs : {}\nplot.scatter.oneone_line.color : blue\nplot.scatter.oneone_line.label : 1:1\nplot.scatter.points.alpha : 0.5\nplot.scatter.points.label :\nplot.scatter.points.size : 20\nplot.scatter.quantiles.color : darkturquoise\nplot.scatter.quantiles.kwargs : {}\nplot.scatter.quantiles.label : Q-Q\nplot.scatter.quantiles.marker : X\nplot.scatter.quantiles.markeredgecolor : (0, 0, 0, 0.4)\nplot.scatter.quantiles.markeredgewidth : 0.5\nplot.scatter.quantiles.markersize : 3.5\nplot.scatter.reg_line.kwargs : {'color': 'r'}\n>>> ms.set_option(\"plot.scatter.points.size\", 4)\n>>> plot.scatter.points.size\n4\n>>> ms.get_option(\"plot.scatter.points.size\")\n4\n>>> ms.options.plot.scatter.points.size = 10\n>>> ms.options.plot.scatter.points.size\n10\n>>> ms.reset_option(\"plot.scatter.points.size\")\n>>> ms.options.plot.scatter.points.size\n20\n
"},{"location":"api/settings/#modelskill.settings.OptionsContainer","title":"OptionsContainer","text":"

provide attribute-style access to a nested dict of options

Accessed by ms.options

Source code in modelskill/settings.py
class OptionsContainer:\n    \"\"\"provide attribute-style access to a nested dict of options\n\n    Accessed by ms.options\n    \"\"\"\n\n    def __init__(self, d: Dict[str, Any], prefix: str = \"\") -> None:\n        object.__setattr__(self, \"d\", d)\n        object.__setattr__(self, \"prefix\", prefix)\n\n    def __setattr__(self, key: str, val: Any) -> None:\n        prefix = object.__getattribute__(self, \"prefix\")\n        if prefix:\n            prefix += \".\"\n        prefix += key\n        # you can't set new keys\n        # can you can't overwrite subtrees\n        if key in self.d and not isinstance(self.d[key], dict):\n            set_option(prefix, val)\n        else:\n            raise OptionError(\"You can only set the value of existing options\")\n\n    def __getattr__(self, key: str):\n        prefix = object.__getattribute__(self, \"prefix\")\n        if prefix:\n            prefix += \".\"\n        prefix += key\n        try:\n            v = object.__getattribute__(self, \"d\")[key]\n        except KeyError as err:\n            raise OptionError(f\"No such option: {key}\") from err\n        if isinstance(v, dict):\n            return OptionsContainer(v, prefix)\n        else:\n            return get_option(prefix)\n\n    def to_dict(self) -> Dict:\n        \"\"\"Return options as dictionary with full-name keys\"\"\"\n        return _option_to_dict(self.prefix)\n\n    # def search(self, pat: str = \"\") -> List[str]:\n    #     keys = _select_options(f\"{self.prefix}*{pat}\")\n    #     return list(keys)\n\n    def __repr__(self) -> str:\n        return _describe_option_short(self.prefix, False) or \"\"\n\n    def __dir__(self) -> Iterable[str]:\n        return list(self.d.keys())\n
"},{"location":"api/settings/#modelskill.settings.OptionsContainer.to_dict","title":"to_dict","text":"
to_dict()\n

Return options as dictionary with full-name keys

Source code in modelskill/settings.py
def to_dict(self) -> Dict:\n    \"\"\"Return options as dictionary with full-name keys\"\"\"\n    return _option_to_dict(self.prefix)\n
"},{"location":"api/settings/#modelskill.settings.get_option","title":"get_option","text":"
get_option(pat)\n

Get value of a single option matching a pattern

Parameters:

Name Type Description Default pat str

pattern of seeked option

required

Returns:

Type Description Any

value of matched option

Source code in modelskill/settings.py
def get_option(pat: str) -> Any:\n    \"\"\"Get value of a single option matching a pattern\n\n    Parameters\n    ----------\n    pat : str\n        pattern of seeked option\n\n    Returns\n    -------\n    Any\n        value of matched option\n    \"\"\"\n    key = _get_single_key(pat)\n\n    # walk the nested dict\n    root, k = _get_root(key)\n    return root[k]\n
"},{"location":"api/settings/#modelskill.settings.load_style","title":"load_style","text":"
load_style(name)\n

Load a number of options from a named style.

Parameters:

Name Type Description Default name str

Name of the predefined style to load. Available styles are: 'MOOD': Resembling the plots of the www.metocean-on-demand.com data portal.

required

Raises:

Type Description KeyError

If a named style is not found.

Examples:

>>> import modelskill as ms\n>>> ms.load_style('MOOD')\n
Source code in modelskill/settings.py
def load_style(name: str) -> None:\n    \"\"\"Load a number of options from a named style.\n\n    Parameters\n    ----------\n    name : str\n        Name of the predefined style to load. Available styles are:\n        'MOOD': Resembling the plots of the www.metocean-on-demand.com data portal.\n\n    Raises\n    ------\n    KeyError\n        If a named style is not found.\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> ms.load_style('MOOD')\n    \"\"\"\n\n    lname = name.lower()\n\n    # The number of folders to search can be expanded in the future\n    path = Path(__file__).parent / \"styles\"\n    NAMED_STYLES = {x.stem: x for x in path.glob(\"*.yml\")}\n\n    if lname not in NAMED_STYLES:\n        raise KeyError(\n            f\"Style '{name}' not found. Choose from {list(NAMED_STYLES.keys())}\"\n        )\n\n    style_path = NAMED_STYLES[lname]\n\n    with open(style_path, encoding=\"utf-8\") as f:\n        contents = f.read()\n        d = yaml.load(contents, Loader=yaml.FullLoader)\n\n    set_option(d)\n
"},{"location":"api/settings/#modelskill.settings.register_option","title":"register_option","text":"
register_option(key, defval, doc='', validator=None)\n

Register an option in the package-wide modelskill settingss object

Parameters:

Name Type Description Default key str

Fully-qualified key, e.g. \"x.y.option - z\".

required defval object

Default value of the option.

required doc str

Description of the option.

'' validator Callable

Function of a single argument, should raise ValueError if called with a value which is not a legal value for the option.

None

Raises:

Type Description ValueError if `validator` is specified and `defval` is not a valid value. Source code in modelskill/settings.py
def register_option(\n    key: str,\n    defval: object,\n    doc: str = \"\",\n    validator: Optional[Callable[[Any], Any]] = None,\n    # cb: Optional[Callable[[str], Any]] = None,\n) -> None:\n    \"\"\"\n    Register an option in the package-wide modelskill settingss object\n\n    Parameters\n    ----------\n    key : str\n        Fully-qualified key, e.g. \"x.y.option - z\".\n    defval : object\n        Default value of the option.\n    doc : str\n        Description of the option.\n    validator : Callable, optional\n        Function of a single argument, should raise `ValueError` if\n        called with a value which is not a legal value for the option.\n\n    Raises\n    ------\n    ValueError if `validator` is specified and `defval` is not a valid value.\n    \"\"\"\n    import keyword\n    import tokenize\n\n    key = key.lower()\n\n    if key in _registered_options:\n        raise OptionError(f\"Option '{key}' has already been registered\")\n    # if key in _reserved_keys:\n    #     raise OptionError(f\"Option '{key}' is a reserved key\")\n\n    # the default value should be legal\n    if validator:\n        validator(defval)\n\n    # walk the nested dict, creating dicts as needed along the path\n    path = key.split(\".\")\n\n    for k in path:\n        if not re.match(\"^\" + tokenize.Name + \"$\", k):\n            raise ValueError(f\"{k} is not a valid identifier\")\n        if keyword.iskeyword(k):\n            raise ValueError(f\"{k} is a python keyword\")\n\n    cursor = _global_settings\n    msg = \"Path prefix to option '{option}' is already an option\"\n\n    for i, p in enumerate(path[:-1]):\n        if not isinstance(cursor, dict):\n            raise OptionError(msg.format(option=\".\".join(path[:i])))\n        if p not in cursor:\n            cursor[p] = {}\n        cursor = cursor[p]\n\n    if not isinstance(cursor, dict):\n        raise OptionError(msg.format(option=\".\".join(path[:-1])))\n\n    cursor[path[-1]] = defval  # 
initialize\n\n    # save the option metadata\n    _registered_options[key] = RegisteredOption(\n        key=key,\n        defval=defval,\n        doc=doc,\n        validator=validator,  # , cb=cb\n    )\n
"},{"location":"api/settings/#modelskill.settings.reset_option","title":"reset_option","text":"
reset_option(pat='', silent=False)\n

Reset one or more options (matching a pattern) to the default value

Examples:

>>> ms.options.plot.scatter.points.size\n20\n>>> ms.options.plot.scatter.points.size = 10\n>>> ms.options.plot.scatter.points.size\n10\n>>> ms.reset_option(\"plot.scatter.points.size\")\n>>> ms.options.plot.scatter.points.size\n20\n
Source code in modelskill/settings.py
def reset_option(pat: str = \"\", silent: bool = False) -> None:\n    \"\"\"Reset one or more options (matching a pattern) to the default value\n\n    Examples\n    --------\n    >>> ms.options.plot.scatter.points.size\n    20\n    >>> ms.options.plot.scatter.points.size = 10\n    >>> ms.options.plot.scatter.points.size\n    10\n    >>> ms.reset_option(\"plot.scatter.points.size\")\n    >>> ms.options.plot.scatter.points.size\n    20\n\n    \"\"\"\n\n    keys = _select_options(pat)\n\n    if len(keys) == 0:\n        raise OptionError(\"No such keys(s)\")\n\n    if len(keys) > 1 and len(pat) < 4 and pat != \"all\":\n        raise ValueError(\n            \"You must specify at least 4 characters when \"\n            \"resetting multiple keys, use the special keyword \"\n            '\"all\" to reset all the options to their default value'\n        )\n\n    for k in keys:\n        set_option(k, _registered_options[k].defval, silent=silent)\n
"},{"location":"api/settings/#modelskill.settings.set_option","title":"set_option","text":"
set_option(*args, **kwargs)\n

Set the value of one or more options

Examples:

>>> ms.set_option(\"plot.scatter.points.size\", 4)\n>>> ms.set_option({\"plot.scatter.points.size\": 4})\n>>> ms.options.plot.scatter.points.size = 4\n
Source code in modelskill/settings.py
def set_option(*args, **kwargs) -> None:\n    \"\"\"Set the value of one or more options\n\n    Examples\n    --------\n    >>> ms.set_option(\"plot.scatter.points.size\", 4)\n    >>> ms.set_option({\"plot.scatter.points.size\": 4})\n    >>> ms.options.plot.scatter.points.size = 4\n    \"\"\"\n    # must at least 1 arg deal with constraints later\n\n    if len(args) == 1 and isinstance(args[0], dict):\n        kwargs.update(args[0])\n\n    if len(args) % 2 == 0:\n        keys = args[::2]\n        values = args[1::2]\n        kwargs.update(dict(zip(keys, values)))\n\n    if len(args) > 1 and len(args) % 2 != 0:\n        raise ValueError(\"Must provide a value for each key, i.e. even number of args\")\n\n    # default to false\n    kwargs.pop(\"silent\", False)\n\n    for k, v in kwargs.items():\n        key = _get_single_key(k)  # , silent)\n\n        o = _get_registered_option(key)\n        if o and o.validator:\n            o.validator(v)\n\n        # walk the nested dict\n        root, k = _get_root(key)\n        root[k] = v\n
"},{"location":"api/skill/","title":"Skill","text":""},{"location":"api/skill/#modelskill.skill.SkillTable","title":"modelskill.skill.SkillTable","text":"

SkillTable object for visualization and analysis returned by the comparer's skill method. The object wraps the pd.DataFrame class which can be accessed from the attribute data.

The columns are assumed to be metrics and data for a single metric can be accessed by e.g. s.rmse or s[\"rmse\"]. The resulting object can be used for plotting.

Examples:

>>> sk = cc.skill()\n>>> sk.mod_names\n['SW_1', 'SW_2']\n>>> sk.style()\n>>> sk.sel(model='SW_1').style()\n>>> sk.rmse.plot.bar()\n
Source code in modelskill/skill.py
class SkillTable:\n    \"\"\"\n    SkillTable object for visualization and analysis returned by\n    the comparer's `skill` method. The object wraps the pd.DataFrame\n    class which can be accessed from the attribute `data`.\n\n    The columns are assumed to be metrics and data for a single metric\n    can be accessed by e.g. `s.rmse` or `s[\"rmse\"]`. The resulting object\n    can be used for plotting.\n\n    Examples\n    --------\n    >>> sk = cc.skill()\n    >>> sk.mod_names\n    ['SW_1', 'SW_2']\n    >>> sk.style()\n    >>> sk.sel(model='SW_1').style()\n    >>> sk.rmse.plot.bar()\n    \"\"\"\n\n    _large_is_best_metrics = [\n        \"cc\",\n        \"corrcoef\",\n        \"r2\",\n        \"spearmanr\",\n        \"rho\",\n        \"nash_sutcliffe_efficiency\",\n        \"nse\",\n        \"kge\",\n    ]\n    _small_is_best_metrics = [\n        \"mae\",\n        \"mape\",\n        \"mean_absolute_error\",\n        \"mean_absolute_percentage_error\",\n        \"rmse\",\n        \"root_mean_squared_error\",\n        \"urmse\",\n        \"scatter_index\",\n        \"si\",\n        \"mef\",\n        \"model_efficiency_factor\",\n    ]\n    _one_is_best_metrics = [\"lin_slope\"]\n    _zero_is_best_metrics = [\"bias\"]\n\n    def __init__(self, data: pd.DataFrame):\n        self.data: pd.DataFrame = (\n            data if isinstance(data, pd.DataFrame) else data.to_dataframe()\n        )\n        # TODO remove in v1.1\n        self.plot = DeprecatedSkillPlotter(self)  # type: ignore\n\n    # TODO: remove?\n    @property\n    def _df(self) -> pd.DataFrame:\n        \"\"\"Data as DataFrame without x and y columns\"\"\"\n        return self.to_dataframe(drop_xy=True)\n\n    @property\n    def metrics(self) -> Collection[str]:\n        \"\"\"List of metrics (columns) in the SkillTable\"\"\"\n        return list(self._df.columns)\n\n    # TODO: remove?\n    def __len__(self) -> int:\n        return len(self._df)\n\n    def to_dataframe(self, drop_xy: bool = True) -> 
pd.DataFrame:\n        \"\"\"Convert SkillTable to pd.DataFrame\n\n        Parameters\n        ----------\n        drop_xy : bool, optional\n            Drop the x, y coordinates?, by default True\n\n        Returns\n        -------\n        pd.DataFrame\n            Skill data as pd.DataFrame\n        \"\"\"\n        if drop_xy:\n            return self.data.drop(columns=[\"x\", \"y\"], errors=\"ignore\")\n        else:\n            return self.data.copy()\n\n    def to_geodataframe(self, crs: str = \"EPSG:4326\") -> gpd.GeoDataFrame:\n        \"\"\"Convert SkillTable to geopandas.GeoDataFrame\n\n        Note: requires geopandas to be installed\n\n        Note: requires x and y columns to be present\n\n        Parameters\n        ----------\n        crs : str, optional\n            Coordinate reference system identifier passed to the\n            GeoDataFrame constructor, by default \"EPSG:4326\"\n\n        Returns\n        -------\n        gpd.GeoDataFrame\n            Skill data as GeoDataFrame\n        \"\"\"\n        import geopandas as gpd\n\n        assert \"x\" in self.data.columns\n        assert \"y\" in self.data.columns\n\n        df = self.to_dataframe(drop_xy=False)\n\n        gdf = gpd.GeoDataFrame(\n            df,\n            geometry=gpd.points_from_xy(df.x, df.y),\n            crs=crs,\n        )\n\n        return gdf\n\n    def __repr__(self) -> str:\n        return repr(self._df)\n\n    def _repr_html_(self) -> Any:\n        return self._df._repr_html_()\n\n    @overload\n    def __getitem__(self, key: Hashable | int) -> SkillArray: ...\n\n    @overload\n    def __getitem__(self, key: Iterable[Hashable]) -> SkillTable: ...\n\n    def __getitem__(\n        self, key: Hashable | Iterable[Hashable]\n    ) -> SkillArray | SkillTable:\n        if isinstance(key, int):\n            key = list(self.data.columns)[key]\n        result = self.data[key]\n        if isinstance(result, pd.Series):\n            # I don't think this should be necessary, but 
in some cases the input doesn't contain x and y\n            if \"x\" in self.data.columns and \"y\" in self.data.columns:\n                cols = [\"x\", \"y\", key]\n                return SkillArray(self.data[cols])\n            else:\n                return SkillArray(result.to_frame())\n        elif isinstance(result, pd.DataFrame):\n            return SkillTable(result)\n        else:\n            raise NotImplementedError(\"Unexpected type of result\")\n\n    def __getattr__(self, item: str, *args, **kwargs) -> Any:\n        # note: no help from type hints here!\n        if item in self.data.columns:\n            return self[item]  # Redirects to __getitem__\n        else:\n            # act as a DataFrame... (necessary for style() to work)\n            # drawback is that methods such as head() etc would appear\n            # as working but return a DataFrame instead of a SkillTable!\n            return getattr(self.data, item, *args, **kwargs)\n            # raise AttributeError(\n            #     f\"\"\"\n            #         SkillTable has no attribute {item}; Maybe you are\n            #         looking for the corresponding DataFrame attribute?\n            #         Try exporting the skill table to a DataFrame using sk.to_dataframe().\n            #     \"\"\"\n            # )\n\n    @property\n    def iloc(self, *args, **kwargs):  # type: ignore\n        return self.data.iloc(*args, **kwargs)\n\n    @property\n    def loc(self, *args, **kwargs):  # type: ignore\n        return self.data.loc(*args, **kwargs)\n\n    def sort_index(self, *args, **kwargs) -> SkillTable:  # type: ignore\n        \"\"\"Sort by index (level) e.g. 
sorting by observation\n\n        Wrapping pd.DataFrame.sort_index()\n\n        Returns\n        -------\n        SkillTable\n            A new SkillTable with sorted index\n\n        Examples\n        --------\n        >>> sk = cc.skill()\n        >>> sk.sort_index()\n        >>> sk.sort_index(level=\"observation\")\n        \"\"\"\n        return self.__class__(self.data.sort_index(*args, **kwargs))\n\n    def sort_values(self, *args, **kwargs) -> SkillTable:  # type: ignore\n        \"\"\"Sort by values e.g. sorting by rmse values\n\n        Wrapping pd.DataFrame.sort_values()\n\n        Returns\n        -------\n        SkillTable\n            A new SkillTable with sorted values\n\n        Examples\n        --------\n        >>> sk = cc.skill()\n        >>> sk.sort_values(\"rmse\")\n        >>> sk.sort_values(\"rmse\", ascending=False)\n        >>> sk.sort_values([\"n\", \"rmse\"])\n        \"\"\"\n        return self.__class__(self.data.sort_values(*args, **kwargs))\n\n    def swaplevel(self, *args, **kwargs) -> SkillTable:  # type: ignore\n        \"\"\"Swap the levels of the MultiIndex e.g. 
swapping 'model' and 'observation'\n\n        Wrapping pd.DataFrame.swaplevel()\n\n        Returns\n        -------\n        SkillTable\n            A new SkillTable with swapped levels\n\n        Examples\n        --------\n        >>> sk = cc.skill()\n        >>> sk.swaplevel().sort_index(level=\"observation\")\n        >>> sk.swaplevel(\"model\", \"observation\")\n        >>> sk.swaplevel(0, 1)\n        \"\"\"\n        return self.__class__(self.data.swaplevel(*args, **kwargs))\n\n    @property\n    def mod_names(self) -> list[str]:\n        \"\"\"List of model names (in index)\"\"\"\n        return self._get_index_level_by_name(\"model\")\n\n    @property\n    def obs_names(self) -> list[str]:\n        \"\"\"List of observation names (in index)\"\"\"\n        return self._get_index_level_by_name(\"observation\")\n\n    @property\n    def quantity_names(self) -> list[str]:\n        \"\"\"List of quantity names (in index)\"\"\"\n        return self._get_index_level_by_name(\"quantity\")\n\n    def _get_index_level_by_name(self, name: str) -> list[str]:\n        # Helper function to get unique values of a level in the index (e.g. 
model)\n        index = self._df.index\n        if name in index.names:\n            level = index.names.index(name)\n            return list(index.get_level_values(level).unique())\n        else:\n            return []\n            # raise ValueError(f\"name {name} not in index {list(self.index.names)}\")\n\n    def query(self, query: str) -> SkillTable:\n        \"\"\"Select a subset of the SkillTable by a query string\n\n        wrapping pd.DataFrame.query()\n\n        Parameters\n        ----------\n        query : str\n            string supported by pd.DataFrame.query()\n\n        Returns\n        -------\n        SkillTable\n            A subset of the original SkillTable\n\n        Examples\n        --------\n        >>> sk = cc.skill()\n        >>> sk_above_0p3 = sk.query(\"rmse>0.3\")\n        \"\"\"\n        return self.__class__(self.data.query(query))\n\n    def sel(\n        self, query: str | None = None, reduce_index: bool = True, **kwargs: Any\n    ) -> SkillTable | SkillArray:\n        \"\"\"Select a subset of the SkillTable by a query,\n           (part of) the index, or specific columns\n\n        Parameters\n        ----------\n        reduce_index : bool, optional\n            Should unnecessary levels of the index be removed after subsetting?\n            Removed levels will stay as columns. By default True\n        **kwargs\n            Concrete keys depend on the index names of the SkillTable\n            (from the \"by\" argument in cc.skill() method)\n            \"model\"=... to select specific models,\n            \"observation\"=... 
to select specific observations\n\n        Returns\n        -------\n        SkillTable\n            A subset of the original SkillTable\n\n        Examples\n        --------\n        >>> sk = cc.skill()\n        >>> sk_SW1 = sk.sel(model = \"SW_1\")\n        >>> sk2 = sk.sel(observation = [\"EPL\", \"HKNA\"])\n        \"\"\"\n        if query is not None:\n            warnings.warn(\n                \"s.sel(query=...) is deprecated, use s.query(...) instead\",\n                FutureWarning,\n            )\n            return self.query(query)\n\n        for key, value in kwargs.items():\n            if key == \"metrics\" or key == \"columns\":\n                warnings.warn(\n                    f\"s.sel({key}=...) is deprecated, use getitem s[...] instead\",\n                    FutureWarning,\n                )\n                return self[value]  # type: ignore\n\n        df = self.to_dataframe(drop_xy=False)\n\n        for key, value in kwargs.items():\n            if key in df.index.names:\n                df = self._sel_from_index(df, key, value)\n            else:\n                raise KeyError(\n                    f\"Unknown index {key}. 
Valid index names are {df.index.names}\"\n                )\n\n        if isinstance(df, pd.Series):\n            return SkillArray(df)\n        if reduce_index and isinstance(df.index, pd.MultiIndex):\n            df = self._reduce_index(df)\n        return self.__class__(df)\n\n    def _sel_from_index(\n        self, df: pd.DataFrame, key: str, value: str | int\n    ) -> pd.DataFrame:\n        if (not isinstance(value, str)) and isinstance(value, Iterable):\n            for i, v in enumerate(value):\n                dfi = self._sel_from_index(df, key, v)\n                if i == 0:\n                    dfout = dfi\n                else:\n                    dfout = pd.concat([dfout, dfi])\n            return dfout\n\n        if isinstance(value, int):\n            value = self._idx_to_name(key, value)\n\n        if isinstance(df.index, pd.MultiIndex):\n            df = df.xs(value, level=key, drop_level=False)\n        else:\n            df = df[df.index == value]  # .copy()\n        return df\n\n    def _idx_to_name(self, index_name: str, pos: int) -> str:\n        \"\"\"Assumes that index is valid and idx is int\"\"\"\n        names = self._get_index_level_by_name(index_name)\n        n = len(names)\n        if (pos < 0) or (pos >= n):\n            raise KeyError(f\"Id {pos} is out of bounds for index {index_name} (0, {n})\")\n        return names[pos]\n\n    def _reduce_index(self, df: pd.DataFrame) -> pd.DataFrame:\n        \"\"\"Remove unnecessary levels of MultiIndex\"\"\"\n        df.index = df.index.remove_unused_levels()\n        levels_to_reset = []\n        for j, level in enumerate(df.index.levels):\n            if len(level) == 1:\n                levels_to_reset.append(j)\n        return df.reset_index(level=levels_to_reset)\n\n    def round(self, decimals: int = 3) -> SkillTable:\n        \"\"\"Round all values in SkillTable\n\n        Parameters\n        ----------\n        decimals : int, optional\n            Number of decimal places to round to 
(default: 3).\n            If decimals is negative, it specifies the number of\n            positions to the left of the decimal point.\n\n        Returns\n        -------\n        SkillTable\n            A new SkillTable with rounded values\n        \"\"\"\n\n        return self.__class__(self.data.round(decimals=decimals))\n\n    def style(\n        self,\n        decimals: int = 3,\n        metrics: Iterable[str] | None = None,\n        cmap: str = \"OrRd\",\n        show_best: bool = True,\n        **kwargs: Any,\n    ) -> pd.io.formats.style.Styler:\n        \"\"\"Style SkillTable with colors using pandas style\n\n        Parameters\n        ----------\n        decimals : int, optional\n            Number of decimal places to round to (default: 3).\n        metrics : str or List[str], optional\n            apply background gradient color to these columns, by default all;\n            if columns is [] then no background gradient will be applied.\n        cmap : str, optional\n            colormap of background gradient, by default \"OrRd\",\n            except \"bias\" column which will always be \"coolwarm\"\n        show_best : bool, optional\n            indicate best of each column by underline, by default True\n\n        Returns\n        -------\n        pd.Styler\n            Returns a pandas Styler object.\n\n        Examples\n        --------\n        >>> sk = cc.skill()\n        >>> sk.style()\n        >>> sk.style(precision=1, metrics=\"rmse\")\n        >>> sk.style(cmap=\"Blues\", show_best=False)\n        \"\"\"\n        # identity metric columns\n        float_cols = list(self._df.select_dtypes(include=\"number\").columns)\n\n        if \"precision\" in kwargs:\n            warnings.warn(\n                FutureWarning(\n                    \"precision is deprecated, it has been renamed to decimals\"\n                )\n            )\n            decimals = kwargs[\"precision\"]\n\n        # selected columns\n        if metrics is None:\n           
 metrics = float_cols\n        else:\n            if isinstance(metrics, str):\n                if not metrics:\n                    metrics = []\n                else:\n                    metrics = [metrics]\n            for column in metrics:\n                if column not in float_cols:\n                    raise ValueError(\n                        f\"Invalid column name {column} (must be one of {float_cols})\"\n                    )\n\n        sdf = self._df.style.format(precision=decimals)\n\n        # apply background gradient\n        bg_cols = list(set(metrics) & set(float_cols))\n        if \"bias\" in bg_cols:\n            mm = self._df.bias.abs().max()\n            sdf = sdf.background_gradient(\n                subset=[\"bias\"], cmap=\"coolwarm\", vmin=-mm, vmax=mm\n            )\n            bg_cols.remove(\"bias\")\n        if \"lin_slope\" in bg_cols:\n            mm = (self._df.lin_slope - 1).abs().max()\n            sdf = sdf.background_gradient(\n                subset=[\"lin_slope\"], cmap=\"coolwarm\", vmin=(1 - mm), vmax=(1 + mm)\n            )\n            bg_cols.remove(\"lin_slope\")\n        if len(bg_cols) > 0:\n            cols = list(set(self._small_is_best_metrics) & set(bg_cols))\n            sdf = sdf.background_gradient(subset=cols, cmap=cmap)\n\n            cols = list(set(self._large_is_best_metrics) & set(bg_cols))\n            cmap_r = self._reverse_colormap(cmap)  # type: ignore\n            sdf = sdf.background_gradient(subset=cols, cmap=cmap_r)\n\n        if show_best:\n            cols = list(set(self._large_is_best_metrics) & set(float_cols))\n            sdf = sdf.apply(self._style_max, subset=cols)\n            cols = list(set(self._small_is_best_metrics) & set(float_cols))\n            sdf = sdf.apply(self._style_min, subset=cols)\n            cols = list(set(self._one_is_best_metrics) & set(float_cols))\n            sdf = sdf.apply(self._style_one_best, subset=cols)\n            if \"bias\" in float_cols:\n            
    sdf = sdf.apply(self._style_abs_min, subset=[\"bias\"])\n\n        return sdf\n\n    def _reverse_colormap(self, cmap):  # type: ignore\n        cmap_r = cmap\n        if isinstance(cmap, str):\n            if cmap[-2:] == \"_r\":\n                cmap_r = cmap_r[:-2]\n            else:\n                cmap_r = cmap + \"_r\"\n        else:\n            cmap_r = cmap.reversed()\n        return cmap_r\n\n    def _style_one_best(self, s: pd.Series) -> list[str]:\n        \"\"\"Using underline-etc to highlight the best in a Series.\"\"\"\n        is_best = (s - 1.0).abs() == (s - 1.0).abs().min()\n        cell_style = (\n            \"text-decoration: underline; font-style: italic; font-weight: bold;\"\n        )\n        return [cell_style if v else \"\" for v in is_best]\n\n    def _style_abs_min(self, s: pd.Series) -> list[str]:\n        \"\"\"Using underline-etc to highlight the best in a Series.\"\"\"\n        is_best = s.abs() == s.abs().min()\n        cell_style = (\n            \"text-decoration: underline; font-style: italic; font-weight: bold;\"\n        )\n        return [cell_style if v else \"\" for v in is_best]\n\n    def _style_min(self, s: pd.Series) -> list[str]:\n        \"\"\"Using underline-etc to highlight the best in a Series.\"\"\"\n        cell_style = (\n            \"text-decoration: underline; font-style: italic; font-weight: bold;\"\n        )\n        return [cell_style if v else \"\" for v in (s == s.min())]\n\n    def _style_max(self, s: pd.Series) -> list[str]:\n        \"\"\"Using underline-etc to highlight the best in a Series.\"\"\"\n        cell_style = (\n            \"text-decoration: underline; font-style: italic; font-weight: bold;\"\n        )\n        return [cell_style if v else \"\" for v in (s == s.max())]\n\n    # =============== Deprecated methods ===============\n\n    # TODO: remove plot_* methods in v1.1; warnings are not needed\n    # as the refering method is also deprecated\n    def plot_line(self, **kwargs):  
# type: ignore\n        return self.plot.line(**kwargs)  # type: ignore\n\n    def plot_bar(self, **kwargs):  # type: ignore\n        return self.plot.bar(**kwargs)  # type: ignore\n\n    def plot_barh(self, **kwargs):  # type: ignore\n        return self.plot.barh(**kwargs)  # type: ignore\n\n    def plot_grid(self, **kwargs):  # type: ignore\n        return self.plot.grid(**kwargs)  # type: ignore\n
"},{"location":"api/skill/#modelskill.skill.SkillTable.metrics","title":"metrics property","text":"
metrics\n

List of metrics (columns) in the SkillTable

"},{"location":"api/skill/#modelskill.skill.SkillTable.mod_names","title":"mod_names property","text":"
mod_names\n

List of model names (in index)

"},{"location":"api/skill/#modelskill.skill.SkillTable.obs_names","title":"obs_names property","text":"
obs_names\n

List of observation names (in index)

"},{"location":"api/skill/#modelskill.skill.SkillTable.quantity_names","title":"quantity_names property","text":"
quantity_names\n

List of quantity names (in index)

"},{"location":"api/skill/#modelskill.skill.SkillTable.query","title":"query","text":"
query(query)\n

Select a subset of the SkillTable by a query string

wrapping pd.DataFrame.query()

Parameters:

Name Type Description Default query str

string supported by pd.DataFrame.query()

required

Returns:

Type Description SkillTable

A subset of the original SkillTable

Examples:

>>> sk = cc.skill()\n>>> sk_above_0p3 = sk.query(\"rmse>0.3\")\n
Source code in modelskill/skill.py
def query(self, query: str) -> SkillTable:\n    \"\"\"Select a subset of the SkillTable by a query string\n\n    wrapping pd.DataFrame.query()\n\n    Parameters\n    ----------\n    query : str\n        string supported by pd.DataFrame.query()\n\n    Returns\n    -------\n    SkillTable\n        A subset of the original SkillTable\n\n    Examples\n    --------\n    >>> sk = cc.skill()\n    >>> sk_above_0p3 = sk.query(\"rmse>0.3\")\n    \"\"\"\n    return self.__class__(self.data.query(query))\n
"},{"location":"api/skill/#modelskill.skill.SkillTable.round","title":"round","text":"
round(decimals=3)\n

Round all values in SkillTable

Parameters:

Name Type Description Default decimals int

Number of decimal places to round to (default: 3). If decimals is negative, it specifies the number of positions to the left of the decimal point.

3

Returns:

Type Description SkillTable

A new SkillTable with rounded values

Source code in modelskill/skill.py
def round(self, decimals: int = 3) -> SkillTable:\n    \"\"\"Round all values in SkillTable\n\n    Parameters\n    ----------\n    decimals : int, optional\n        Number of decimal places to round to (default: 3).\n        If decimals is negative, it specifies the number of\n        positions to the left of the decimal point.\n\n    Returns\n    -------\n    SkillTable\n        A new SkillTable with rounded values\n    \"\"\"\n\n    return self.__class__(self.data.round(decimals=decimals))\n
"},{"location":"api/skill/#modelskill.skill.SkillTable.sel","title":"sel","text":"
sel(query=None, reduce_index=True, **kwargs)\n

Select a subset of the SkillTable by a query, (part of) the index, or specific columns

Parameters:

Name Type Description Default reduce_index bool

Should unnecessary levels of the index be removed after subsetting? Removed levels will stay as columns. By default True

True **kwargs Any

Concrete keys depend on the index names of the SkillTable (from the \"by\" argument in cc.skill() method) \"model\"=... to select specific models, \"observation\"=... to select specific observations

{}

Returns:

Type Description SkillTable

A subset of the original SkillTable

Examples:

>>> sk = cc.skill()\n>>> sk_SW1 = sk.sel(model = \"SW_1\")\n>>> sk2 = sk.sel(observation = [\"EPL\", \"HKNA\"])\n
Source code in modelskill/skill.py
def sel(\n    self, query: str | None = None, reduce_index: bool = True, **kwargs: Any\n) -> SkillTable | SkillArray:\n    \"\"\"Select a subset of the SkillTable by a query,\n       (part of) the index, or specific columns\n\n    Parameters\n    ----------\n    reduce_index : bool, optional\n        Should unnecessary levels of the index be removed after subsetting?\n        Removed levels will stay as columns. By default True\n    **kwargs\n        Concrete keys depend on the index names of the SkillTable\n        (from the \"by\" argument in cc.skill() method)\n        \"model\"=... to select specific models,\n        \"observation\"=... to select specific observations\n\n    Returns\n    -------\n    SkillTable\n        A subset of the original SkillTable\n\n    Examples\n    --------\n    >>> sk = cc.skill()\n    >>> sk_SW1 = sk.sel(model = \"SW_1\")\n    >>> sk2 = sk.sel(observation = [\"EPL\", \"HKNA\"])\n    \"\"\"\n    if query is not None:\n        warnings.warn(\n            \"s.sel(query=...) is deprecated, use s.query(...) instead\",\n            FutureWarning,\n        )\n        return self.query(query)\n\n    for key, value in kwargs.items():\n        if key == \"metrics\" or key == \"columns\":\n            warnings.warn(\n                f\"s.sel({key}=...) is deprecated, use getitem s[...] instead\",\n                FutureWarning,\n            )\n            return self[value]  # type: ignore\n\n    df = self.to_dataframe(drop_xy=False)\n\n    for key, value in kwargs.items():\n        if key in df.index.names:\n            df = self._sel_from_index(df, key, value)\n        else:\n            raise KeyError(\n                f\"Unknown index {key}. Valid index names are {df.index.names}\"\n            )\n\n    if isinstance(df, pd.Series):\n        return SkillArray(df)\n    if reduce_index and isinstance(df.index, pd.MultiIndex):\n        df = self._reduce_index(df)\n    return self.__class__(df)\n
"},{"location":"api/skill/#modelskill.skill.SkillTable.sort_index","title":"sort_index","text":"
sort_index(*args, **kwargs)\n

Sort by index (level) e.g. sorting by observation

Wrapping pd.DataFrame.sort_index()

Returns:

Type Description SkillTable

A new SkillTable with sorted index

Examples:

>>> sk = cc.skill()\n>>> sk.sort_index()\n>>> sk.sort_index(level=\"observation\")\n
Source code in modelskill/skill.py
def sort_index(self, *args, **kwargs) -> SkillTable:  # type: ignore\n    \"\"\"Sort by index (level) e.g. sorting by observation\n\n    Wrapping pd.DataFrame.sort_index()\n\n    Returns\n    -------\n    SkillTable\n        A new SkillTable with sorted index\n\n    Examples\n    --------\n    >>> sk = cc.skill()\n    >>> sk.sort_index()\n    >>> sk.sort_index(level=\"observation\")\n    \"\"\"\n    return self.__class__(self.data.sort_index(*args, **kwargs))\n
"},{"location":"api/skill/#modelskill.skill.SkillTable.sort_values","title":"sort_values","text":"
sort_values(*args, **kwargs)\n

Sort by values e.g. sorting by rmse values

Wrapping pd.DataFrame.sort_values()

Returns:

Type Description SkillTable

A new SkillTable with sorted values

Examples:

>>> sk = cc.skill()\n>>> sk.sort_values(\"rmse\")\n>>> sk.sort_values(\"rmse\", ascending=False)\n>>> sk.sort_values([\"n\", \"rmse\"])\n
Source code in modelskill/skill.py
def sort_values(self, *args, **kwargs) -> SkillTable:  # type: ignore\n    \"\"\"Sort by values e.g. sorting by rmse values\n\n    Wrapping pd.DataFrame.sort_values()\n\n    Returns\n    -------\n    SkillTable\n        A new SkillTable with sorted values\n\n    Examples\n    --------\n    >>> sk = cc.skill()\n    >>> sk.sort_values(\"rmse\")\n    >>> sk.sort_values(\"rmse\", ascending=False)\n    >>> sk.sort_values([\"n\", \"rmse\"])\n    \"\"\"\n    return self.__class__(self.data.sort_values(*args, **kwargs))\n
"},{"location":"api/skill/#modelskill.skill.SkillTable.style","title":"style","text":"
style(decimals=3, metrics=None, cmap='OrRd', show_best=True, **kwargs)\n

Style SkillTable with colors using pandas style

Parameters:

Name Type Description Default decimals int

Number of decimal places to round to (default: 3).

3 metrics str or List[str]

apply background gradient color to these columns, by default all; if columns is [] then no background gradient will be applied.

None cmap str

colormap of background gradient, by default \"OrRd\", except \"bias\" column which will always be \"coolwarm\"

'OrRd' show_best bool

indicate best of each column by underline, by default True

True

Returns:

Type Description Styler

Returns a pandas Styler object.

Examples:

>>> sk = cc.skill()\n>>> sk.style()\n>>> sk.style(precision=1, metrics=\"rmse\")\n>>> sk.style(cmap=\"Blues\", show_best=False)\n
Source code in modelskill/skill.py
def style(\n    self,\n    decimals: int = 3,\n    metrics: Iterable[str] | None = None,\n    cmap: str = \"OrRd\",\n    show_best: bool = True,\n    **kwargs: Any,\n) -> pd.io.formats.style.Styler:\n    \"\"\"Style SkillTable with colors using pandas style\n\n    Parameters\n    ----------\n    decimals : int, optional\n        Number of decimal places to round to (default: 3).\n    metrics : str or List[str], optional\n        apply background gradient color to these columns, by default all;\n        if columns is [] then no background gradient will be applied.\n    cmap : str, optional\n        colormap of background gradient, by default \"OrRd\",\n        except \"bias\" column which will always be \"coolwarm\"\n    show_best : bool, optional\n        indicate best of each column by underline, by default True\n\n    Returns\n    -------\n    pd.Styler\n        Returns a pandas Styler object.\n\n    Examples\n    --------\n    >>> sk = cc.skill()\n    >>> sk.style()\n    >>> sk.style(precision=1, metrics=\"rmse\")\n    >>> sk.style(cmap=\"Blues\", show_best=False)\n    \"\"\"\n    # identity metric columns\n    float_cols = list(self._df.select_dtypes(include=\"number\").columns)\n\n    if \"precision\" in kwargs:\n        warnings.warn(\n            FutureWarning(\n                \"precision is deprecated, it has been renamed to decimals\"\n            )\n        )\n        decimals = kwargs[\"precision\"]\n\n    # selected columns\n    if metrics is None:\n        metrics = float_cols\n    else:\n        if isinstance(metrics, str):\n            if not metrics:\n                metrics = []\n            else:\n                metrics = [metrics]\n        for column in metrics:\n            if column not in float_cols:\n                raise ValueError(\n                    f\"Invalid column name {column} (must be one of {float_cols})\"\n                )\n\n    sdf = self._df.style.format(precision=decimals)\n\n    # apply background gradient\n    bg_cols = 
list(set(metrics) & set(float_cols))\n    if \"bias\" in bg_cols:\n        mm = self._df.bias.abs().max()\n        sdf = sdf.background_gradient(\n            subset=[\"bias\"], cmap=\"coolwarm\", vmin=-mm, vmax=mm\n        )\n        bg_cols.remove(\"bias\")\n    if \"lin_slope\" in bg_cols:\n        mm = (self._df.lin_slope - 1).abs().max()\n        sdf = sdf.background_gradient(\n            subset=[\"lin_slope\"], cmap=\"coolwarm\", vmin=(1 - mm), vmax=(1 + mm)\n        )\n        bg_cols.remove(\"lin_slope\")\n    if len(bg_cols) > 0:\n        cols = list(set(self._small_is_best_metrics) & set(bg_cols))\n        sdf = sdf.background_gradient(subset=cols, cmap=cmap)\n\n        cols = list(set(self._large_is_best_metrics) & set(bg_cols))\n        cmap_r = self._reverse_colormap(cmap)  # type: ignore\n        sdf = sdf.background_gradient(subset=cols, cmap=cmap_r)\n\n    if show_best:\n        cols = list(set(self._large_is_best_metrics) & set(float_cols))\n        sdf = sdf.apply(self._style_max, subset=cols)\n        cols = list(set(self._small_is_best_metrics) & set(float_cols))\n        sdf = sdf.apply(self._style_min, subset=cols)\n        cols = list(set(self._one_is_best_metrics) & set(float_cols))\n        sdf = sdf.apply(self._style_one_best, subset=cols)\n        if \"bias\" in float_cols:\n            sdf = sdf.apply(self._style_abs_min, subset=[\"bias\"])\n\n    return sdf\n
"},{"location":"api/skill/#modelskill.skill.SkillTable.swaplevel","title":"swaplevel","text":"
swaplevel(*args, **kwargs)\n

Swap the levels of the MultiIndex e.g. swapping 'model' and 'observation'

Wrapping pd.DataFrame.swaplevel()

Returns:

Type Description SkillTable

A new SkillTable with swapped levels

Examples:

>>> sk = cc.skill()\n>>> sk.swaplevel().sort_index(level=\"observation\")\n>>> sk.swaplevel(\"model\", \"observation\")\n>>> sk.swaplevel(0, 1)\n
Source code in modelskill/skill.py
def swaplevel(self, *args, **kwargs) -> SkillTable:  # type: ignore\n    \"\"\"Swap the levels of the MultiIndex e.g. swapping 'model' and 'observation'\n\n    Wrapping pd.DataFrame.swaplevel()\n\n    Returns\n    -------\n    SkillTable\n        A new SkillTable with swapped levels\n\n    Examples\n    --------\n    >>> sk = cc.skill()\n    >>> sk.swaplevel().sort_index(level=\"observation\")\n    >>> sk.swaplevel(\"model\", \"observation\")\n    >>> sk.swaplevel(0, 1)\n    \"\"\"\n    return self.__class__(self.data.swaplevel(*args, **kwargs))\n
"},{"location":"api/skill/#modelskill.skill.SkillTable.to_dataframe","title":"to_dataframe","text":"
to_dataframe(drop_xy=True)\n

Convert SkillTable to pd.DataFrame

Parameters:

Name Type Description Default drop_xy bool

Drop the x, y coordinates?, by default True

True

Returns:

Type Description DataFrame

Skill data as pd.DataFrame

Source code in modelskill/skill.py
def to_dataframe(self, drop_xy: bool = True) -> pd.DataFrame:\n    \"\"\"Convert SkillTable to pd.DataFrame\n\n    Parameters\n    ----------\n    drop_xy : bool, optional\n        Drop the x, y coordinates?, by default True\n\n    Returns\n    -------\n    pd.DataFrame\n        Skill data as pd.DataFrame\n    \"\"\"\n    if drop_xy:\n        return self.data.drop(columns=[\"x\", \"y\"], errors=\"ignore\")\n    else:\n        return self.data.copy()\n
"},{"location":"api/skill/#modelskill.skill.SkillTable.to_geodataframe","title":"to_geodataframe","text":"
to_geodataframe(crs='EPSG:4326')\n

Convert SkillTable to geopandas.GeoDataFrame

Note: requires geopandas to be installed

Note: requires x and y columns to be present

Parameters:

Name Type Description Default crs str

Coordinate reference system identifier passed to the GeoDataFrame constructor, by default \"EPSG:4326\"

'EPSG:4326'

Returns:

Type Description GeoDataFrame

Skill data as GeoDataFrame

Source code in modelskill/skill.py
def to_geodataframe(self, crs: str = \"EPSG:4326\") -> gpd.GeoDataFrame:\n    \"\"\"Convert SkillTable to geopandas.GeoDataFrame\n\n    Note: requires geopandas to be installed\n\n    Note: requires x and y columns to be present\n\n    Parameters\n    ----------\n    crs : str, optional\n        Coordinate reference system identifier passed to the\n        GeoDataFrame constructor, by default \"EPSG:4326\"\n\n    Returns\n    -------\n    gpd.GeoDataFrame\n        Skill data as GeoDataFrame\n    \"\"\"\n    import geopandas as gpd\n\n    assert \"x\" in self.data.columns\n    assert \"y\" in self.data.columns\n\n    df = self.to_dataframe(drop_xy=False)\n\n    gdf = gpd.GeoDataFrame(\n        df,\n        geometry=gpd.points_from_xy(df.x, df.y),\n        crs=crs,\n    )\n\n    return gdf\n
"},{"location":"api/skill/#modelskill.skill.SkillArray","title":"modelskill.skill.SkillArray","text":"

SkillArray object for visualization obtained by selecting a single metric from a SkillTable.

Examples:

>>> sk = cc.skill()   # SkillTable\n>>> sk.rmse           # SkillArray\n>>> sk.rmse.plot.line()\n
Source code in modelskill/skill.py
class SkillArray:\n    \"\"\"SkillArray object for visualization obtained by\n    selecting a single metric from a SkillTable.\n\n    Examples\n    --------\n    >>> sk = cc.skill()   # SkillTable\n    >>> sk.rmse           # SkillArray\n    >>> sk.rmse.plot.line()\n    \"\"\"\n\n    def __init__(self, data: pd.DataFrame) -> None:\n        self.data = data\n        self._ser = data.iloc[:, -1]  # last column is the metric\n\n        self.plot = SkillArrayPlotter(self)\n        \"\"\"Plot using the SkillArrayPlotter\n\n        Examples\n        --------\n        >>> sk.rmse.plot.line()\n        >>> sk.rmse.plot.bar()\n        >>> sk.rmse.plot.barh()\n        >>> sk.rmse.plot.grid()\n        \"\"\"\n\n    def to_dataframe(self, drop_xy: bool = True) -> pd.DataFrame:\n        \"\"\"Convert SkillArray to pd.DataFrame\n\n        Parameters\n        ----------\n        drop_xy : bool, optional\n            Drop the x, y coordinates?, by default True\n\n        Returns\n        -------\n        pd.DataFrame\n            Skill data as pd.DataFrame\n        \"\"\"\n        if drop_xy:\n            return self._ser.to_frame()\n        else:\n            return self.data.copy()\n\n    def __repr__(self) -> str:\n        return repr(self.to_dataframe())\n\n    def _repr_html_(self) -> Any:\n        return self.to_dataframe()._repr_html_()\n\n    @property\n    def name(self) -> Any:\n        \"\"\"Name of the metric\"\"\"\n        return self._ser.name\n\n    def to_geodataframe(self, crs: str = \"EPSG:4326\") -> gpd.GeoDataFrame:\n        \"\"\"Convert SkillArray to geopandas.GeoDataFrame\n\n        Note: requires geopandas to be installed\n\n        Note: requires x and y columns to be present\n\n        Parameters\n        ----------\n        crs : str, optional\n            Coordinate reference system identifier passed to the\n            GeoDataFrame constructor, by default \"EPSG:4326\"\n\n        Returns\n        -------\n        gpd.GeoDataFrame\n            Skill data 
as GeoDataFrame\n        \"\"\"\n        import geopandas as gpd\n\n        assert \"x\" in self.data.columns\n        assert \"y\" in self.data.columns\n\n        gdf = gpd.GeoDataFrame(\n            self._ser,\n            geometry=gpd.points_from_xy(self.data.x, self.data.y),\n            crs=crs,\n        )\n\n        return gdf\n
"},{"location":"api/skill/#modelskill.skill.SkillArray.name","title":"name property","text":"
name\n

Name of the metric

"},{"location":"api/skill/#modelskill.skill.SkillArray.plot","title":"plot instance-attribute","text":"
plot = SkillArrayPlotter(self)\n

Plot using the SkillArrayPlotter

Examples:

>>> sk.rmse.plot.line()\n>>> sk.rmse.plot.bar()\n>>> sk.rmse.plot.barh()\n>>> sk.rmse.plot.grid()\n
"},{"location":"api/skill/#modelskill.skill.SkillArray.to_dataframe","title":"to_dataframe","text":"
to_dataframe(drop_xy=True)\n

Convert SkillArray to pd.DataFrame

Parameters:

Name Type Description Default drop_xy bool

Drop the x, y coordinates?, by default True

True

Returns:

Type Description DataFrame

Skill data as pd.DataFrame

Source code in modelskill/skill.py
def to_dataframe(self, drop_xy: bool = True) -> pd.DataFrame:\n    \"\"\"Convert SkillArray to pd.DataFrame\n\n    Parameters\n    ----------\n    drop_xy : bool, optional\n        Drop the x, y coordinates?, by default True\n\n    Returns\n    -------\n    pd.DataFrame\n        Skill data as pd.DataFrame\n    \"\"\"\n    if drop_xy:\n        return self._ser.to_frame()\n    else:\n        return self.data.copy()\n
"},{"location":"api/skill/#modelskill.skill.SkillArray.to_geodataframe","title":"to_geodataframe","text":"
to_geodataframe(crs='EPSG:4326')\n

Convert SkillArray to geopandas.GeoDataFrame

Note: requires geopandas to be installed

Note: requires x and y columns to be present

Parameters:

Name Type Description Default crs str

Coordinate reference system identifier passed to the GeoDataFrame constructor, by default \"EPSG:4326\"

'EPSG:4326'

Returns:

Type Description GeoDataFrame

Skill data as GeoDataFrame

Source code in modelskill/skill.py
def to_geodataframe(self, crs: str = \"EPSG:4326\") -> gpd.GeoDataFrame:\n    \"\"\"Convert SkillArray to geopandas.GeoDataFrame\n\n    Note: requires geopandas to be installed\n\n    Note: requires x and y columns to be present\n\n    Parameters\n    ----------\n    crs : str, optional\n        Coordinate reference system identifier passed to the\n        GeoDataFrame constructor, by default \"EPSG:4326\"\n\n    Returns\n    -------\n    gpd.GeoDataFrame\n        Skill data as GeoDataFrame\n    \"\"\"\n    import geopandas as gpd\n\n    assert \"x\" in self.data.columns\n    assert \"y\" in self.data.columns\n\n    gdf = gpd.GeoDataFrame(\n        self._ser,\n        geometry=gpd.points_from_xy(self.data.x, self.data.y),\n        crs=crs,\n    )\n\n    return gdf\n
"},{"location":"api/skill/#modelskill.skill.SkillArrayPlotter","title":"modelskill.skill.SkillArrayPlotter","text":"

SkillArrayPlotter object for visualization of a single metric (SkillArray)

plot.line() : line plot plot.bar() : bar chart plot.barh() : horizontal bar chart plot.grid() : colored grid

Source code in modelskill/skill.py
class SkillArrayPlotter:\n    \"\"\"SkillArrayPlotter object for visualization of a single metric (SkillArray)\n\n    plot.line() : line plot\n    plot.bar() : bar chart\n    plot.barh() : horizontal bar chart\n    plot.grid() : colored grid\n    \"\"\"\n\n    def __init__(self, skillarray: \"SkillArray\") -> None:\n        self.skillarray = skillarray\n\n    def _name_to_title_in_kwargs(self, kwargs: Any) -> None:\n        if \"title\" not in kwargs:\n            if self.skillarray.name is not None:\n                kwargs[\"title\"] = self.skillarray.name\n\n    def _get_plot_df(self, level: int | str = 0) -> pd.DataFrame:\n        ser = self.skillarray._ser\n        if isinstance(ser.index, pd.MultiIndex):\n            df = ser.unstack(level=level)\n        else:\n            df = ser.to_frame()\n        return df\n\n    # TODO hide this for now until we are certain about the API\n    # def map(self, **kwargs):\n    #     if \"model\" in self.skillarray.data.index.names:\n    #         n_models = len(self.skillarray.data.reset_index().model.unique())\n    #         if n_models > 1:\n    #             raise ValueError(\n    #                 \"map() is only possible for single model skill. Use .sel(model=...) to select a single model.\"\n    #             )\n\n    #     gdf = self.skillarray.to_geodataframe()\n    #     column = self.skillarray.name\n    #     kwargs = {\"marker_kwds\": {\"radius\": 10}} | kwargs\n\n    #     return gdf.explore(column=column, **kwargs)\n\n    def __call__(self, *args: Any, **kwds: Any) -> Any:\n        raise NotImplementedError(\n            \"It is not possible to call plot directly (has no default)! Use one of the plot methods explicitly e.g. plot.line() or plot.bar()\"\n        )\n\n    def line(\n        self,\n        level: int | str = 0,\n        **kwargs: Any,\n    ) -> Axes:\n        \"\"\"Plot statistic as a lines using pd.DataFrame.plot.line()\n\n        Primarily for MultiIndex skill objects, e.g. 
multiple models and multiple observations\n\n        Parameters\n        ----------\n        level : int or str, optional\n            level to unstack, by default 0\n        **kwargs\n            key word arguments to be pased to pd.DataFrame.plot.line()\n            e.g. marker, title, figsize, ...\n\n        Examples\n        --------\n        >>> sk = cc.skill()[\"rmse\"]\n        >>> sk.plot.line()\n        >>> sk.plot.line(marker=\"o\", linestyle=':')\n        >>> sk.plot.line(color=['0.2', '0.4', '0.6'])\n        \"\"\"\n        df = self._get_plot_df(level=level)\n        self._name_to_title_in_kwargs(kwargs)\n        axes = df.plot.line(**kwargs)\n\n        xlabels = list(df.index)\n        numeric_index = all(isinstance(item, (int, float)) for item in xlabels)\n\n        if not isinstance(axes, Iterable):\n            axes = [axes]\n        for ax in axes:\n            if not isinstance(df.index, pd.DatetimeIndex):\n                if numeric_index:\n                    xlabel_positions = xlabels\n                else:\n                    xlabel_positions = np.arange(len(xlabels)).tolist()\n                ax.set_xticks(xlabel_positions)\n                ax.set_xticklabels(xlabels, rotation=90)\n        return axes\n\n    def bar(self, level: int | str = 0, **kwargs: Any) -> Axes:\n        \"\"\"Plot statistic as bar chart using pd.DataFrame.plot.bar()\n\n        Parameters\n        ----------\n        level : int or str, optional\n            level to unstack, by default 0\n        **kwargs\n            key word arguments to be pased to pd.DataFrame.plot.bar()\n            e.g. 
color, title, figsize, ...\n\n        Returns\n        -------\n        AxesSubplot\n\n        Examples\n        --------\n        >>> sk = cc.skill()[\"rmse\"]\n        >>> sk.plot.bar()\n        >>> sk.plot.bar(level=\"observation\")\n        >>> sk.plot.bar(title=\"Root Mean Squared Error\")\n        >>> sk.plot.bar(color=[\"red\",\"blue\"])\n        \"\"\"\n        df = self._get_plot_df(level=level)\n        self._name_to_title_in_kwargs(kwargs)\n        return df.plot.bar(**kwargs)\n\n    def barh(self, level: int | str = 0, **kwargs: Any) -> Axes:\n        \"\"\"Plot statistic as horizontal bar chart using pd.DataFrame.plot.barh()\n\n        Parameters\n        ----------\n        level : int or str, optional\n            level to unstack, by default 0\n        **kwargs\n            key word arguments to be passed to pd.DataFrame.plot.barh()\n            e.g. color, title, figsize, ...\n\n        Returns\n        -------\n        AxesSubplot\n\n        Examples\n        --------\n        >>> sk = cc.skill()[\"rmse\"]\n        >>> sk.plot.barh()\n        >>> sk.plot.barh(level=\"observation\")\n        >>> sk.plot.barh(title=\"Root Mean Squared Error\")\n        \"\"\"\n        df = self._get_plot_df(level)\n        self._name_to_title_in_kwargs(kwargs)\n        return df.plot.barh(**kwargs)\n\n    def grid(\n        self,\n        show_numbers: bool = True,\n        precision: int = 3,\n        fmt: str | None = None,\n        ax: Axes | None = None,\n        figsize: tuple[float, float] | None = None,\n        title: str | None = None,\n        cmap: str | Colormap | None = None,\n    ) -> Axes | None:\n        \"\"\"Plot statistic as a colored grid, optionally with values in the cells.\n\n        Primarily for MultiIndex skill objects, e.g. 
multiple models and multiple observations\n\n        Parameters\n        ----------\n        show_numbers : bool, optional\n            should values of the static be shown in the cells?, by default True\n            if False, a colorbar will be displayed instead\n        precision : int, optional\n            number of decimals if show_numbers, by default 3\n        fmt : str, optional\n            format string, e.g. \".0%\" to show value as percentage\n        ax : Axes, optional\n            matplotlib axes, by default None\n        figsize : Tuple(float, float), optional\n            figure size, by default None\n        title : str, optional\n            plot title, by default name of statistic\n        cmap : str, optional\n            colormap, by default \"OrRd\" (\"coolwarm\" if bias)\n\n        Returns\n        -------\n        AxesSubplot\n\n        Examples\n        --------\n        >>> sk = cc.skill()[\"rmse\"]\n        >>> sk.plot.grid()\n        >>> sk.plot.grid(show_numbers=False, cmap=\"magma\")\n        >>> sk.plot.grid(precision=1)\n        >>> sk.plot.grid(fmt=\".0%\", title=\"Root Mean Squared Error\")\n        \"\"\"\n\n        s = self.skillarray\n        ser = s._ser\n\n        errors = _validate_multi_index(ser.index)  # type: ignore\n        if len(errors) > 0:\n            warnings.warn(\"plot_grid: \" + \"\\n\".join(errors))\n            # TODO raise error?\n            return None\n            # df = self.df[field]    TODO: at_least_2d...\n        df = ser.unstack()\n\n        vmin = None\n        vmax = None\n        if cmap is None:\n            cmap = \"OrRd\"\n            if s.name == \"bias\":\n                cmap = \"coolwarm\"\n                mm = ser.abs().max()\n                vmin = -mm\n                vmax = mm\n        if title is None:\n            title = s.name\n        xlabels = list(df.keys())\n        nx = len(xlabels)\n        ylabels = list(df.index)\n        ny = len(ylabels)\n\n        if (fmt is not None) 
and fmt[0] != \"{\":\n            fmt = \"{:\" + fmt + \"}\"\n\n        if figsize is None:\n            figsize = (nx, ny)\n        fig, ax = _get_fig_ax(ax, figsize)\n        assert ax is not None\n        pcm = ax.pcolormesh(df, cmap=cmap, vmin=vmin, vmax=vmax)\n        ax.set_xticks(np.arange(nx) + 0.5)\n        ax.set_xticklabels(xlabels, rotation=90)\n        ax.set_yticks(np.arange(ny) + 0.5)\n        ax.set_yticklabels(ylabels)\n        if show_numbers:\n            mean_val = df.to_numpy().mean()\n            for ii in range(ny):\n                for jj in range(nx):\n                    val = df.iloc[ii, jj].round(precision)\n                    col = \"w\" if val > mean_val else \"k\"\n                    if s.name == \"bias\":\n                        col = \"w\" if np.abs(val) > (0.7 * mm) else \"k\"\n                    if fmt is not None:\n                        val = fmt.format(val)\n                    ax.text(\n                        jj + 0.5,\n                        ii + 0.5,\n                        val,\n                        ha=\"center\",\n                        va=\"center\",\n                        # size=15,\n                        color=col,\n                    )\n        else:\n            fig.colorbar(pcm, ax=ax)\n        ax.set_title(title, fontsize=14)\n        return ax\n
"},{"location":"api/skill/#modelskill.skill.SkillArrayPlotter.bar","title":"bar","text":"
bar(level=0, **kwargs)\n

Plot statistic as bar chart using pd.DataFrame.plot.bar()

Parameters:

Name Type Description Default level int or str

level to unstack, by default 0

0 **kwargs Any

key word arguments to be pased to pd.DataFrame.plot.bar() e.g. color, title, figsize, ...

{}

Returns:

Type Description AxesSubplot

Examples:

>>> sk = cc.skill()[\"rmse\"]\n>>> sk.plot.bar()\n>>> sk.plot.bar(level=\"observation\")\n>>> sk.plot.bar(title=\"Root Mean Squared Error\")\n>>> sk.plot.bar(color=[\"red\",\"blue\"])\n
Source code in modelskill/skill.py
def bar(self, level: int | str = 0, **kwargs: Any) -> Axes:\n    \"\"\"Plot statistic as bar chart using pd.DataFrame.plot.bar()\n\n    Parameters\n    ----------\n    level : int or str, optional\n        level to unstack, by default 0\n    **kwargs\n        key word arguments to be pased to pd.DataFrame.plot.bar()\n        e.g. color, title, figsize, ...\n\n    Returns\n    -------\n    AxesSubplot\n\n    Examples\n    --------\n    >>> sk = cc.skill()[\"rmse\"]\n    >>> sk.plot.bar()\n    >>> sk.plot.bar(level=\"observation\")\n    >>> sk.plot.bar(title=\"Root Mean Squared Error\")\n    >>> sk.plot.bar(color=[\"red\",\"blue\"])\n    \"\"\"\n    df = self._get_plot_df(level=level)\n    self._name_to_title_in_kwargs(kwargs)\n    return df.plot.bar(**kwargs)\n
"},{"location":"api/skill/#modelskill.skill.SkillArrayPlotter.barh","title":"barh","text":"
barh(level=0, **kwargs)\n

Plot statistic as horizontal bar chart using pd.DataFrame.plot.barh()

Parameters:

Name Type Description Default level int or str

level to unstack, by default 0

0 **kwargs Any

key word arguments to be passed to pd.DataFrame.plot.barh() e.g. color, title, figsize, ...

{}

Returns:

Type Description AxesSubplot

Examples:

>>> sk = cc.skill()[\"rmse\"]\n>>> sk.plot.barh()\n>>> sk.plot.barh(level=\"observation\")\n>>> sk.plot.barh(title=\"Root Mean Squared Error\")\n
Source code in modelskill/skill.py
def barh(self, level: int | str = 0, **kwargs: Any) -> Axes:\n    \"\"\"Plot statistic as horizontal bar chart using pd.DataFrame.plot.barh()\n\n    Parameters\n    ----------\n    level : int or str, optional\n        level to unstack, by default 0\n    **kwargs\n        key word arguments to be passed to pd.DataFrame.plot.barh()\n        e.g. color, title, figsize, ...\n\n    Returns\n    -------\n    AxesSubplot\n\n    Examples\n    --------\n    >>> sk = cc.skill()[\"rmse\"]\n    >>> sk.plot.barh()\n    >>> sk.plot.barh(level=\"observation\")\n    >>> sk.plot.barh(title=\"Root Mean Squared Error\")\n    \"\"\"\n    df = self._get_plot_df(level)\n    self._name_to_title_in_kwargs(kwargs)\n    return df.plot.barh(**kwargs)\n
"},{"location":"api/skill/#modelskill.skill.SkillArrayPlotter.grid","title":"grid","text":"
grid(show_numbers=True, precision=3, fmt=None, ax=None, figsize=None, title=None, cmap=None)\n

Plot statistic as a colored grid, optionally with values in the cells.

Primarily for MultiIndex skill objects, e.g. multiple models and multiple observations

Parameters:

Name Type Description Default show_numbers bool

should values of the static be shown in the cells?, by default True if False, a colorbar will be displayed instead

True precision int

number of decimals if show_numbers, by default 3

3 fmt str

format string, e.g. \".0%\" to show value as percentage

None ax Axes

matplotlib axes, by default None

None figsize Tuple(float, float)

figure size, by default None

None title str

plot title, by default name of statistic

None cmap str

colormap, by default \"OrRd\" (\"coolwarm\" if bias)

None

Returns:

Type Description AxesSubplot

Examples:

>>> sk = cc.skill()[\"rmse\"]\n>>> sk.plot.grid()\n>>> sk.plot.grid(show_numbers=False, cmap=\"magma\")\n>>> sk.plot.grid(precision=1)\n>>> sk.plot.grid(fmt=\".0%\", title=\"Root Mean Squared Error\")\n
Source code in modelskill/skill.py
def grid(\n    self,\n    show_numbers: bool = True,\n    precision: int = 3,\n    fmt: str | None = None,\n    ax: Axes | None = None,\n    figsize: tuple[float, float] | None = None,\n    title: str | None = None,\n    cmap: str | Colormap | None = None,\n) -> Axes | None:\n    \"\"\"Plot statistic as a colored grid, optionally with values in the cells.\n\n    Primarily for MultiIndex skill objects, e.g. multiple models and multiple observations\n\n    Parameters\n    ----------\n    show_numbers : bool, optional\n        should values of the static be shown in the cells?, by default True\n        if False, a colorbar will be displayed instead\n    precision : int, optional\n        number of decimals if show_numbers, by default 3\n    fmt : str, optional\n        format string, e.g. \".0%\" to show value as percentage\n    ax : Axes, optional\n        matplotlib axes, by default None\n    figsize : Tuple(float, float), optional\n        figure size, by default None\n    title : str, optional\n        plot title, by default name of statistic\n    cmap : str, optional\n        colormap, by default \"OrRd\" (\"coolwarm\" if bias)\n\n    Returns\n    -------\n    AxesSubplot\n\n    Examples\n    --------\n    >>> sk = cc.skill()[\"rmse\"]\n    >>> sk.plot.grid()\n    >>> sk.plot.grid(show_numbers=False, cmap=\"magma\")\n    >>> sk.plot.grid(precision=1)\n    >>> sk.plot.grid(fmt=\".0%\", title=\"Root Mean Squared Error\")\n    \"\"\"\n\n    s = self.skillarray\n    ser = s._ser\n\n    errors = _validate_multi_index(ser.index)  # type: ignore\n    if len(errors) > 0:\n        warnings.warn(\"plot_grid: \" + \"\\n\".join(errors))\n        # TODO raise error?\n        return None\n        # df = self.df[field]    TODO: at_least_2d...\n    df = ser.unstack()\n\n    vmin = None\n    vmax = None\n    if cmap is None:\n        cmap = \"OrRd\"\n        if s.name == \"bias\":\n            cmap = \"coolwarm\"\n            mm = ser.abs().max()\n            vmin = -mm\n         
   vmax = mm\n    if title is None:\n        title = s.name\n    xlabels = list(df.keys())\n    nx = len(xlabels)\n    ylabels = list(df.index)\n    ny = len(ylabels)\n\n    if (fmt is not None) and fmt[0] != \"{\":\n        fmt = \"{:\" + fmt + \"}\"\n\n    if figsize is None:\n        figsize = (nx, ny)\n    fig, ax = _get_fig_ax(ax, figsize)\n    assert ax is not None\n    pcm = ax.pcolormesh(df, cmap=cmap, vmin=vmin, vmax=vmax)\n    ax.set_xticks(np.arange(nx) + 0.5)\n    ax.set_xticklabels(xlabels, rotation=90)\n    ax.set_yticks(np.arange(ny) + 0.5)\n    ax.set_yticklabels(ylabels)\n    if show_numbers:\n        mean_val = df.to_numpy().mean()\n        for ii in range(ny):\n            for jj in range(nx):\n                val = df.iloc[ii, jj].round(precision)\n                col = \"w\" if val > mean_val else \"k\"\n                if s.name == \"bias\":\n                    col = \"w\" if np.abs(val) > (0.7 * mm) else \"k\"\n                if fmt is not None:\n                    val = fmt.format(val)\n                ax.text(\n                    jj + 0.5,\n                    ii + 0.5,\n                    val,\n                    ha=\"center\",\n                    va=\"center\",\n                    # size=15,\n                    color=col,\n                )\n    else:\n        fig.colorbar(pcm, ax=ax)\n    ax.set_title(title, fontsize=14)\n    return ax\n
"},{"location":"api/skill/#modelskill.skill.SkillArrayPlotter.line","title":"line","text":"
line(level=0, **kwargs)\n

Plot statistic as a lines using pd.DataFrame.plot.line()

Primarily for MultiIndex skill objects, e.g. multiple models and multiple observations

Parameters:

Name Type Description Default level int or str

level to unstack, by default 0

0 **kwargs Any

key word arguments to be pased to pd.DataFrame.plot.line() e.g. marker, title, figsize, ...

{}

Examples:

>>> sk = cc.skill()[\"rmse\"]\n>>> sk.plot.line()\n>>> sk.plot.line(marker=\"o\", linestyle=':')\n>>> sk.plot.line(color=['0.2', '0.4', '0.6'])\n
Source code in modelskill/skill.py
def line(\n    self,\n    level: int | str = 0,\n    **kwargs: Any,\n) -> Axes:\n    \"\"\"Plot statistic as a lines using pd.DataFrame.plot.line()\n\n    Primarily for MultiIndex skill objects, e.g. multiple models and multiple observations\n\n    Parameters\n    ----------\n    level : int or str, optional\n        level to unstack, by default 0\n    **kwargs\n        key word arguments to be pased to pd.DataFrame.plot.line()\n        e.g. marker, title, figsize, ...\n\n    Examples\n    --------\n    >>> sk = cc.skill()[\"rmse\"]\n    >>> sk.plot.line()\n    >>> sk.plot.line(marker=\"o\", linestyle=':')\n    >>> sk.plot.line(color=['0.2', '0.4', '0.6'])\n    \"\"\"\n    df = self._get_plot_df(level=level)\n    self._name_to_title_in_kwargs(kwargs)\n    axes = df.plot.line(**kwargs)\n\n    xlabels = list(df.index)\n    numeric_index = all(isinstance(item, (int, float)) for item in xlabels)\n\n    if not isinstance(axes, Iterable):\n        axes = [axes]\n    for ax in axes:\n        if not isinstance(df.index, pd.DatetimeIndex):\n            if numeric_index:\n                xlabel_positions = xlabels\n            else:\n                xlabel_positions = np.arange(len(xlabels)).tolist()\n            ax.set_xticks(xlabel_positions)\n            ax.set_xticklabels(xlabels, rotation=90)\n    return axes\n
"},{"location":"api/model/","title":"Model Result","text":"

A model result can either be a simple point/track, or spatial field (e.g. 2d dfsu file) from which data can be extracted at the observation positions by spatial interpolation. The following types are available:

  • Timeseries
    • PointModelResult - a point result from a dfs0/nc file or a DataFrame
    • TrackModelResult - a track (moving point) result from a dfs0/nc file or a DataFrame
  • SpatialField (extractable)
    • GridModelResult - a spatial field from a dfs2/nc file or a Xarray Dataset
    • DfsuModelResult - a spatial field from a dfsu file

A model result can be created by explicitly invoking one of the above classes or using the model_result() function which will return the appropriate type based on the input data (if possible).

"},{"location":"api/model/dfsu/","title":"DfsuModelResult","text":""},{"location":"api/model/dfsu/#modelskill.DfsuModelResult","title":"modelskill.DfsuModelResult","text":"

Bases: SpatialField

Construct a DfsuModelResult from a dfsu file or mikeio.Dataset/DataArray.

Parameters:

Name Type Description Default data UnstructuredType

the input data or file path

required name Optional[str]

The name of the model result, by default None (will be set to file name or item name)

None item str | int | None

If multiple items/arrays are present in the input an item must be given (as either an index or a string), by default None

None quantity Quantity

Model quantity, for MIKE files this is inferred from the EUM information

None aux_items Optional[list[int | str]]

Auxiliary items, by default None

None Source code in modelskill/model/dfsu.py
class DfsuModelResult(SpatialField):\n    \"\"\"Construct a DfsuModelResult from a dfsu file or mikeio.Dataset/DataArray.\n\n    Parameters\n    ----------\n    data : types.UnstructuredType\n        the input data or file path\n    name : Optional[str], optional\n        The name of the model result,\n        by default None (will be set to file name or item name)\n    item : str | int | None, optional\n        If multiple items/arrays are present in the input an item\n        must be given (as either an index or a string), by default None\n    quantity : Quantity, optional\n        Model quantity, for MIKE files this is inferred from the EUM information\n    aux_items : Optional[list[int | str]], optional\n        Auxiliary items, by default None\n    \"\"\"\n\n    def __init__(\n        self,\n        data: UnstructuredType,\n        *,\n        name: Optional[str] = None,\n        item: str | int | None = None,\n        quantity: Optional[Quantity] = None,\n        aux_items: Optional[list[int | str]] = None,\n    ) -> None:\n        filename = None\n\n        assert isinstance(\n            data, get_args(UnstructuredType)\n        ), \"Could not construct DfsuModelResult from provided data\"\n\n        if isinstance(data, (str, Path)):\n            if Path(data).suffix != \".dfsu\":\n                raise ValueError(f\"File must be a dfsu file, not {Path(data).suffix}\")\n            name = name or Path(data).stem\n            filename = str(data)\n            data = mikeio.open(data)\n\n        elif isinstance(data, (mikeio.DataArray, mikeio.Dataset)):\n            pass\n        else:\n            raise ValueError(\n                f\"data type must be .dfsu or dfsu-Dataset/DataArray. 
Not {type(data)}.\"\n            )\n\n        if isinstance(data, mikeio.DataArray):\n            if item is not None:\n                raise ValueError(\"item must be None when data is a DataArray\")\n            if aux_items is not None:\n                raise ValueError(\"aux_items must be None when data is a DataArray\")\n            item_info = data.item\n            item = data.name\n            self.sel_items = SelectedItems(values=data.name, aux=[])\n            data = mikeio.Dataset({data.name: data})\n        else:\n            item_names = [i.name for i in data.items]\n            idx = _get_idx(x=item, valid_names=item_names)\n            item_info = data.items[idx]\n\n            self.sel_items = SelectedItems.parse(\n                item_names, item=item, aux_items=aux_items\n            )\n            item = self.sel_items.values\n        if isinstance(data, mikeio.Dataset):\n            data = data[self.sel_items.all]\n\n        self.data: mikeio.dfsu.Dfsu2DH | mikeio.Dataset = data\n        self.name = name or str(item)\n        self.quantity = (\n            Quantity.from_mikeio_iteminfo(item_info) if quantity is None else quantity\n        )\n        self.filename = filename  # TODO: remove? 
backward compatibility\n\n    def __repr__(self) -> str:\n        res = []\n        res.append(f\"<{self.__class__.__name__}>: {self.name}\")\n        res.append(f\"Time: {self.time[0]} - {self.time[-1]}\")\n        res.append(f\"Quantity: {self.quantity}\")\n        if len(self.sel_items.aux) > 0:\n            res.append(f\"Auxiliary variables: {', '.join(self.sel_items.aux)}\")\n        return \"\\n\".join(res)\n\n    @property\n    def time(self) -> pd.DatetimeIndex:\n        return pd.DatetimeIndex(self.data.time)\n\n    def _in_domain(self, x: float, y: float) -> bool:\n        return self.data.geometry.contains([x, y])  # type: ignore\n\n    def extract(\n        self, observation: Observation, spatial_method: Optional[str] = None\n    ) -> PointModelResult | TrackModelResult:\n        \"\"\"Extract ModelResult at observation positions\n\n        Note: this method is typically not called directly, but by the match() method.\n\n        Parameters\n        ----------\n        observation : <PointObservation> or <TrackObservation>\n            positions (and times) at which modelresult should be extracted\n        spatial_method : Optional[str], optional\n            spatial selection/interpolation method, 'contained' (=isel),\n            'nearest', 'inverse_distance' (with 5 nearest points),\n            by default None = 'inverse_distance'\n\n        Returns\n        -------\n        PointModelResult or TrackModelResult\n            extracted modelresult with the same geometry as the observation\n        \"\"\"\n        method = self._parse_spatial_method(spatial_method)\n\n        _validate_overlap_in_time(self.time, observation)\n        if isinstance(observation, PointObservation):\n            return self._extract_point(observation, spatial_method=method)\n        elif isinstance(observation, TrackObservation):\n            return self._extract_track(observation, spatial_method=method)\n        else:\n            raise NotImplementedError(\n               
 f\"Extraction from {type(self.data)} to {type(observation)} is not implemented.\"\n            )\n\n    @staticmethod\n    def _parse_spatial_method(method: str | None) -> str | None:\n        METHOD_MAP = {\n            \"isel\": \"contained\",\n            \"contained\": \"contained\",\n            \"IDW\": \"inverse_distance\",\n            \"inverse_distance\": \"inverse_distance\",\n            \"nearest\": \"nearest\",\n            None: None,\n        }\n\n        if method not in METHOD_MAP:\n            raise ValueError(\n                f\"spatial_method for Dfsu must be 'nearest', 'contained', or 'inverse_distance'. Not {method}.\"\n            )\n        else:\n            return METHOD_MAP[method]\n\n    def _extract_point(\n        self, observation: PointObservation, spatial_method: Optional[str] = None\n    ) -> PointModelResult:\n        \"\"\"Spatially extract a PointModelResult from a DfsuModelResult\n        given a PointObservation. No time interpolation is done!\n\n        Note: 'inverse_distance' method uses 5 nearest points and is the default.\n        \"\"\"\n\n        method = spatial_method or \"inverse_distance\"\n        assert method in [\"nearest\", \"contained\", \"inverse_distance\"]\n        n_nearest = (\n            min(5, self.data.geometry.n_elements) if method == \"inverse_distance\" else 1\n        )\n\n        x, y, z = observation.x, observation.y, observation.z\n        if not self._in_domain(x, y):\n            raise ValueError(\n                f\"PointObservation '{observation.name}' ({x}, {y}) outside model domain!\"\n            )\n\n        if method == \"contained\":\n            signature = inspect.signature(self.data.geometry.find_index)\n            if \"z\" in signature.parameters and z is not None:\n                elemids = self.data.geometry.find_index(x=x, y=y, z=z)\n            else:\n                elemids = self.data.geometry.find_index(x=x, y=y)\n            if isinstance(self.data, mikeio.Dataset):\n  
              ds_model = self.data.isel(element=elemids)\n            else:  # Dfsu\n                ds_model = self.data.read(elements=elemids, items=self.sel_items.all)\n        else:\n            if z is not None:\n                raise NotImplementedError(\n                    \"Interpolation in 3d files is not supported, use spatial_method='contained' instead\"\n                )\n            if isinstance(self.data, mikeio.dfsu.Dfsu2DH):\n                elemids = self.data.geometry.find_nearest_elements(\n                    x, y, n_nearest=n_nearest\n                )\n                # sort elemids, to ensure consistent results with all versions of mikeio\n                if isinstance(elemids, np.ndarray):\n                    elemids = np.sort(elemids)\n\n                ds = self.data.read(elements=elemids, items=self.sel_items.all)\n                ds_model = (\n                    ds.interp(x=x, y=y, n_nearest=n_nearest) if n_nearest > 1 else ds\n                )\n            elif isinstance(self.data, mikeio.Dataset):\n                ds_model = self.data.interp(x=x, y=y, n_nearest=n_nearest)\n\n        assert isinstance(ds_model, mikeio.Dataset)\n\n        # TODO not sure why we rename here\n        assert self.name is not None\n        ds_model.rename({ds_model.items[0].name: self.name}, inplace=True)\n\n        return PointModelResult(\n            data=ds_model,\n            item=self.name,\n            x=ds_model.geometry.x,\n            y=ds_model.geometry.y,\n            name=self.name,\n            quantity=self.quantity,\n            aux_items=self.sel_items.aux,\n        )\n\n    def _extract_track(\n        self, observation: TrackObservation, spatial_method: Optional[str] = None\n    ) -> TrackModelResult:\n        \"\"\"Extract a TrackModelResult from a DfsuModelResult (when data is a Dfsu object),\n        given a TrackObservation.\n\n        Wraps MIKEIO's extract_track method (which has the default method='nearest').\n\n        MIKE 
IO's extract_track, inverse_distance method, uses 5 nearest points.\n        \"\"\"\n        method = spatial_method or \"inverse_distance\"\n        if method == \"contained\":\n            raise NotImplementedError(\n                \"spatial method 'contained' (=isel) not implemented for track extraction in MIKE IO\"\n            )\n        assert method in [\"nearest\", \"inverse_distance\"]\n\n        assert isinstance(\n            self.data, (mikeio.dfsu.Dfsu2DH, mikeio.DataArray, mikeio.Dataset)\n        )\n\n        track = observation.data.to_dataframe()\n\n        if isinstance(self.data, mikeio.DataArray):\n            ds_model = self.data.extract_track(track=track, method=method)\n            ds_model.rename({self.data.name: self.name}, inplace=True)\n            aux_items = None\n        else:\n            if isinstance(self.data, mikeio.dfsu.Dfsu2DH):\n                ds_model = self.data.extract_track(\n                    track=track, items=self.sel_items.all, method=method\n                )\n            elif isinstance(self.data, mikeio.Dataset):\n                ds_model = self.data[self.sel_items.all].extract_track(\n                    track=track, method=method\n                )\n            ds_model.rename({self.sel_items.values: self.name}, inplace=True)\n            aux_items = self.sel_items.aux\n\n        item_names = [i.name for i in ds_model.items]\n        x_name = \"Longitude\" if \"Longitude\" in item_names else \"x\"\n        y_name = \"Latitude\" if \"Latitude\" in item_names else \"y\"\n\n        return TrackModelResult(\n            data=ds_model.dropna(),  # TODO: not on aux cols\n            item=self.name,\n            x_item=x_name,\n            y_item=y_name,\n            name=self.name,\n            quantity=self.quantity,\n            aux_items=aux_items,\n        )\n
"},{"location":"api/model/dfsu/#modelskill.DfsuModelResult.extract","title":"extract","text":"
extract(observation, spatial_method=None)\n

Extract ModelResult at observation positions

Note: this method is typically not called directly, but by the match() method.

Parameters:

Name Type Description Default observation <PointObservation> or <TrackObservation>

positions (and times) at which modelresult should be extracted

required spatial_method Optional[str]

spatial selection/interpolation method, 'contained' (=isel), 'nearest', 'inverse_distance' (with 5 nearest points), by default None = 'inverse_distance'

None

Returns:

Type Description PointModelResult or TrackModelResult

extracted modelresult with the same geometry as the observation

Source code in modelskill/model/dfsu.py
def extract(\n    self, observation: Observation, spatial_method: Optional[str] = None\n) -> PointModelResult | TrackModelResult:\n    \"\"\"Extract ModelResult at observation positions\n\n    Note: this method is typically not called directly, but by the match() method.\n\n    Parameters\n    ----------\n    observation : <PointObservation> or <TrackObservation>\n        positions (and times) at which modelresult should be extracted\n    spatial_method : Optional[str], optional\n        spatial selection/interpolation method, 'contained' (=isel),\n        'nearest', 'inverse_distance' (with 5 nearest points),\n        by default None = 'inverse_distance'\n\n    Returns\n    -------\n    PointModelResult or TrackModelResult\n        extracted modelresult with the same geometry as the observation\n    \"\"\"\n    method = self._parse_spatial_method(spatial_method)\n\n    _validate_overlap_in_time(self.time, observation)\n    if isinstance(observation, PointObservation):\n        return self._extract_point(observation, spatial_method=method)\n    elif isinstance(observation, TrackObservation):\n        return self._extract_track(observation, spatial_method=method)\n    else:\n        raise NotImplementedError(\n            f\"Extraction from {type(self.data)} to {type(observation)} is not implemented.\"\n        )\n
"},{"location":"api/model/dummy/","title":"DummyModelResult","text":""},{"location":"api/model/dummy/#modelskill.DummyModelResult","title":"modelskill.DummyModelResult dataclass","text":"Source code in modelskill/model/dummy.py
@dataclass\nclass DummyModelResult:\n    name: str = \"dummy\"\n    data: float | None = None\n    strategy: Literal[\"mean\", \"constant\"] = \"constant\"\n    \"\"\"Dummy model result that always returns the same value.\n\n    Similar in spirit to <https://scikit-learn.org/stable/modules/generated/sklearn.dummy.DummyRegressor.html>\n\n    Parameters\n    ----------\n    data : float, optional\n        The value to return if strategy is 'constant', by default None\n    name : str, optional\n        The name of the model result, by default 'dummy'\n    strategy : str, optional\n        The strategy to use, 'mean' uses the mean of the observation, 'constant' uses the value given in data, by default 'constant'\n\n    Examples\n    --------\n    >>> import pandas as pd\n    >>> import modelskill as ms\n    >>> obs = ms.PointObservation(pd.DataFrame([0.0, 1.0], index=pd.date_range(\"2000\", freq=\"H\", periods=2)), name=\"foo\")\n    >>> mr = ms.DummyModelResult(strategy='mean')\n    >>> pmr = mr.extract(obs)\n    >>> pmr.to_dataframe()\n                        dummy\n    time\n    2000-01-01 00:00:00    0.5\n    2000-01-01 01:00:00    0.5\n    \"\"\"\n\n    def __post_init__(self):\n        if self.strategy == \"constant\" and self.data is None:\n            raise ValueError(\"data must be given when strategy is 'constant'\")\n\n    def extract(\n        self,\n        observation: PointObservation | TrackObservation,\n        spatial_method: Optional[str] = None,\n    ) -> PointModelResult | TrackModelResult:\n        if spatial_method is not None:\n            raise NotImplementedError(\n                \"spatial interpolation not possible when matching point model results with point observations\"\n            )\n\n        da = observation.data[observation.name].copy()\n        if self.strategy == \"mean\":\n            da[:] = da.mean()\n        else:\n            da[:] = self.data\n\n        if isinstance(observation, PointObservation):\n            return 
PointModelResult(\n                data=da, x=observation.x, y=observation.y, name=self.name\n            )\n\n        elif isinstance(observation, TrackObservation):\n            data = pd.DataFrame(\n                {\n                    \"x\": observation.x,\n                    \"y\": observation.y,\n                    \"value\": da.values,\n                },\n                index=da.time,\n            )\n            return TrackModelResult(data=data, name=self.name)\n        else:\n            raise ValueError(\n                f\"observation must be a PointObservation or TrackObservation not {type(observation)}\"\n            )\n
"},{"location":"api/model/dummy/#modelskill.DummyModelResult.strategy","title":"strategy class-attribute instance-attribute","text":"
strategy = 'constant'\n

Dummy model result that always returns the same value.

Similar in spirit to https://scikit-learn.org/stable/modules/generated/sklearn.dummy.DummyRegressor.html

Parameters:

Name Type Description Default data float

The value to return if strategy is 'constant', by default None

required name str

The name of the model result, by default 'dummy'

required strategy str

The strategy to use, 'mean' uses the mean of the observation, 'constant' uses the value given in data, by default 'constant'

required

Examples:

>>> import pandas as pd\n>>> import modelskill as ms\n>>> obs = ms.PointObservation(pd.DataFrame([0.0, 1.0], index=pd.date_range(\"2000\", freq=\"H\", periods=2)), name=\"foo\")\n>>> mr = ms.DummyModelResult(strategy='mean')\n>>> pmr = mr.extract(obs)\n>>> pmr.to_dataframe()\n                    dummy\ntime\n2000-01-01 00:00:00    0.5\n2000-01-01 01:00:00    0.5\n
"},{"location":"api/model/grid/","title":"GridModelResult","text":""},{"location":"api/model/grid/#modelskill.GridModelResult","title":"modelskill.GridModelResult","text":"

Bases: SpatialField

Construct a GridModelResult from a file or xarray.Dataset.

Parameters:

Name Type Description Default data GridType

the input data or file path

required name str

The name of the model result, by default None (will be set to file name or item name)

None item str or int

If multiple items/arrays are present in the input an item must be given (as either an index or a string), by default None

None quantity Quantity

Model quantity, for MIKE files this is inferred from the EUM information

None aux_items Optional[list[int | str]]

Auxiliary items, by default None

None Source code in modelskill/model/grid.py
class GridModelResult(SpatialField):\n    \"\"\"Construct a GridModelResult from a file or xarray.Dataset.\n\n    Parameters\n    ----------\n    data : types.GridType\n        the input data or file path\n    name : str, optional\n        The name of the model result,\n        by default None (will be set to file name or item name)\n    item : str or int, optional\n        If multiple items/arrays are present in the input an item\n        must be given (as either an index or a string), by default None\n    quantity : Quantity, optional\n        Model quantity, for MIKE files this is inferred from the EUM information\n    aux_items : Optional[list[int | str]], optional\n        Auxiliary items, by default None\n    \"\"\"\n\n    def __init__(\n        self,\n        data: GridType,\n        *,\n        name: Optional[str] = None,\n        item: str | int | None = None,\n        quantity: Optional[Quantity] = None,\n        aux_items: Optional[list[int | str]] = None,\n    ) -> None:\n        assert isinstance(\n            data, get_args(GridType)\n        ), \"Could not construct GridModelResult from provided data.\"\n\n        if isinstance(data, (str, Path)):\n            if \"*\" in str(data):\n                ds = xr.open_mfdataset(data)\n            else:\n                assert Path(data).exists(), f\"{data}: File does not exist.\"\n                ds = xr.open_dataset(data)\n\n        elif isinstance(data, Sequence) and all(\n            isinstance(file, (str, Path)) for file in data\n        ):\n            ds = xr.open_mfdataset(data)\n\n        elif isinstance(data, xr.DataArray):\n            if item is not None:\n                raise ValueError(f\"item must be None when data is a {type(data)}\")\n            if aux_items is not None:\n                raise ValueError(f\"aux_items must be None when data is a {type(data)}\")\n            if data.ndim < 2:\n                raise ValueError(f\"DataArray must at least 2D. 
Got {list(data.dims)}.\")\n            ds = data.to_dataset(name=name, promote_attrs=True)\n        elif isinstance(data, xr.Dataset):\n            assert len(data.coords) >= 2, \"Dataset must have at least 2 dimensions.\"\n            ds = data\n        else:\n            raise NotImplementedError(\n                f\"Could not construct GridModelResult from {type(data)}\"\n            )\n\n        sel_items = SelectedItems.parse(\n            list(ds.data_vars), item=item, aux_items=aux_items\n        )\n        name = name or sel_items.values\n        ds = rename_coords_xr(ds)\n\n        self.data: xr.Dataset = ds[sel_items.all]\n        self.name = name\n        self.sel_items = sel_items\n\n        # use long_name and units from data if not provided\n        if quantity is None:\n            da = self.data[sel_items.values]\n            quantity = Quantity.from_cf_attrs(da.attrs)\n\n        self.quantity = quantity\n\n    def __repr__(self) -> str:\n        res = []\n        res.append(f\"<{self.__class__.__name__}>: {self.name}\")\n        res.append(f\"Time: {self.time[0]} - {self.time[-1]}\")\n        res.append(f\"Quantity: {self.quantity}\")\n        if len(self.sel_items.aux) > 0:\n            res.append(f\"Auxiliary variables: {', '.join(self.sel_items.aux)}\")\n        return \"\\n\".join(res)\n\n    @property\n    def time(self) -> pd.DatetimeIndex:\n        return pd.DatetimeIndex(self.data.time)\n\n    def _in_domain(self, x: float, y: float) -> bool:\n        assert hasattr(self.data, \"x\") and hasattr(\n            self.data, \"y\"\n        ), \"Data has no x and/or y coordinates.\"\n        xmin = float(self.data.x.values.min())\n        xmax = float(self.data.x.values.max())\n        ymin = float(self.data.y.values.min())\n        ymax = float(self.data.y.values.max())\n        return (x >= xmin) & (x <= xmax) & (y >= ymin) & (y <= ymax)\n\n    def extract(\n        self,\n        observation: PointObservation | TrackObservation,\n        
spatial_method: Optional[str] = None,\n    ) -> PointModelResult | TrackModelResult:\n        \"\"\"Extract ModelResult at observation positions\n\n        Note: this method is typically not called directly, but through the match() method.\n\n        Parameters\n        ----------\n        observation : <PointObservation> or <TrackObservation>\n            positions (and times) at which modelresult should be extracted\n        spatial_method : Optional[str], optional\n            method in xarray.Dataset.interp, typically either \"nearest\" or\n            \"linear\", by default None = 'linear'\n\n        Returns\n        -------\n        PointModelResult or TrackModelResult\n            extracted modelresult\n        \"\"\"\n        _validate_overlap_in_time(self.time, observation)\n        if isinstance(observation, PointObservation):\n            return self._extract_point(observation, spatial_method)\n        elif isinstance(observation, TrackObservation):\n            return self._extract_track(observation, spatial_method)\n        else:\n            raise NotImplementedError(\n                f\"Extraction from {type(self.data)} to {type(observation)} is not implemented.\"\n            )\n\n    def _extract_point(\n        self, observation: PointObservation, spatial_method: Optional[str] = None\n    ) -> PointModelResult:\n        \"\"\"Spatially extract a PointModelResult from a GridModelResult (when data is a xarray.Dataset),\n        given a PointObservation. No time interpolation is done!\"\"\"\n        method: str = spatial_method or \"linear\"\n\n        x, y, z = observation.x, observation.y, observation.z\n        if (x is None) or (y is None):\n            raise ValueError(\n                f\"PointObservation '{observation.name}' cannot be used for extraction \"\n                + f\"because it has None position x={x}, y={y}. 
Please provide position \"\n                + \"when creating PointObservation.\"\n            )\n        if not self._in_domain(x, y):\n            raise ValueError(\n                f\"PointObservation '{observation.name}' ({x}, {y}) is outside model domain!\"\n            )\n\n        assert isinstance(self.data, xr.Dataset)\n\n        # TODO: avoid runtrip to pandas if possible (potential loss of metadata)\n        if \"z\" in self.data.dims and z is not None:\n            ds = self.data.interp(\n                coords=dict(x=float(x), y=float(y), z=float(z)),\n                method=method,  # type: ignore\n            )\n        else:\n            ds = self.data.interp(coords=dict(x=float(x), y=float(y)), method=method)  # type: ignore\n        # TODO: exclude aux cols in dropna\n        df = ds.to_dataframe().drop(columns=[\"x\", \"y\"]).dropna()\n        if len(df) == 0:\n            raise ValueError(\n                f\"Spatial point extraction failed for PointObservation '{observation.name}' in GridModelResult '{self.name}'! (is point outside model domain? 
Consider spatial_method='nearest')\"\n            )\n        df = df.rename(columns={self.sel_items.values: self.name})\n\n        return PointModelResult(\n            data=df,\n            x=ds.x.item(),\n            y=ds.y.item(),\n            item=self.name,\n            name=self.name,\n            quantity=self.quantity,\n            aux_items=self.sel_items.aux,\n        )\n\n    def _extract_track(\n        self, observation: TrackObservation, spatial_method: Optional[str] = None\n    ) -> TrackModelResult:\n        \"\"\"Extract a TrackModelResult from a GridModelResult (when data is a xarray.Dataset),\n        given a TrackObservation.\"\"\"\n        method: str = spatial_method or \"linear\"\n\n        obs_df = observation.data.to_dataframe()\n\n        renamed_obs_data = rename_coords_pd(obs_df)\n        t = xr.DataArray(renamed_obs_data.index, dims=\"track\")\n        x = xr.DataArray(renamed_obs_data.x, dims=\"track\")\n        y = xr.DataArray(renamed_obs_data.y, dims=\"track\")\n\n        assert isinstance(self.data, xr.Dataset)\n        ds = self.data.interp(\n            coords=dict(time=t, x=x, y=y),\n            method=method,  # type: ignore\n        )\n        df = ds.to_dataframe().drop(columns=[\"time\"])\n        df = df.rename(columns={self.sel_items.values: self.name})\n\n        return TrackModelResult(\n            data=df.dropna(),  # TODO: exclude aux cols in dropna\n            item=self.name,\n            x_item=\"x\",\n            y_item=\"y\",\n            name=self.name,\n            quantity=self.quantity,\n            aux_items=self.sel_items.aux,\n        )\n
"},{"location":"api/model/grid/#modelskill.GridModelResult.extract","title":"extract","text":"
extract(observation, spatial_method=None)\n

Extract ModelResult at observation positions

Note: this method is typically not called directly, but through the match() method.

Parameters:

Name Type Description Default observation <PointObservation> or <TrackObservation>

positions (and times) at which modelresult should be extracted

required spatial_method Optional[str]

method in xarray.Dataset.interp, typically either \"nearest\" or \"linear\", by default None = 'linear'

None

Returns:

Type Description PointModelResult or TrackModelResult

extracted modelresult

Source code in modelskill/model/grid.py
def extract(\n    self,\n    observation: PointObservation | TrackObservation,\n    spatial_method: Optional[str] = None,\n) -> PointModelResult | TrackModelResult:\n    \"\"\"Extract ModelResult at observation positions\n\n    Note: this method is typically not called directly, but through the match() method.\n\n    Parameters\n    ----------\n    observation : <PointObservation> or <TrackObservation>\n        positions (and times) at which modelresult should be extracted\n    spatial_method : Optional[str], optional\n        method in xarray.Dataset.interp, typically either \"nearest\" or\n        \"linear\", by default None = 'linear'\n\n    Returns\n    -------\n    PointModelResult or TrackModelResult\n        extracted modelresult\n    \"\"\"\n    _validate_overlap_in_time(self.time, observation)\n    if isinstance(observation, PointObservation):\n        return self._extract_point(observation, spatial_method)\n    elif isinstance(observation, TrackObservation):\n        return self._extract_track(observation, spatial_method)\n    else:\n        raise NotImplementedError(\n            f\"Extraction from {type(self.data)} to {type(observation)} is not implemented.\"\n        )\n
"},{"location":"api/model/model_result/","title":"model_result()","text":""},{"location":"api/model/model_result/#modelskill.model_result","title":"modelskill.model_result","text":"
model_result(data, *, aux_items=None, gtype=None, **kwargs)\n

A factory function for creating an appropriate object based on the data input.

Parameters:

Name Type Description Default data DataInputType

The data to be used for creating the ModelResult object.

required aux_items Optional[list[int | str]]

Auxiliary items, by default None

None gtype Optional[Literal['point', 'track', 'unstructured', 'grid']]

The geometry type of the data. If not specified, it will be guessed from the data.

None **kwargs Any

Additional keyword arguments to be passed to the ModelResult constructor.

{}

Examples:

>>> import modelskill as ms\n>>> ms.model_result(\"Oresund2D.dfsu\", item=0)\n<DfsuModelResult> 'Oresund2D'\n>>> ms.model_result(\"ERA5_DutchCoast.nc\", item=\"swh\", name=\"ERA5\")\n<GridModelResult> 'ERA5'\n
Source code in modelskill/model/factory.py
def model_result(\n    data: DataInputType,\n    *,\n    aux_items: Optional[list[int | str]] = None,\n    gtype: Optional[Literal[\"point\", \"track\", \"unstructured\", \"grid\"]] = None,\n    **kwargs: Any,\n) -> Any:\n    \"\"\"A factory function for creating an appropriate object based on the data input.\n\n    Parameters\n    ----------\n    data : DataInputType\n        The data to be used for creating the ModelResult object.\n    aux_items : Optional[list[int | str]]\n        Auxiliary items, by default None\n    gtype : Optional[Literal[\"point\", \"track\", \"unstructured\", \"grid\"]]\n        The geometry type of the data. If not specified, it will be guessed from the data.\n    **kwargs\n        Additional keyword arguments to be passed to the ModelResult constructor.\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> ms.model_result(\"Oresund2D.dfsu\", item=0)\n    <DfsuModelResult> 'Oresund2D'\n    >>> ms.model_result(\"ERA5_DutchCoast.nc\", item=\"swh\", name=\"ERA5\")\n    <GridModelResult> 'ERA5'\n    \"\"\"\n    if gtype is None:\n        geometry = _guess_gtype(data)\n    else:\n        geometry = GeometryType.from_string(gtype)\n\n    return _modelresult_lookup[geometry](\n        data=data,\n        aux_items=aux_items,\n        **kwargs,\n    )\n
"},{"location":"api/model/point/","title":"PointModelResult","text":""},{"location":"api/model/point/#modelskill.PointModelResult","title":"modelskill.PointModelResult","text":"

Bases: TimeSeries, Alignable

Construct a PointModelResult from a 0d data source: dfs0 file, mikeio.Dataset/DataArray, pandas.DataFrame/Series or xarray.Dataset/DataArray

Parameters:

Name Type Description Default data (str, Path, Dataset, DataArray, DataFrame, Series, Dataset or DataArray)

filename (.dfs0 or .nc) or object with the data

required name Optional[str]

The name of the model result, by default None (will be set to file name or item name)

None x float

first coordinate of point position, inferred from data if not given, else None

None y float

second coordinate of point position, inferred from data if not given, else None

None z float

third coordinate of point position, inferred from data if not given, else None

None item str | int | None

If multiple items/arrays are present in the input an item must be given (as either an index or a string), by default None

None quantity Quantity

Model quantity, for MIKE files this is inferred from the EUM information

None aux_items Optional[list[int | str]]

Auxiliary items, by default None

None Source code in modelskill/model/point.py
class PointModelResult(TimeSeries, Alignable):\n    \"\"\"Construct a PointModelResult from a 0d data source:\n    dfs0 file, mikeio.Dataset/DataArray, pandas.DataFrame/Series\n    or xarray.Dataset/DataArray\n\n    Parameters\n    ----------\n    data : str, Path, mikeio.Dataset, mikeio.DataArray, pd.DataFrame, pd.Series, xr.Dataset or xr.DataArray\n        filename (.dfs0 or .nc) or object with the data\n    name : Optional[str], optional\n        The name of the model result,\n        by default None (will be set to file name or item name)\n    x : float, optional\n        first coordinate of point position, inferred from data if not given, else None\n    y : float, optional\n        second coordinate of point position, inferred from data if not given, else None\n    z : float, optional\n        third coordinate of point position, inferred from data if not given, else None\n    item : str | int | None, optional\n        If multiple items/arrays are present in the input an item\n        must be given (as either an index or a string), by default None\n    quantity : Quantity, optional\n        Model quantity, for MIKE files this is inferred from the EUM information\n    aux_items : Optional[list[int | str]], optional\n        Auxiliary items, by default None\n    \"\"\"\n\n    def __init__(\n        self,\n        data: PointType,\n        *,\n        name: Optional[str] = None,\n        x: Optional[float] = None,\n        y: Optional[float] = None,\n        z: Optional[float] = None,\n        item: str | int | None = None,\n        quantity: Optional[Quantity] = None,\n        aux_items: Optional[Sequence[int | str]] = None,\n    ) -> None:\n        if not self._is_input_validated(data):\n            data = _parse_point_input(\n                data,\n                name=name,\n                item=item,\n                quantity=quantity,\n                aux_items=aux_items,\n                x=x,\n                y=y,\n                z=z,\n            )\n\n    
    assert isinstance(data, xr.Dataset)\n\n        data_var = str(list(data.data_vars)[0])\n        data[data_var].attrs[\"kind\"] = \"model\"\n        super().__init__(data=data)\n\n    def extract(\n        self, obs: PointObservation, spatial_method: Optional[str] = None\n    ) -> PointModelResult:\n        if not isinstance(obs, PointObservation):\n            raise ValueError(f\"obs must be a PointObservation not {type(obs)}\")\n        if spatial_method is not None:\n            raise NotImplementedError(\n                \"spatial interpolation not possible when matching point model results with point observations\"\n            )\n        return self\n\n    def interp_time(self, observation: Observation, **kwargs: Any) -> PointModelResult:\n        \"\"\"\n        Interpolate model result to the time of the observation\n\n        wrapper around xarray.Dataset.interp()\n\n        Parameters\n        ----------\n        observation : Observation\n            The observation to interpolate to\n        **kwargs\n\n            Additional keyword arguments passed to xarray.interp\n\n        Returns\n        -------\n        PointModelResult\n            Interpolated model result\n        \"\"\"\n        ds = self.align(observation, **kwargs)\n        return PointModelResult(ds)\n\n    def align(\n        self,\n        observation: Observation,\n        *,\n        max_gap: float | None = None,\n        **kwargs: Any,\n    ) -> xr.Dataset:\n        new_time = observation.time\n\n        dati = self.data.dropna(\"time\").interp(\n            time=new_time, assume_sorted=True, **kwargs\n        )\n\n        pmr = PointModelResult(dati)\n        if max_gap is not None:\n            pmr = pmr._remove_model_gaps(mod_index=self.time, max_gap=max_gap)\n        return pmr.data\n\n    def _remove_model_gaps(\n        self,\n        mod_index: pd.DatetimeIndex,\n        max_gap: float | None = None,\n    ) -> PointModelResult:\n        \"\"\"Remove model gaps longer than 
max_gap from TimeSeries\"\"\"\n        max_gap_delta = pd.Timedelta(max_gap, \"s\")\n        valid_times = self._get_valid_times(mod_index, max_gap_delta)\n        ds = self.data.sel(time=valid_times)\n        return PointModelResult(ds)\n\n    def _get_valid_times(\n        self, mod_index: pd.DatetimeIndex, max_gap: pd.Timedelta\n    ) -> pd.DatetimeIndex:\n        \"\"\"Used only by _remove_model_gaps\"\"\"\n        obs_index = self.time\n        # init dataframe of available timesteps and their index\n        df = pd.DataFrame(index=mod_index)\n        df[\"idx\"] = range(len(df))\n\n        # for query times get available left and right index of source times\n        df = (\n            df.reindex(df.index.union(obs_index))\n            .interpolate(method=\"time\", limit_area=\"inside\")\n            .reindex(obs_index)\n            .dropna()\n        )\n        df[\"idxa\"] = np.floor(df.idx).astype(int)\n        df[\"idxb\"] = np.ceil(df.idx).astype(int)\n\n        # time of left and right source times and time delta\n        df[\"ta\"] = mod_index[df.idxa]\n        df[\"tb\"] = mod_index[df.idxb]\n        df[\"dt\"] = df.tb - df.ta\n\n        # valid query times where time delta is less than max_gap\n        valid_idx = df.dt <= max_gap\n        return df[valid_idx].index\n
"},{"location":"api/model/point/#modelskill.PointModelResult.gtype","title":"gtype property","text":"
gtype\n

Geometry type

"},{"location":"api/model/point/#modelskill.PointModelResult.n_points","title":"n_points property","text":"
n_points\n

Number of data points

"},{"location":"api/model/point/#modelskill.PointModelResult.name","title":"name property writable","text":"
name\n

Name of time series (value item name)

"},{"location":"api/model/point/#modelskill.PointModelResult.plot","title":"plot instance-attribute","text":"
plot = plotter(self)\n

Plot using the ComparerPlotter

Examples:

>>> obj.plot.timeseries()\n>>> obj.plot.hist()\n
"},{"location":"api/model/point/#modelskill.PointModelResult.quantity","title":"quantity property writable","text":"
quantity\n

Quantity of time series

"},{"location":"api/model/point/#modelskill.PointModelResult.time","title":"time property","text":"
time\n

Time index

"},{"location":"api/model/point/#modelskill.PointModelResult.values","title":"values property","text":"
values\n

Values as numpy array

"},{"location":"api/model/point/#modelskill.PointModelResult.x","title":"x property writable","text":"
x\n

x-coordinate

"},{"location":"api/model/point/#modelskill.PointModelResult.y","title":"y property writable","text":"
y\n

y-coordinate

"},{"location":"api/model/point/#modelskill.PointModelResult.equals","title":"equals","text":"
equals(other)\n

Check if two TimeSeries are equal

Source code in modelskill/timeseries/_timeseries.py
def equals(self, other: TimeSeries) -> bool:\n    \"\"\"Check if two TimeSeries are equal\"\"\"\n    return self.data.equals(other.data)\n
"},{"location":"api/model/point/#modelskill.PointModelResult.interp_time","title":"interp_time","text":"
interp_time(observation, **kwargs)\n

Interpolate model result to the time of the observation

wrapper around xarray.Dataset.interp()

Parameters:

Name Type Description Default observation Observation

The observation to interpolate to

required **kwargs Any

Additional keyword arguments passed to xarray.interp

{}

Returns:

Type Description PointModelResult

Interpolated model result

Source code in modelskill/model/point.py
def interp_time(self, observation: Observation, **kwargs: Any) -> PointModelResult:\n    \"\"\"\n    Interpolate model result to the time of the observation\n\n    wrapper around xarray.Dataset.interp()\n\n    Parameters\n    ----------\n    observation : Observation\n        The observation to interpolate to\n    **kwargs\n\n        Additional keyword arguments passed to xarray.interp\n\n    Returns\n    -------\n    PointModelResult\n        Interpolated model result\n    \"\"\"\n    ds = self.align(observation, **kwargs)\n    return PointModelResult(ds)\n
"},{"location":"api/model/point/#modelskill.PointModelResult.sel","title":"sel","text":"
sel(**kwargs)\n

Select data by label

Source code in modelskill/timeseries/_timeseries.py
def sel(self: T, **kwargs: Any) -> T:\n    \"\"\"Select data by label\"\"\"\n    return self.__class__(self.data.sel(**kwargs))\n
"},{"location":"api/model/point/#modelskill.PointModelResult.to_dataframe","title":"to_dataframe","text":"
to_dataframe()\n

Convert matched data to pandas DataFrame

Include x, y coordinates only if gtype=track

Returns:

Type Description DataFrame

data as a pandas DataFrame

Source code in modelskill/timeseries/_timeseries.py
def to_dataframe(self) -> pd.DataFrame:\n    \"\"\"Convert matched data to pandas DataFrame\n\n    Include x, y coordinates only if gtype=track\n\n    Returns\n    -------\n    pd.DataFrame\n        data as a pandas DataFrame\n    \"\"\"\n    if self.gtype == str(GeometryType.POINT):\n        # we remove the scalar coordinate variables as they\n        # will otherwise be columns in the dataframe\n        return self.data.drop_vars([\"x\", \"y\", \"z\"]).to_dataframe()\n    elif self.gtype == str(GeometryType.TRACK):\n        df = self.data.drop_vars([\"z\"]).to_dataframe()\n        # make sure that x, y cols are first\n        cols = [\"x\", \"y\"] + [c for c in df.columns if c not in [\"x\", \"y\"]]\n        return df[cols]\n    else:\n        raise NotImplementedError(f\"Unknown gtype: {self.gtype}\")\n
"},{"location":"api/model/point/#modelskill.PointModelResult.trim","title":"trim","text":"
trim(start_time=None, end_time=None, buffer='1s')\n

Trim observation data to a given time interval

Parameters:

Name Type Description Default start_time Timestamp

start time

None end_time Timestamp

end time

None buffer str

buffer time around start and end time, by default \"1s\"

'1s' Source code in modelskill/timeseries/_timeseries.py
def trim(\n    self: T,\n    start_time: Optional[pd.Timestamp] = None,\n    end_time: Optional[pd.Timestamp] = None,\n    buffer: str = \"1s\",\n) -> T:\n    \"\"\"Trim observation data to a given time interval\n\n    Parameters\n    ----------\n    start_time : pd.Timestamp\n        start time\n    end_time : pd.Timestamp\n        end time\n    buffer : str, optional\n        buffer time around start and end time, by default \"1s\"\n    \"\"\"\n    # Expand time interval with buffer\n    start_time = pd.Timestamp(start_time) - pd.Timedelta(buffer)\n    end_time = pd.Timestamp(end_time) + pd.Timedelta(buffer)\n\n    data = self.data.sel(time=slice(start_time, end_time))\n    if len(data.time) == 0:\n        raise ValueError(\n            f\"No data left after trimming to {start_time} - {end_time}\"\n        )\n    return self.__class__(data)\n
"},{"location":"api/model/point/#modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter","title":"modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter","text":"

Bases: TimeSeriesPlotter

Source code in modelskill/timeseries/_plotter.py
class MatplotlibTimeSeriesPlotter(TimeSeriesPlotter):\n    def __init__(self, ts) -> None:\n        self._ts = ts\n\n    def __call__(self, **kwargs):\n        # default to timeseries plot\n        self.timeseries(**kwargs)\n\n    def timeseries(\n        self, title=None, color=None, marker=\".\", linestyle=\"None\", **kwargs\n    ):\n        \"\"\"Plot timeseries\n\n        Wraps pandas.DataFrame plot() method.\n\n        Parameters\n        ----------\n        title : str, optional\n            plot title, default: [name]\n        color : str, optional\n            plot color, by default '#d62728'\n        marker : str, optional\n            plot marker, by default '.'\n        linestyle : str, optional\n            line style, by default None\n        **kwargs\n            other keyword arguments to df.plot()\n        \"\"\"\n        kwargs[\"color\"] = self._ts._color if color is None else color\n        ax = self._ts._values_as_series.plot(\n            marker=marker, linestyle=linestyle, **kwargs\n        )\n\n        title = self._ts.name if title is None else title\n        ax.set_title(title)\n\n        ax.set_ylabel(str(self._ts.quantity))\n        return ax\n\n    def hist(self, bins=100, title=None, color=None, **kwargs):\n        \"\"\"Plot histogram of timeseries values\n\n        Wraps pandas.DataFrame hist() method.\n\n        Parameters\n        ----------\n        bins : int, optional\n            specification of bins, by default 100\n        title : str, optional\n            plot title, default: observation name\n        color : str, optional\n            plot color, by default \"#d62728\"\n        **kwargs\n            other keyword arguments to df.hist()\n\n        Returns\n        -------\n        matplotlib axes\n        \"\"\"\n        title = self._ts.name if title is None else title\n\n        kwargs[\"color\"] = self._ts._color if color is None else color\n\n        ax = self._ts._values_as_series.hist(bins=bins, **kwargs)\n        
ax.set_title(title)\n        ax.set_xlabel(str(self._ts.quantity))\n        return ax\n
"},{"location":"api/model/point/#modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter.hist","title":"hist","text":"
hist(bins=100, title=None, color=None, **kwargs)\n

Plot histogram of timeseries values

Wraps pandas.DataFrame hist() method.

Parameters:

Name Type Description Default bins int

specification of bins, by default 100

100 title str

plot title, default: observation name

None color str

plot color, by default \"#d62728\"

None **kwargs

other keyword arguments to df.hist()

{}

Returns:

Type Description matplotlib axes Source code in modelskill/timeseries/_plotter.py
def hist(self, bins=100, title=None, color=None, **kwargs):\n    \"\"\"Plot histogram of timeseries values\n\n    Wraps pandas.DataFrame hist() method.\n\n    Parameters\n    ----------\n    bins : int, optional\n        specification of bins, by default 100\n    title : str, optional\n        plot title, default: observation name\n    color : str, optional\n        plot color, by default \"#d62728\"\n    **kwargs\n        other keyword arguments to df.hist()\n\n    Returns\n    -------\n    matplotlib axes\n    \"\"\"\n    title = self._ts.name if title is None else title\n\n    kwargs[\"color\"] = self._ts._color if color is None else color\n\n    ax = self._ts._values_as_series.hist(bins=bins, **kwargs)\n    ax.set_title(title)\n    ax.set_xlabel(str(self._ts.quantity))\n    return ax\n
"},{"location":"api/model/point/#modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter.timeseries","title":"timeseries","text":"
timeseries(title=None, color=None, marker='.', linestyle='None', **kwargs)\n

Plot timeseries

Wraps pandas.DataFrame plot() method.

Parameters:

Name Type Description Default title str

plot title, default: [name]

None color str

plot color, by default '#d62728'

None marker str

plot marker, by default '.'

'.' linestyle str

line style, by default None

'None' **kwargs

other keyword arguments to df.plot()

{} Source code in modelskill/timeseries/_plotter.py
def timeseries(\n    self, title=None, color=None, marker=\".\", linestyle=\"None\", **kwargs\n):\n    \"\"\"Plot timeseries\n\n    Wraps pandas.DataFrame plot() method.\n\n    Parameters\n    ----------\n    title : str, optional\n        plot title, default: [name]\n    color : str, optional\n        plot color, by default '#d62728'\n    marker : str, optional\n        plot marker, by default '.'\n    linestyle : str, optional\n        line style, by default None\n    **kwargs\n        other keyword arguments to df.plot()\n    \"\"\"\n    kwargs[\"color\"] = self._ts._color if color is None else color\n    ax = self._ts._values_as_series.plot(\n        marker=marker, linestyle=linestyle, **kwargs\n    )\n\n    title = self._ts.name if title is None else title\n    ax.set_title(title)\n\n    ax.set_ylabel(str(self._ts.quantity))\n    return ax\n
"},{"location":"api/model/track/","title":"TrackModelResult","text":""},{"location":"api/model/track/#modelskill.TrackModelResult","title":"modelskill.TrackModelResult","text":"

Bases: TimeSeries, Alignable

Construct a TrackModelResult from a dfs0 file, mikeio.Dataset, pandas.DataFrame or a xarray.Datasets

Parameters:

Name Type Description Default data TrackType

The input data or file path

required name Optional[str]

The name of the model result, by default None (will be set to file name or item name)

None item str | int | None

If multiple items/arrays are present in the input an item must be given (as either an index or a string), by default None

None x_item str | int | None

Item of the first coordinate of positions, by default None

0 y_item str | int | None

Item of the second coordinate of positions, by default None

1 quantity Quantity

Model quantity, for MIKE files this is inferred from the EUM information

None keep_duplicates (str, bool)

Strategy for handling duplicate timestamps (wraps xarray.Dataset.drop_duplicates) \"first\" to keep first occurrence, \"last\" to keep last occurrence, False to drop all duplicates, \"offset\" to add milliseconds to consecutive duplicates, by default \"first\"

'first' aux_items Optional[list[int | str]]

Auxiliary items, by default None

None Source code in modelskill/model/track.py
class TrackModelResult(TimeSeries, Alignable):\n    \"\"\"Construct a TrackModelResult from a dfs0 file,\n    mikeio.Dataset, pandas.DataFrame or a xarray.Datasets\n\n    Parameters\n    ----------\n    data : types.TrackType\n        The input data or file path\n    name : Optional[str], optional\n        The name of the model result,\n        by default None (will be set to file name or item name)\n    item : str | int | None, optional\n        If multiple items/arrays are present in the input an item\n        must be given (as either an index or a string), by default None\n    x_item : str | int | None, optional\n        Item of the first coordinate of positions, by default None\n    y_item : str | int | None, optional\n        Item of the second coordinate of positions, by default None\n    quantity : Quantity, optional\n        Model quantity, for MIKE files this is inferred from the EUM information\n    keep_duplicates : (str, bool), optional\n        Strategy for handling duplicate timestamps (wraps xarray.Dataset.drop_duplicates)\n        \"first\" to keep first occurrence, \"last\" to keep last occurrence,\n        False to drop all duplicates, \"offset\" to add milliseconds to\n        consecutive duplicates, by default \"first\"\n    aux_items : Optional[list[int | str]], optional\n        Auxiliary items, by default None\n    \"\"\"\n\n    def __init__(\n        self,\n        data: TrackType,\n        *,\n        name: Optional[str] = None,\n        item: str | int | None = None,\n        quantity: Optional[Quantity] = None,\n        x_item: str | int = 0,\n        y_item: str | int = 1,\n        keep_duplicates: str | bool = \"first\",\n        aux_items: Optional[Sequence[int | str]] = None,\n    ) -> None:\n        if not self._is_input_validated(data):\n            data = _parse_track_input(\n                data=data,\n                name=name,\n                item=item,\n                quantity=quantity,\n                x_item=x_item,\n       
         y_item=y_item,\n                keep_duplicates=keep_duplicates,\n                aux_items=aux_items,\n            )\n\n        assert isinstance(data, xr.Dataset)\n        data_var = str(list(data.data_vars)[0])\n        data[data_var].attrs[\"kind\"] = \"model\"\n        super().__init__(data=data)\n\n    def extract(\n        self, obs: TrackObservation, spatial_method: Optional[str] = None\n    ) -> TrackModelResult:\n        if not isinstance(obs, TrackObservation):\n            raise ValueError(f\"obs must be a TrackObservation not {type(obs)}\")\n        if spatial_method is not None:\n            raise NotImplementedError(\n                \"spatial interpolation not possible when matching track model results with track observations\"\n            )\n        return self\n\n    def align(self, observation: Observation, **kwargs: Any) -> xr.Dataset:\n        spatial_tolerance = 1e-3\n\n        mri = self\n        mod_df = mri.data.to_dataframe()\n        obs_df = observation.data.to_dataframe()\n\n        # 1. inner join on time\n        df = mod_df.join(obs_df, how=\"inner\", lsuffix=\"_mod\", rsuffix=\"_obs\")\n\n        # 2. remove model points outside observation track\n        n_points = len(df)\n        keep_x = np.abs((df.x_mod - df.x_obs)) < spatial_tolerance\n        keep_y = np.abs((df.y_mod - df.y_obs)) < spatial_tolerance\n        df = df[keep_x & keep_y]\n        if n_points_removed := n_points - len(df):\n            warnings.warn(\n                f\"Removed {n_points_removed} model points outside observation track (spatial_tolerance={spatial_tolerance})\"\n            )\n        return mri.data.sel(time=df.index)\n
"},{"location":"api/model/track/#modelskill.TrackModelResult.gtype","title":"gtype property","text":"
gtype\n

Geometry type

"},{"location":"api/model/track/#modelskill.TrackModelResult.n_points","title":"n_points property","text":"
n_points\n

Number of data points

"},{"location":"api/model/track/#modelskill.TrackModelResult.name","title":"name property writable","text":"
name\n

Name of time series (value item name)

"},{"location":"api/model/track/#modelskill.TrackModelResult.plot","title":"plot instance-attribute","text":"
plot = plotter(self)\n

Plot using the ComparerPlotter

Examples:

>>> obj.plot.timeseries()\n>>> obj.plot.hist()\n
"},{"location":"api/model/track/#modelskill.TrackModelResult.quantity","title":"quantity property writable","text":"
quantity\n

Quantity of time series

"},{"location":"api/model/track/#modelskill.TrackModelResult.time","title":"time property","text":"
time\n

Time index

"},{"location":"api/model/track/#modelskill.TrackModelResult.values","title":"values property","text":"
values\n

Values as numpy array

"},{"location":"api/model/track/#modelskill.TrackModelResult.x","title":"x property writable","text":"
x\n

x-coordinate

"},{"location":"api/model/track/#modelskill.TrackModelResult.y","title":"y property writable","text":"
y\n

y-coordinate

"},{"location":"api/model/track/#modelskill.TrackModelResult.equals","title":"equals","text":"
equals(other)\n

Check if two TimeSeries are equal

Source code in modelskill/timeseries/_timeseries.py
def equals(self, other: TimeSeries) -> bool:\n    \"\"\"Check if two TimeSeries are equal\"\"\"\n    return self.data.equals(other.data)\n
"},{"location":"api/model/track/#modelskill.TrackModelResult.sel","title":"sel","text":"
sel(**kwargs)\n

Select data by label

Source code in modelskill/timeseries/_timeseries.py
def sel(self: T, **kwargs: Any) -> T:\n    \"\"\"Select data by label\"\"\"\n    return self.__class__(self.data.sel(**kwargs))\n
"},{"location":"api/model/track/#modelskill.TrackModelResult.to_dataframe","title":"to_dataframe","text":"
to_dataframe()\n

Convert matched data to pandas DataFrame

Include x, y coordinates only if gtype=track

Returns:

Type Description DataFrame

data as a pandas DataFrame

Source code in modelskill/timeseries/_timeseries.py
def to_dataframe(self) -> pd.DataFrame:\n    \"\"\"Convert matched data to pandas DataFrame\n\n    Include x, y coordinates only if gtype=track\n\n    Returns\n    -------\n    pd.DataFrame\n        data as a pandas DataFrame\n    \"\"\"\n    if self.gtype == str(GeometryType.POINT):\n        # we remove the scalar coordinate variables as they\n        # will otherwise be columns in the dataframe\n        return self.data.drop_vars([\"x\", \"y\", \"z\"]).to_dataframe()\n    elif self.gtype == str(GeometryType.TRACK):\n        df = self.data.drop_vars([\"z\"]).to_dataframe()\n        # make sure that x, y cols are first\n        cols = [\"x\", \"y\"] + [c for c in df.columns if c not in [\"x\", \"y\"]]\n        return df[cols]\n    else:\n        raise NotImplementedError(f\"Unknown gtype: {self.gtype}\")\n
"},{"location":"api/model/track/#modelskill.TrackModelResult.trim","title":"trim","text":"
trim(start_time=None, end_time=None, buffer='1s')\n

Trim observation data to a given time interval

Parameters:

Name Type Description Default start_time Timestamp

start time

None end_time Timestamp

end time

None buffer str

buffer time around start and end time, by default \"1s\"

'1s' Source code in modelskill/timeseries/_timeseries.py
def trim(\n    self: T,\n    start_time: Optional[pd.Timestamp] = None,\n    end_time: Optional[pd.Timestamp] = None,\n    buffer: str = \"1s\",\n) -> T:\n    \"\"\"Trim observation data to a given time interval\n\n    Parameters\n    ----------\n    start_time : pd.Timestamp\n        start time\n    end_time : pd.Timestamp\n        end time\n    buffer : str, optional\n        buffer time around start and end time, by default \"1s\"\n    \"\"\"\n    # Expand time interval with buffer\n    start_time = pd.Timestamp(start_time) - pd.Timedelta(buffer)\n    end_time = pd.Timestamp(end_time) + pd.Timedelta(buffer)\n\n    data = self.data.sel(time=slice(start_time, end_time))\n    if len(data.time) == 0:\n        raise ValueError(\n            f\"No data left after trimming to {start_time} - {end_time}\"\n        )\n    return self.__class__(data)\n
"},{"location":"api/model/track/#modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter","title":"modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter","text":"

Bases: TimeSeriesPlotter

Source code in modelskill/timeseries/_plotter.py
class MatplotlibTimeSeriesPlotter(TimeSeriesPlotter):\n    def __init__(self, ts) -> None:\n        self._ts = ts\n\n    def __call__(self, **kwargs):\n        # default to timeseries plot\n        self.timeseries(**kwargs)\n\n    def timeseries(\n        self, title=None, color=None, marker=\".\", linestyle=\"None\", **kwargs\n    ):\n        \"\"\"Plot timeseries\n\n        Wraps pandas.DataFrame plot() method.\n\n        Parameters\n        ----------\n        title : str, optional\n            plot title, default: [name]\n        color : str, optional\n            plot color, by default '#d62728'\n        marker : str, optional\n            plot marker, by default '.'\n        linestyle : str, optional\n            line style, by default None\n        **kwargs\n            other keyword arguments to df.plot()\n        \"\"\"\n        kwargs[\"color\"] = self._ts._color if color is None else color\n        ax = self._ts._values_as_series.plot(\n            marker=marker, linestyle=linestyle, **kwargs\n        )\n\n        title = self._ts.name if title is None else title\n        ax.set_title(title)\n\n        ax.set_ylabel(str(self._ts.quantity))\n        return ax\n\n    def hist(self, bins=100, title=None, color=None, **kwargs):\n        \"\"\"Plot histogram of timeseries values\n\n        Wraps pandas.DataFrame hist() method.\n\n        Parameters\n        ----------\n        bins : int, optional\n            specification of bins, by default 100\n        title : str, optional\n            plot title, default: observation name\n        color : str, optional\n            plot color, by default \"#d62728\"\n        **kwargs\n            other keyword arguments to df.hist()\n\n        Returns\n        -------\n        matplotlib axes\n        \"\"\"\n        title = self._ts.name if title is None else title\n\n        kwargs[\"color\"] = self._ts._color if color is None else color\n\n        ax = self._ts._values_as_series.hist(bins=bins, **kwargs)\n        
ax.set_title(title)\n        ax.set_xlabel(str(self._ts.quantity))\n        return ax\n
"},{"location":"api/model/track/#modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter.hist","title":"hist","text":"
hist(bins=100, title=None, color=None, **kwargs)\n

Plot histogram of timeseries values

Wraps pandas.DataFrame hist() method.

Parameters:

Name Type Description Default bins int

specification of bins, by default 100

100 title str

plot title, default: observation name

None color str

plot color, by default \"#d62728\"

None **kwargs

other keyword arguments to df.hist()

{}

Returns:

Type Description matplotlib axes Source code in modelskill/timeseries/_plotter.py
def hist(self, bins=100, title=None, color=None, **kwargs):\n    \"\"\"Plot histogram of timeseries values\n\n    Wraps pandas.DataFrame hist() method.\n\n    Parameters\n    ----------\n    bins : int, optional\n        specification of bins, by default 100\n    title : str, optional\n        plot title, default: observation name\n    color : str, optional\n        plot color, by default \"#d62728\"\n    **kwargs\n        other keyword arguments to df.hist()\n\n    Returns\n    -------\n    matplotlib axes\n    \"\"\"\n    title = self._ts.name if title is None else title\n\n    kwargs[\"color\"] = self._ts._color if color is None else color\n\n    ax = self._ts._values_as_series.hist(bins=bins, **kwargs)\n    ax.set_title(title)\n    ax.set_xlabel(str(self._ts.quantity))\n    return ax\n
"},{"location":"api/model/track/#modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter.timeseries","title":"timeseries","text":"
timeseries(title=None, color=None, marker='.', linestyle='None', **kwargs)\n

Plot timeseries

Wraps pandas.DataFrame plot() method.

Parameters:

Name Type Description Default title str

plot title, default: [name]

None color str

plot color, by default '#d62728'

None marker str

plot marker, by default '.'

'.' linestyle str

line style, by default None

'None' **kwargs

other keyword arguments to df.plot()

{} Source code in modelskill/timeseries/_plotter.py
def timeseries(\n    self, title=None, color=None, marker=\".\", linestyle=\"None\", **kwargs\n):\n    \"\"\"Plot timeseries\n\n    Wraps pandas.DataFrame plot() method.\n\n    Parameters\n    ----------\n    title : str, optional\n        plot title, default: [name]\n    color : str, optional\n        plot color, by default '#d62728'\n    marker : str, optional\n        plot marker, by default '.'\n    linestyle : str, optional\n        line style, by default None\n    **kwargs\n        other keyword arguments to df.plot()\n    \"\"\"\n    kwargs[\"color\"] = self._ts._color if color is None else color\n    ax = self._ts._values_as_series.plot(\n        marker=marker, linestyle=linestyle, **kwargs\n    )\n\n    title = self._ts.name if title is None else title\n    ax.set_title(title)\n\n    ax.set_ylabel(str(self._ts.quantity))\n    return ax\n
"},{"location":"api/observation/","title":"Observations","text":"

ModelSkill supports two types of observations:

  • PointObservation - a point timeseries from a dfs0/nc file or a DataFrame
  • TrackObservation - a track (moving point) timeseries from a dfs0/nc file or a DataFrame

An observation can be created by explicitly invoking one of the above classes or using the observation() function which will return the appropriate type based on the input data (if possible).

"},{"location":"api/observation/observation/","title":"observation()","text":""},{"location":"api/observation/observation/#modelskill.observation","title":"modelskill.observation","text":"
observation(data, *, gtype=None, **kwargs)\n

A factory function for creating an appropriate observation object based on the data and args.

If 'x' or 'y' is given, a PointObservation is created. If 'x_item' or 'y_item' is given, a TrackObservation is created.

Parameters:

Name Type Description Default data DataInputType

The data to be used for creating the Observation object.

required gtype Optional[Literal['point', 'track']]

The geometry type of the data. If not specified, it will be guessed from the data.

None **kwargs

Additional keyword arguments to be passed to the Observation constructor.

{}

Examples:

>>> import modelskill as ms\n>>> o_pt = ms.observation(df, item=0, x=366844, y=6154291, name=\"Klagshamn\")\n>>> o_tr = ms.observation(\"lon_after_lat.dfs0\", item=\"wl\", x_item=1, y_item=0)\n
Source code in modelskill/obs.py
def observation(\n    data: DataInputType,\n    *,\n    gtype: Optional[Literal[\"point\", \"track\"]] = None,\n    **kwargs,\n):\n    \"\"\"A factory function for creating an appropriate observation object\n    based on the data and args.\n\n    If 'x' or 'y' is given, a PointObservation is created.\n    If 'x_item' or 'y_item' is given, a TrackObservation is created.\n\n    Parameters\n    ----------\n    data : DataInputType\n        The data to be used for creating the Observation object.\n    gtype : Optional[Literal[\"point\", \"track\"]]\n        The geometry type of the data. If not specified, it will be guessed from the data.\n    **kwargs\n        Additional keyword arguments to be passed to the Observation constructor.\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> o_pt = ms.observation(df, item=0, x=366844, y=6154291, name=\"Klagshamn\")\n    >>> o_tr = ms.observation(\"lon_after_lat.dfs0\", item=\"wl\", x_item=1, y_item=0)\n    \"\"\"\n    if gtype is None:\n        geometry = _guess_gtype(**kwargs)\n    else:\n        geometry = GeometryType.from_string(gtype)\n\n    return _obs_class_lookup[geometry](\n        data=data,\n        **kwargs,\n    )\n
"},{"location":"api/observation/point/","title":"PointObservation","text":""},{"location":"api/observation/point/#modelskill.PointObservation","title":"modelskill.PointObservation","text":"

Bases: Observation

Class for observations of fixed locations

Create a PointObservation from a dfs0 file or a pd.DataFrame.

Parameters:

Name Type Description Default data (str, Path, Dataset, DataArray, DataFrame, Series, Dataset or DataArray)

filename (.dfs0 or .nc) or object with the data

required item (int, str)

index or name of the wanted item/column, by default None if data contains more than one item, item must be given

None x float

x-coordinate of the observation point, inferred from data if not given, else None

None y float

y-coordinate of the observation point, inferred from data if not given, else None

None z float

z-coordinate of the observation point, inferred from data if not given, else None

None name str

user-defined name for easy identification in plots etc, by default file basename

None quantity Quantity

The quantity of the observation, for validation with model results For MIKE dfs files this is inferred from the EUM information

None aux_items list

list of names or indices of auxiliary items, by default None

None attrs dict

additional attributes to be added to the data, by default None

None weight float

weighting factor for skill scores, by default 1.0

1.0

Examples:

>>> import modelskill as ms\n>>> o1 = ms.PointObservation(\"klagshamn.dfs0\", item=0, x=366844, y=6154291, name=\"Klagshamn\")\n>>> o2 = ms.PointObservation(\"klagshamn.dfs0\", item=\"Water Level\", x=366844, y=6154291)\n>>> o3 = ms.PointObservation(df, item=0, x=366844, y=6154291, name=\"Klagshamn\")\n>>> o4 = ms.PointObservation(df[\"Water Level\"], x=366844, y=6154291)\n
Source code in modelskill/obs.py
class PointObservation(Observation):\n    \"\"\"Class for observations of fixed locations\n\n    Create a PointObservation from a dfs0 file or a pd.DataFrame.\n\n    Parameters\n    ----------\n    data : str, Path, mikeio.Dataset, mikeio.DataArray, pd.DataFrame, pd.Series, xr.Dataset or xr.DataArray\n        filename (.dfs0 or .nc) or object with the data\n    item : (int, str), optional\n        index or name of the wanted item/column, by default None\n        if data contains more than one item, item must be given\n    x : float, optional\n        x-coordinate of the observation point, inferred from data if not given, else None\n    y : float, optional\n        y-coordinate of the observation point, inferred from data if not given, else None\n    z : float, optional\n        z-coordinate of the observation point, inferred from data if not given, else None\n    name : str, optional\n        user-defined name for easy identification in plots etc, by default file basename\n    quantity : Quantity, optional\n        The quantity of the observation, for validation with model results\n        For MIKE dfs files this is inferred from the EUM information\n    aux_items : list, optional\n        list of names or indices of auxiliary items, by default None\n    attrs : dict, optional\n        additional attributes to be added to the data, by default None\n    weight : float, optional\n        weighting factor for skill scores, by default 1.0\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> o1 = ms.PointObservation(\"klagshamn.dfs0\", item=0, x=366844, y=6154291, name=\"Klagshamn\")\n    >>> o2 = ms.PointObservation(\"klagshamn.dfs0\", item=\"Water Level\", x=366844, y=6154291)\n    >>> o3 = ms.PointObservation(df, item=0, x=366844, y=6154291, name=\"Klagshamn\")\n    >>> o4 = ms.PointObservation(df[\"Water Level\"], x=366844, y=6154291)\n    \"\"\"\n\n    def __init__(\n        self,\n        data: PointType,\n        *,\n        item: Optional[int 
| str] = None,\n        x: Optional[float] = None,\n        y: Optional[float] = None,\n        z: Optional[float] = None,\n        name: Optional[str] = None,\n        weight: float = 1.0,\n        quantity: Optional[Quantity] = None,\n        aux_items: Optional[list[int | str]] = None,\n        attrs: Optional[dict] = None,\n    ) -> None:\n        if not self._is_input_validated(data):\n            data = _parse_point_input(\n                data,\n                name=name,\n                item=item,\n                quantity=quantity,\n                aux_items=aux_items,\n                x=x,\n                y=y,\n                z=z,\n            )\n\n        assert isinstance(data, xr.Dataset)\n        super().__init__(data=data, weight=weight, attrs=attrs)\n\n    @property\n    def z(self):\n        \"\"\"z-coordinate of observation point\"\"\"\n        return self._coordinate_values(\"z\")\n\n    @z.setter\n    def z(self, value):\n        self.data[\"z\"] = value\n
"},{"location":"api/observation/point/#modelskill.PointObservation.attrs","title":"attrs property writable","text":"
attrs\n

Attributes of the observation

"},{"location":"api/observation/point/#modelskill.PointObservation.gtype","title":"gtype property","text":"
gtype\n

Geometry type

"},{"location":"api/observation/point/#modelskill.PointObservation.n_points","title":"n_points property","text":"
n_points\n

Number of data points

"},{"location":"api/observation/point/#modelskill.PointObservation.name","title":"name property writable","text":"
name\n

Name of time series (value item name)

"},{"location":"api/observation/point/#modelskill.PointObservation.plot","title":"plot instance-attribute","text":"
plot = plotter(self)\n

Plot using the ComparerPlotter

Examples:

>>> obj.plot.timeseries()\n>>> obj.plot.hist()\n
"},{"location":"api/observation/point/#modelskill.PointObservation.quantity","title":"quantity property writable","text":"
quantity\n

Quantity of time series

"},{"location":"api/observation/point/#modelskill.PointObservation.time","title":"time property","text":"
time\n

Time index

"},{"location":"api/observation/point/#modelskill.PointObservation.values","title":"values property","text":"
values\n

Values as numpy array

"},{"location":"api/observation/point/#modelskill.PointObservation.weight","title":"weight property writable","text":"
weight\n

Weighting factor for skill scores

"},{"location":"api/observation/point/#modelskill.PointObservation.x","title":"x property writable","text":"
x\n

x-coordinate

"},{"location":"api/observation/point/#modelskill.PointObservation.y","title":"y property writable","text":"
y\n

y-coordinate

"},{"location":"api/observation/point/#modelskill.PointObservation.z","title":"z property writable","text":"
z\n

z-coordinate of observation point

"},{"location":"api/observation/point/#modelskill.PointObservation.equals","title":"equals","text":"
equals(other)\n

Check if two TimeSeries are equal

Source code in modelskill/timeseries/_timeseries.py
def equals(self, other: TimeSeries) -> bool:\n    \"\"\"Check if two TimeSeries are equal\"\"\"\n    return self.data.equals(other.data)\n
"},{"location":"api/observation/point/#modelskill.PointObservation.sel","title":"sel","text":"
sel(**kwargs)\n

Select data by label

Source code in modelskill/timeseries/_timeseries.py
def sel(self: T, **kwargs: Any) -> T:\n    \"\"\"Select data by label\"\"\"\n    return self.__class__(self.data.sel(**kwargs))\n
"},{"location":"api/observation/point/#modelskill.PointObservation.to_dataframe","title":"to_dataframe","text":"
to_dataframe()\n

Convert matched data to pandas DataFrame

Include x, y coordinates only if gtype=track

Returns:

Type Description DataFrame

data as a pandas DataFrame

Source code in modelskill/timeseries/_timeseries.py
def to_dataframe(self) -> pd.DataFrame:\n    \"\"\"Convert matched data to pandas DataFrame\n\n    Include x, y coordinates only if gtype=track\n\n    Returns\n    -------\n    pd.DataFrame\n        data as a pandas DataFrame\n    \"\"\"\n    if self.gtype == str(GeometryType.POINT):\n        # we remove the scalar coordinate variables as they\n        # will otherwise be columns in the dataframe\n        return self.data.drop_vars([\"x\", \"y\", \"z\"]).to_dataframe()\n    elif self.gtype == str(GeometryType.TRACK):\n        df = self.data.drop_vars([\"z\"]).to_dataframe()\n        # make sure that x, y cols are first\n        cols = [\"x\", \"y\"] + [c for c in df.columns if c not in [\"x\", \"y\"]]\n        return df[cols]\n    else:\n        raise NotImplementedError(f\"Unknown gtype: {self.gtype}\")\n
"},{"location":"api/observation/point/#modelskill.PointObservation.trim","title":"trim","text":"
trim(start_time=None, end_time=None, buffer='1s')\n

Trim observation data to a given time interval

Parameters:

Name Type Description Default start_time Timestamp

start time

None end_time Timestamp

end time

None buffer str

buffer time around start and end time, by default \"1s\"

'1s' Source code in modelskill/timeseries/_timeseries.py
def trim(\n    self: T,\n    start_time: Optional[pd.Timestamp] = None,\n    end_time: Optional[pd.Timestamp] = None,\n    buffer: str = \"1s\",\n) -> T:\n    \"\"\"Trim observation data to a given time interval\n\n    Parameters\n    ----------\n    start_time : pd.Timestamp\n        start time\n    end_time : pd.Timestamp\n        end time\n    buffer : str, optional\n        buffer time around start and end time, by default \"1s\"\n    \"\"\"\n    # Expand time interval with buffer\n    start_time = pd.Timestamp(start_time) - pd.Timedelta(buffer)\n    end_time = pd.Timestamp(end_time) + pd.Timedelta(buffer)\n\n    data = self.data.sel(time=slice(start_time, end_time))\n    if len(data.time) == 0:\n        raise ValueError(\n            f\"No data left after trimming to {start_time} - {end_time}\"\n        )\n    return self.__class__(data)\n
"},{"location":"api/observation/point/#modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter","title":"modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter","text":"

Bases: TimeSeriesPlotter

Source code in modelskill/timeseries/_plotter.py
class MatplotlibTimeSeriesPlotter(TimeSeriesPlotter):\n    def __init__(self, ts) -> None:\n        self._ts = ts\n\n    def __call__(self, **kwargs):\n        # default to timeseries plot\n        self.timeseries(**kwargs)\n\n    def timeseries(\n        self, title=None, color=None, marker=\".\", linestyle=\"None\", **kwargs\n    ):\n        \"\"\"Plot timeseries\n\n        Wraps pandas.DataFrame plot() method.\n\n        Parameters\n        ----------\n        title : str, optional\n            plot title, default: [name]\n        color : str, optional\n            plot color, by default '#d62728'\n        marker : str, optional\n            plot marker, by default '.'\n        linestyle : str, optional\n            line style, by default None\n        **kwargs\n            other keyword arguments to df.plot()\n        \"\"\"\n        kwargs[\"color\"] = self._ts._color if color is None else color\n        ax = self._ts._values_as_series.plot(\n            marker=marker, linestyle=linestyle, **kwargs\n        )\n\n        title = self._ts.name if title is None else title\n        ax.set_title(title)\n\n        ax.set_ylabel(str(self._ts.quantity))\n        return ax\n\n    def hist(self, bins=100, title=None, color=None, **kwargs):\n        \"\"\"Plot histogram of timeseries values\n\n        Wraps pandas.DataFrame hist() method.\n\n        Parameters\n        ----------\n        bins : int, optional\n            specification of bins, by default 100\n        title : str, optional\n            plot title, default: observation name\n        color : str, optional\n            plot color, by default \"#d62728\"\n        **kwargs\n            other keyword arguments to df.hist()\n\n        Returns\n        -------\n        matplotlib axes\n        \"\"\"\n        title = self._ts.name if title is None else title\n\n        kwargs[\"color\"] = self._ts._color if color is None else color\n\n        ax = self._ts._values_as_series.hist(bins=bins, **kwargs)\n        
ax.set_title(title)\n        ax.set_xlabel(str(self._ts.quantity))\n        return ax\n
"},{"location":"api/observation/point/#modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter.hist","title":"hist","text":"
hist(bins=100, title=None, color=None, **kwargs)\n

Plot histogram of timeseries values

Wraps pandas.DataFrame hist() method.

Parameters:

Name Type Description Default bins int

specification of bins, by default 100

100 title str

plot title, default: observation name

None color str

plot color, by default \"#d62728\"

None **kwargs

other keyword arguments to df.hist()

{}

Returns:

Type Description matplotlib axes Source code in modelskill/timeseries/_plotter.py
def hist(self, bins=100, title=None, color=None, **kwargs):\n    \"\"\"Plot histogram of timeseries values\n\n    Wraps pandas.DataFrame hist() method.\n\n    Parameters\n    ----------\n    bins : int, optional\n        specification of bins, by default 100\n    title : str, optional\n        plot title, default: observation name\n    color : str, optional\n        plot color, by default \"#d62728\"\n    **kwargs\n        other keyword arguments to df.hist()\n\n    Returns\n    -------\n    matplotlib axes\n    \"\"\"\n    title = self._ts.name if title is None else title\n\n    kwargs[\"color\"] = self._ts._color if color is None else color\n\n    ax = self._ts._values_as_series.hist(bins=bins, **kwargs)\n    ax.set_title(title)\n    ax.set_xlabel(str(self._ts.quantity))\n    return ax\n
"},{"location":"api/observation/point/#modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter.timeseries","title":"timeseries","text":"
timeseries(title=None, color=None, marker='.', linestyle='None', **kwargs)\n

Plot timeseries

Wraps pandas.DataFrame plot() method.

Parameters:

Name Type Description Default title str

plot title, default: [name]

None color str

plot color, by default '#d62728'

None marker str

plot marker, by default '.'

'.' linestyle str

line style, by default None

'None' **kwargs

other keyword arguments to df.plot()

{} Source code in modelskill/timeseries/_plotter.py
def timeseries(\n    self, title=None, color=None, marker=\".\", linestyle=\"None\", **kwargs\n):\n    \"\"\"Plot timeseries\n\n    Wraps pandas.DataFrame plot() method.\n\n    Parameters\n    ----------\n    title : str, optional\n        plot title, default: [name]\n    color : str, optional\n        plot color, by default '#d62728'\n    marker : str, optional\n        plot marker, by default '.'\n    linestyle : str, optional\n        line style, by default None\n    **kwargs\n        other keyword arguments to df.plot()\n    \"\"\"\n    kwargs[\"color\"] = self._ts._color if color is None else color\n    ax = self._ts._values_as_series.plot(\n        marker=marker, linestyle=linestyle, **kwargs\n    )\n\n    title = self._ts.name if title is None else title\n    ax.set_title(title)\n\n    ax.set_ylabel(str(self._ts.quantity))\n    return ax\n
"},{"location":"api/observation/track/","title":"TrackObservation","text":""},{"location":"api/observation/track/#modelskill.TrackObservation","title":"modelskill.TrackObservation","text":"

Bases: Observation

Class for observation with locations moving in space, e.g. satellite altimetry

The data needs in addition to the datetime of each single observation point also, x and y coordinates.

Create TrackObservation from dfs0 or DataFrame

Parameters:

Name Type Description Default data (str, Path, Dataset, DataFrame, Dataset)

path to dfs0 file or object with track data

required item (str, int)

item name or index of values, by default None if data contains more than one item, item must be given

None name str

user-defined name for easy identification in plots etc, by default file basename

None x_item (str, int)

item name or index of x-coordinate, by default 0

0 y_item (str, int)

item name or index of y-coordinate, by default 1

1 keep_duplicates (str, bool)

strategy for handling duplicate timestamps (xarray.Dataset.drop_duplicates): \"first\" to keep first occurrence, \"last\" to keep last occurrence, False to drop all duplicates, \"offset\" to add milliseconds to consecutive duplicates, by default \"first\"

'first' quantity Quantity

The quantity of the observation, for validation with model results For MIKE dfs files this is inferred from the EUM information

None aux_items list

list of names or indices of auxiliary items, by default None

None attrs dict

additional attributes to be added to the data, by default None

None weight float

weighting factor for skill scores, by default 1.0

1.0

Examples:

>>> import modelskill as ms\n>>> o1 = ms.TrackObservation(\"track.dfs0\", item=2, name=\"c2\")\n
>>> o1 = ms.TrackObservation(\"track.dfs0\", item=\"wind_speed\", name=\"c2\")\n
>>> o1 = ms.TrackObservation(\"lon_after_lat.dfs0\", item=\"wl\", x_item=1, y_item=0)\n
>>> o1 = ms.TrackObservation(\"track_wl.dfs0\", item=\"wl\", x_item=\"lon\", y_item=\"lat\")\n
>>> df = pd.DataFrame(\n...         {\n...             \"t\": pd.date_range(\"2010-01-01\", freq=\"10s\", periods=n),\n...             \"x\": np.linspace(0, 10, n),\n...             \"y\": np.linspace(45000, 45100, n),\n...             \"swh\": [0.1, 0.3, 0.4, 0.5, 0.3],\n...         }\n... )\n>>> df = df.set_index(\"t\")\n>>> df\n                    x        y  swh\nt\n2010-01-01 00:00:00   0.0  45000.0  0.1\n2010-01-01 00:00:10   2.5  45025.0  0.3\n2010-01-01 00:00:20   5.0  45050.0  0.4\n2010-01-01 00:00:30   7.5  45075.0  0.5\n2010-01-01 00:00:40  10.0  45100.0  0.3\n>>> t1 = TrackObservation(df, name=\"fake\")\n>>> t1.n_points\n5\n>>> t1.values\narray([0.1, 0.3, 0.4, 0.5, 0.3])\n>>> t1.time\nDatetimeIndex(['2010-01-01 00:00:00', '2010-01-01 00:00:10',\n           '2010-01-01 00:00:20', '2010-01-01 00:00:30',\n           '2010-01-01 00:00:40'],\n          dtype='datetime64[ns]', name='t', freq=None)\n>>> t1.x\narray([ 0. ,  2.5,  5. ,  7.5, 10. ])\n>>> t1.y\narray([45000., 45025., 45050., 45075., 45100.])\n
Source code in modelskill/obs.py
class TrackObservation(Observation):\n    \"\"\"Class for observation with locations moving in space, e.g. satellite altimetry\n\n    The data needs in addition to the datetime of each single observation point also, x and y coordinates.\n\n    Create TrackObservation from dfs0 or DataFrame\n\n    Parameters\n    ----------\n    data : (str, Path, mikeio.Dataset, pd.DataFrame, xr.Dataset)\n        path to dfs0 file or object with track data\n    item : (str, int), optional\n        item name or index of values, by default None\n        if data contains more than one item, item must be given\n    name : str, optional\n        user-defined name for easy identification in plots etc, by default file basename\n    x_item : (str, int), optional\n        item name or index of x-coordinate, by default 0\n    y_item : (str, int), optional\n        item name or index of y-coordinate, by default 1\n    keep_duplicates : (str, bool), optional\n        strategy for handling duplicate timestamps (xarray.Dataset.drop_duplicates):\n        \"first\" to keep first occurrence, \"last\" to keep last occurrence,\n        False to drop all duplicates, \"offset\" to add milliseconds to\n        consecutive duplicates, by default \"first\"\n    quantity : Quantity, optional\n        The quantity of the observation, for validation with model results\n        For MIKE dfs files this is inferred from the EUM information\n    aux_items : list, optional\n        list of names or indices of auxiliary items, by default None\n    attrs : dict, optional\n        additional attributes to be added to the data, by default None\n    weight : float, optional\n        weighting factor for skill scores, by default 1.0\n\n    Examples\n    --------\n    >>> import modelskill as ms\n    >>> o1 = ms.TrackObservation(\"track.dfs0\", item=2, name=\"c2\")\n\n    >>> o1 = ms.TrackObservation(\"track.dfs0\", item=\"wind_speed\", name=\"c2\")\n\n    >>> o1 = ms.TrackObservation(\"lon_after_lat.dfs0\", 
item=\"wl\", x_item=1, y_item=0)\n\n    >>> o1 = ms.TrackObservation(\"track_wl.dfs0\", item=\"wl\", x_item=\"lon\", y_item=\"lat\")\n\n    >>> df = pd.DataFrame(\n    ...         {\n    ...             \"t\": pd.date_range(\"2010-01-01\", freq=\"10s\", periods=n),\n    ...             \"x\": np.linspace(0, 10, n),\n    ...             \"y\": np.linspace(45000, 45100, n),\n    ...             \"swh\": [0.1, 0.3, 0.4, 0.5, 0.3],\n    ...         }\n    ... )\n    >>> df = df.set_index(\"t\")\n    >>> df\n                        x        y  swh\n    t\n    2010-01-01 00:00:00   0.0  45000.0  0.1\n    2010-01-01 00:00:10   2.5  45025.0  0.3\n    2010-01-01 00:00:20   5.0  45050.0  0.4\n    2010-01-01 00:00:30   7.5  45075.0  0.5\n    2010-01-01 00:00:40  10.0  45100.0  0.3\n    >>> t1 = TrackObservation(df, name=\"fake\")\n    >>> t1.n_points\n    5\n    >>> t1.values\n    array([0.1, 0.3, 0.4, 0.5, 0.3])\n    >>> t1.time\n    DatetimeIndex(['2010-01-01 00:00:00', '2010-01-01 00:00:10',\n               '2010-01-01 00:00:20', '2010-01-01 00:00:30',\n               '2010-01-01 00:00:40'],\n              dtype='datetime64[ns]', name='t', freq=None)\n    >>> t1.x\n    array([ 0. ,  2.5,  5. ,  7.5, 10. 
])\n    >>> t1.y\n    array([45000., 45025., 45050., 45075., 45100.])\n\n    \"\"\"\n\n    def __init__(\n        self,\n        data: TrackType,\n        *,\n        item: Optional[int | str] = None,\n        name: Optional[str] = None,\n        weight: float = 1.0,\n        x_item: Optional[int | str] = 0,\n        y_item: Optional[int | str] = 1,\n        keep_duplicates: bool | str = \"first\",\n        offset_duplicates: float = 0.001,\n        quantity: Optional[Quantity] = None,\n        aux_items: Optional[list[int | str]] = None,\n        attrs: Optional[dict] = None,\n    ) -> None:\n        if not self._is_input_validated(data):\n            if offset_duplicates != 0.001:\n                warnings.warn(\n                    \"The 'offset_duplicates' argument is deprecated, use 'keep_duplicates' argument.\",\n                    FutureWarning,\n                )\n            data = _parse_track_input(\n                data=data,\n                name=name,\n                item=item,\n                quantity=quantity,\n                x_item=x_item,\n                y_item=y_item,\n                keep_duplicates=keep_duplicates,\n                offset_duplicates=offset_duplicates,\n                aux_items=aux_items,\n            )\n        assert isinstance(data, xr.Dataset)\n        super().__init__(data=data, weight=weight, attrs=attrs)\n
"},{"location":"api/observation/track/#modelskill.TrackObservation.attrs","title":"attrs property writable","text":"
attrs\n

Attributes of the observation

"},{"location":"api/observation/track/#modelskill.TrackObservation.gtype","title":"gtype property","text":"
gtype\n

Geometry type

"},{"location":"api/observation/track/#modelskill.TrackObservation.n_points","title":"n_points property","text":"
n_points\n

Number of data points

"},{"location":"api/observation/track/#modelskill.TrackObservation.name","title":"name property writable","text":"
name\n

Name of time series (value item name)

"},{"location":"api/observation/track/#modelskill.TrackObservation.plot","title":"plot instance-attribute","text":"
plot = plotter(self)\n

Plot using the ComparerPlotter

Examples:

>>> obj.plot.timeseries()\n>>> obj.plot.hist()\n
"},{"location":"api/observation/track/#modelskill.TrackObservation.quantity","title":"quantity property writable","text":"
quantity\n

Quantity of time series

"},{"location":"api/observation/track/#modelskill.TrackObservation.time","title":"time property","text":"
time\n

Time index

"},{"location":"api/observation/track/#modelskill.TrackObservation.values","title":"values property","text":"
values\n

Values as numpy array

"},{"location":"api/observation/track/#modelskill.TrackObservation.weight","title":"weight property writable","text":"
weight\n

Weighting factor for skill scores

"},{"location":"api/observation/track/#modelskill.TrackObservation.x","title":"x property writable","text":"
x\n

x-coordinate

"},{"location":"api/observation/track/#modelskill.TrackObservation.y","title":"y property writable","text":"
y\n

y-coordinate

"},{"location":"api/observation/track/#modelskill.TrackObservation.equals","title":"equals","text":"
equals(other)\n

Check if two TimeSeries are equal

Source code in modelskill/timeseries/_timeseries.py
def equals(self, other: TimeSeries) -> bool:\n    \"\"\"Check if two TimeSeries are equal\"\"\"\n    return self.data.equals(other.data)\n
"},{"location":"api/observation/track/#modelskill.TrackObservation.sel","title":"sel","text":"
sel(**kwargs)\n

Select data by label

Source code in modelskill/timeseries/_timeseries.py
def sel(self: T, **kwargs: Any) -> T:\n    \"\"\"Select data by label\"\"\"\n    return self.__class__(self.data.sel(**kwargs))\n
"},{"location":"api/observation/track/#modelskill.TrackObservation.to_dataframe","title":"to_dataframe","text":"
to_dataframe()\n

Convert matched data to pandas DataFrame

Include x, y coordinates only if gtype=track

Returns:

Type Description DataFrame

data as a pandas DataFrame

Source code in modelskill/timeseries/_timeseries.py
def to_dataframe(self) -> pd.DataFrame:\n    \"\"\"Convert matched data to pandas DataFrame\n\n    Include x, y coordinates only if gtype=track\n\n    Returns\n    -------\n    pd.DataFrame\n        data as a pandas DataFrame\n    \"\"\"\n    if self.gtype == str(GeometryType.POINT):\n        # we remove the scalar coordinate variables as they\n        # will otherwise be columns in the dataframe\n        return self.data.drop_vars([\"x\", \"y\", \"z\"]).to_dataframe()\n    elif self.gtype == str(GeometryType.TRACK):\n        df = self.data.drop_vars([\"z\"]).to_dataframe()\n        # make sure that x, y cols are first\n        cols = [\"x\", \"y\"] + [c for c in df.columns if c not in [\"x\", \"y\"]]\n        return df[cols]\n    else:\n        raise NotImplementedError(f\"Unknown gtype: {self.gtype}\")\n
"},{"location":"api/observation/track/#modelskill.TrackObservation.trim","title":"trim","text":"
trim(start_time=None, end_time=None, buffer='1s')\n

Trim observation data to a given time interval

Parameters:

Name Type Description Default start_time Timestamp

start time

None end_time Timestamp

end time

None buffer str

buffer time around start and end time, by default \"1s\"

'1s' Source code in modelskill/timeseries/_timeseries.py
def trim(\n    self: T,\n    start_time: Optional[pd.Timestamp] = None,\n    end_time: Optional[pd.Timestamp] = None,\n    buffer: str = \"1s\",\n) -> T:\n    \"\"\"Trim observation data to a given time interval\n\n    Parameters\n    ----------\n    start_time : pd.Timestamp\n        start time\n    end_time : pd.Timestamp\n        end time\n    buffer : str, optional\n        buffer time around start and end time, by default \"1s\"\n    \"\"\"\n    # Expand time interval with buffer\n    start_time = pd.Timestamp(start_time) - pd.Timedelta(buffer)\n    end_time = pd.Timestamp(end_time) + pd.Timedelta(buffer)\n\n    data = self.data.sel(time=slice(start_time, end_time))\n    if len(data.time) == 0:\n        raise ValueError(\n            f\"No data left after trimming to {start_time} - {end_time}\"\n        )\n    return self.__class__(data)\n
"},{"location":"api/observation/track/#modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter","title":"modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter","text":"

Bases: TimeSeriesPlotter

Source code in modelskill/timeseries/_plotter.py
class MatplotlibTimeSeriesPlotter(TimeSeriesPlotter):\n    def __init__(self, ts) -> None:\n        self._ts = ts\n\n    def __call__(self, **kwargs):\n        # default to timeseries plot\n        self.timeseries(**kwargs)\n\n    def timeseries(\n        self, title=None, color=None, marker=\".\", linestyle=\"None\", **kwargs\n    ):\n        \"\"\"Plot timeseries\n\n        Wraps pandas.DataFrame plot() method.\n\n        Parameters\n        ----------\n        title : str, optional\n            plot title, default: [name]\n        color : str, optional\n            plot color, by default '#d62728'\n        marker : str, optional\n            plot marker, by default '.'\n        linestyle : str, optional\n            line style, by default None\n        **kwargs\n            other keyword arguments to df.plot()\n        \"\"\"\n        kwargs[\"color\"] = self._ts._color if color is None else color\n        ax = self._ts._values_as_series.plot(\n            marker=marker, linestyle=linestyle, **kwargs\n        )\n\n        title = self._ts.name if title is None else title\n        ax.set_title(title)\n\n        ax.set_ylabel(str(self._ts.quantity))\n        return ax\n\n    def hist(self, bins=100, title=None, color=None, **kwargs):\n        \"\"\"Plot histogram of timeseries values\n\n        Wraps pandas.DataFrame hist() method.\n\n        Parameters\n        ----------\n        bins : int, optional\n            specification of bins, by default 100\n        title : str, optional\n            plot title, default: observation name\n        color : str, optional\n            plot color, by default \"#d62728\"\n        **kwargs\n            other keyword arguments to df.hist()\n\n        Returns\n        -------\n        matplotlib axes\n        \"\"\"\n        title = self._ts.name if title is None else title\n\n        kwargs[\"color\"] = self._ts._color if color is None else color\n\n        ax = self._ts._values_as_series.hist(bins=bins, **kwargs)\n        
ax.set_title(title)\n        ax.set_xlabel(str(self._ts.quantity))\n        return ax\n
"},{"location":"api/observation/track/#modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter.hist","title":"hist","text":"
hist(bins=100, title=None, color=None, **kwargs)\n

Plot histogram of timeseries values

Wraps pandas.DataFrame hist() method.

Parameters:

Name Type Description Default bins int

specification of bins, by default 100

100 title str

plot title, default: observation name

None color str

plot color, by default \"#d62728\"

None **kwargs

other keyword arguments to df.hist()

{}

Returns:

Type Description matplotlib axes Source code in modelskill/timeseries/_plotter.py
def hist(self, bins=100, title=None, color=None, **kwargs):\n    \"\"\"Plot histogram of timeseries values\n\n    Wraps pandas.DataFrame hist() method.\n\n    Parameters\n    ----------\n    bins : int, optional\n        specification of bins, by default 100\n    title : str, optional\n        plot title, default: observation name\n    color : str, optional\n        plot color, by default \"#d62728\"\n    **kwargs\n        other keyword arguments to df.hist()\n\n    Returns\n    -------\n    matplotlib axes\n    \"\"\"\n    title = self._ts.name if title is None else title\n\n    kwargs[\"color\"] = self._ts._color if color is None else color\n\n    ax = self._ts._values_as_series.hist(bins=bins, **kwargs)\n    ax.set_title(title)\n    ax.set_xlabel(str(self._ts.quantity))\n    return ax\n
"},{"location":"api/observation/track/#modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter.timeseries","title":"timeseries","text":"
timeseries(title=None, color=None, marker='.', linestyle='None', **kwargs)\n

Plot timeseries

Wraps pandas.DataFrame plot() method.

Parameters:

Name Type Description Default title str

plot title, default: [name]

None color str

plot color, by default '#d62728'

None marker str

plot marker, by default '.'

'.' linestyle str

line style, by default None

'None' **kwargs

other keyword arguments to df.plot()

{} Source code in modelskill/timeseries/_plotter.py
def timeseries(\n    self, title=None, color=None, marker=\".\", linestyle=\"None\", **kwargs\n):\n    \"\"\"Plot timeseries\n\n    Wraps pandas.DataFrame plot() method.\n\n    Parameters\n    ----------\n    title : str, optional\n        plot title, default: [name]\n    color : str, optional\n        plot color, by default '#d62728'\n    marker : str, optional\n        plot marker, by default '.'\n    linestyle : str, optional\n        line style, by default None\n    **kwargs\n        other keyword arguments to df.plot()\n    \"\"\"\n    kwargs[\"color\"] = self._ts._color if color is None else color\n    ax = self._ts._values_as_series.plot(\n        marker=marker, linestyle=linestyle, **kwargs\n    )\n\n    title = self._ts.name if title is None else title\n    ax.set_title(title)\n\n    ax.set_ylabel(str(self._ts.quantity))\n    return ax\n
"},{"location":"user-guide/","title":"User Guide","text":"

ModelSkill compares model results with observations. The workflow can be split in two phases:

  1. Matching - making sure that observations and model results are in the same space and time
  2. Analysis - plots and statistics of the matched data

If the observations and model results are already matched (i.e. are stored in the same data source), the from_matched() function can be used to go directly to the analysis phase. If not, the match() function can be used to match the observations and model results in space and time.

"},{"location":"user-guide/data-structures/","title":"Data Structures","text":"

The main data structures in ModelSkill can be grouped into three categories:

  • Primary data (observations and model results)
  • Comparer objects
  • Skill objects

All objects share some common principles:

  • The data container is accessible via the data attribute.
  • The data container is an xarray object (except for the SkillTable object, which is a pandas object).
  • The main data selection method is sel, which is a wrapper around xarray.Dataset.sel.
  • All plotting are accessible via the plot accessor of the object.
"},{"location":"user-guide/data-structures/#observations-and-model-results","title":"Observations and model results","text":"

The primary data of ModelSkill are the data that needs to be compared: observations and model results. The underlying data structures are very similar and can be grouped according to the spatial dimensionality (gtype) of the data:

  • point: 0D time series data
  • track: 0D time series data at moving locations (trajectories)
  • grid: gridded 2D data
  • dfsu: flexible mesh 2D data

Point and track data are both TimeSeries objects, while grid and dfsu data are both SpatialField objects. TimeSeries objects are ready to be compared whereas data from SpatialField object needs to be extracted first (the extracted object will be of the TimeSeries type).

TimeSeries objects contain their data in an xarray.Dataset with the actual data in the first DataArray and optional auxiliary data in the following DataArrays. The DataArrays have a kind attribute with either observation or model.

"},{"location":"user-guide/data-structures/#comparer-objects","title":"Comparer objects","text":"

Comparer objects are results of a matching procedure (between observations and model results) or constructed directly from already matched data. A comparison of a single observation and one or more model results are stored in a Comparer object. A comparison of multiple observations and one or more model results are stored in a ComparerCollection object which is a collection of Comparer objects.

The matched data in a Comparer is stored in an xarray.Dataset which can be accessed via the data attribute. The Dataset has an attribute gtype which is a string describing the type of data (e.g. point, track). The first DataArray in the Dataset is the observation data, the next DataArrays are model result data and optionally additional DataArrays are auxiliary data. Each of the DataArrays have a kind attribute with either observation, model or aux.

Both Comparer and ComparerCollection have a plot accessor for plotting the data (e.g. cmp.plot.timeseries() or cmp.plot.scatter()).

"},{"location":"user-guide/data-structures/#skill-objects","title":"Skill objects","text":"

Calling a skill method on a comparer object will return a skill object with skill scores (statistics) from comparing observation and model result data using different metrics (e.g. root mean square error). Two skill objects are currently implemented: SkillTable and SkillGrid. The first is relevant for all ModelSkill users while the latter is relevant for users of the track data (e.g. MetOcean studies using satellite altimetry data).

If c is a comparer object, then the following skill methods are available:

  • c.skill() -> SkillTable
  • c.mean_skill() -> SkillTable
  • c.gridded_skill() -> SkillGrid
"},{"location":"user-guide/data-structures/#skilltable","title":"SkillTable","text":""},{"location":"user-guide/getting-started/","title":"Getting started","text":"

This page describes the typical ModelSkill workflow for comparing model results and observations.

"},{"location":"user-guide/getting-started/#workflow","title":"Workflow","text":"

The typical ModelSkill workflow consists of these four steps:

  1. Define Observations
  2. Define ModelResults
  3. Match observations and ModelResults in space and time
  4. Do analysis, plotting, etc with a Comparer
"},{"location":"user-guide/getting-started/#define-observations","title":"Define Observations","text":"

The first step is to define the measurements to be used for the skill assessment. Two types of observation are available:

  • PointObservation
  • TrackObservation

Let's assume that we have one PointObservation and one TrackObservation (name is used to identify the observation, similar to the name of the model above).

hkna = ms.PointObservation(\"HKNA_Hm0.dfs0\", item=0,\n                            x=4.2420, y=52.6887,\n                            name=\"HKNA\")\nc2 = ms.TrackObservation(\"Alti_c2_Dutch.dfs0\", item=3,\n                          name=\"c2\")\n

In this case both observations are provided as .dfs0 files but pandas dataframes are also supported in case data are stored in another file format.

Both PointObservation and TrackObservation need the path of the data file, the item number (or item name) and a name. A PointObservation further needs to be initialized with its x- and y-position.

"},{"location":"user-guide/getting-started/#define-modelresults","title":"Define ModelResults","text":"

The result of a simulation is stored in one or more result files, e.g. dfsu, dfs0, nc, csv.

The name is used to identify the model result in the plots and tables.

import modelskill as ms\nmr = ms.DfsuModelResult(\"SW/HKZN_local_2017_DutchCoast.dfsu\", \n                         item=\"Sign. Wave Height\",\n                         name='HKZN_local')\n
"},{"location":"user-guide/getting-started/#match-observations-and-modelresults","title":"Match observations and ModelResults","text":"

This match() method returns a Comparer (a single observation) or a ComparerCollection (multiple observations) for further analysis and plotting.

cc = ms.match([hkna, c2], mr)\n
"},{"location":"user-guide/getting-started/#do-analysis-plotting-etc-with-a-comparer","title":"Do analysis, plotting, etc with a Comparer","text":"

The object returned by the match() method is a Comparer/ComparerCollection. It holds the matched observation and model data and has methods for plotting and skill assessment.

The primary comparer methods are:

  • skill() which returns a SkillTable with the skill scores
  • various plot methods of the comparer objects (e.g. plot.scatter(), plot.timeseries())
  • sel() method for selecting data
"},{"location":"user-guide/getting-started/#save-load-the-comparercollection","title":"Save / load the ComparerCollection","text":"

It can be useful to save the comparer collection for later use. This can be done using the save() method:

cc.save(\"my_comparer_collection.msk\")\n

The comparer collection can be loaded again from disk, using the load() method:

cc = ms.load(\"my_comparer_collection.msk\")\n
"},{"location":"user-guide/getting-started/#filtering","title":"Filtering","text":"

In order to select only a subset of the data for analysis, the comparer has a sel() method which returns a new comparer with the selected data.

This method allow filtering of the data in several ways:

  • on observation by specifying name or index of one or more observations
  • on model (if more than one is compared) by giving name or index
  • temporal using the time (or start and end) arguments
  • spatial using the area argument given as a bounding box or a polygon
"},{"location":"user-guide/matching/","title":"Matching","text":"

Once observations and model results have been defined, the next step is to match them. This is done using the match() function which handles the alignment of the observation and model result data in space and time. Note that if the data is already matched, the from_matched() function can be used to create a Comparer directly from the matched data and the matching described here is not needed.

The observation is considered the truth and the model result data is therefore interpolated to the observation data positions.

The matching process will be different depending on the geometry of observation and model result:

  • Geometries are the same (e.g. both are point time series): only temporal matching is needed
  • Geometries are different (e.g. observation is a point time series and model result is a grid): data is first spatially extracted from the model result and then matched in time.
"},{"location":"user-guide/matching/#temporal-matching","title":"Temporal matching","text":"

Temporal matching is done by interpolating the model result data to the observation data time points; it is carried out after spatial matching when applicable. The interpolation is linear in time and done inside the match() function.

"},{"location":"user-guide/matching/#matching-of-time-series","title":"Matching of time series","text":"

If observation and model result are of the same geometry, the matching is done one observation at a time. Several model results can be matched to the same observation. The result of the matching process is a Comparer object which contains the matched data.

In the most simple cases, one observation to one model result, the match() function can be used directly, without creating Observation and ModelResult objects first:

>>> cmp = ms.match('obs.dfs0', 'model.dfs0', obs_item='obs_WL', mod_item='WL')\n

In all other cases, the observations and model results needs to be defined first.

>>> o = ms.observation('obs.dfs0', item='waterlevel')\n>>> mr1 = ms.model_result('model1.dfs0', item='WL1')\n>>> mr2 = ms.model_result('model2.dfs0', item='WL2')\n>>> cmp = ms.match(o, [mr1, mr2])\n

In most cases, several observations needs to matched with several model results. This can be done by constructing a list of Comparer objects and then combining them into a ComparerCollection:

>>> cmps = []\n>>> for o in observations:\n>>>     mr1 = ...\n>>>     mr2 = ...\n>>>     cmps.append(ms.match(o, [mr1, mr2]))\n>>> cc = ms.ComparerCollection(cmps)\n
"},{"location":"user-guide/matching/#matching-with-dfsu-or-grid-model-result","title":"Matching with dfsu or grid model result","text":"

If the model result is a SpatialField, i.e., either a GridModelResult or a DfsuModelResult, and the observation is of lower dimension (e.g. point), then the model result needs to be extracted before matching can be done. This can be done \"offline\" before using ModelSkill, e.g., using MIKE tools or MIKE IO, or as part of the matching process using ModelSkill. We will here focus on the latter.

In this situation, multiple observations can be matched to the same model result, in which case the match function returns a ComparerCollection instead of a Comparer which is the returned object for single observation matching.

>>> o1 = ms.observation('obs1.dfs0', item='waterlevel')\n>>> o2 = ms.observation('obs2.dfs0', item='waterlevel')\n>>> mr = ms.model_result('model.dfsu', item='WaterLevel')\n>>> cc = ms.match([o1, o2], mr)   # returns a ComparerCollection\n

Matching PointObservation with SpatialField model results consists of two steps:

  1. Extracting data from the model result at the spatial position of the observation, which returns a PointModelResult
  2. Matching the extracted data with the observation data in time

Matching TrackObservation with SpatialField model results is for technical reasons handled in one step, i.e., the data is extracted in both space and time.

The spatial matching method (selection or interpolation) can be specified using the spatial_method argument of the match() function. The default method depends on the type of observation and model result as specified in the sections below.

"},{"location":"user-guide/matching/#extracting-data-from-a-dfsumodelresult","title":"Extracting data from a DfsuModelResult","text":"

Extracting data for a specific point position from the flexible mesh dfsu files can be done in several ways (specified by the spatial_method argument of the match() function):

  • Selection of the \"contained\" element
  • Selection of the \"nearest\" element (often the same as the contained element, but not always)
  • Interpolation with \"inverse_distance\" weighting (IDW) using the five nearest elements (default)

The default (inverse_distance) is not necessarily the best method in all cases. When the extracted position is close to the model boundary, \"contained\" may be a better choice.

>>> cc = ms.match([o1, o2], mr_dfsu, spatial_method='contained')   \n

Note that extraction of track data does not currently support the \"contained\" method.

Note that the extraction of point data from 3D dfsu files is not yet fully supported. It is recommended to extract the data \"offline\" prior to using ModelSkill.

"},{"location":"user-guide/matching/#extracting-data-from-a-gridmodelresult","title":"Extracting data from a GridModelResult","text":"

Extracting data from a GridModelResult is done through xarray's interp() function. The spatial_method argument of the match() function is passed on to the interp() function as the method argument. The default method is \"linear\" which is the recommended method for most cases. Close to land where the grid model result data is often missing, \"nearest\" may be a better choice.

>>> cc = ms.match([o1, o2], mr_netcdf, spatial_method='nearest')   \n
"},{"location":"user-guide/matching/#event-based-matching-and-handling-of-gaps","title":"Event-based matching and handling of gaps","text":"

If the model result data contains gaps either because only events are stored or because of missing data, the max_model_gap argument of the match() function can be used to specify the maximum allowed gap (in seconds) in the model result data. This will avoid interpolating model data over long gaps in the model result data!

"},{"location":"user-guide/matching/#multiple-model-results-with-different-temporal-coverage","title":"Multiple model results with different temporal coverage","text":"

If the model results have different temporal coverage, the match() function will only match the overlapping time period to ensure that the model results are comparable. The Comparer object will contain the matched data for the overlapping period only.

"},{"location":"user-guide/overview/","title":"Overview","text":"

ModelSkill compares model results with observations. The workflow can be split in two phases:

  1. Matching - making sure that observations and model results are in the same space and time
  2. Analysis - plots and statistics of the matched data

If the observations and model results are already matched (i.e. are stored in the same data source), the from_matched() function can be used to go directly to the analysis phase. If not, the match() function can be used to match the observations and model results in space and time.

"},{"location":"user-guide/overview/#matching","title":"Matching","text":"

If the observations and model results are not in the same data source (e.g. dfs0 file), they will need to be defined and then matched in space and time with the match() function. In simple cases, observations and model results can be defined directly in the match() function:

import modelskill as ms\ncmp = ms.match(\"obs.dfs0\", \"model.dfs0\", obs_item=\"obs_WL\", mod_item=\"WL\")\n

But in most cases, the observations and model results will need to be defined separately first.

"},{"location":"user-guide/overview/#define-observations","title":"Define observations","text":"

The observations can be defined as either a PointObservation or a TrackObservation (a moving point).

o1 = ms.PointObservation(\"stn1.dfs0\", item=\"obs_WL\")\no2 = ms.PointObservation(\"stn2.dfs0\", item=\"obs_WL\")\n

The item needs to be specified as either the item number or the item name if the input file contains multiple items. Several other parameters can be specified, such as the name of the observation, the x- and y-position, and the quantity type and unit of the observation.

"},{"location":"user-guide/overview/#define-model-results","title":"Define model results","text":"

A model result will either be a simple point/track like the observations, or spatial field (e.g. 2d dfsu file) from which the model results will be extracted at the observation positions. The following types are available:

  • PointModelResult - a point result from a dfs0/nc file or a DataFrame
  • TrackModelResult - a track result from a dfs0/nc file or a DataFrame
  • GridModelResult - a spatial field from a dfs2/nc file or a Xarray Dataset
  • DfsuModelResult - a spatial field from a dfsu file
mr1 = ms.PointModelResult(\"model.dfs0\", item=\"WL_stn1\")\nmr2 = ms.PointModelResult(\"model.dfs0\", item=\"WL_stn2\")\n
"},{"location":"user-guide/overview/#match-observations-and-model-results","title":"Match observations and model results","text":"

The match() function will interpolate the model results to the time (and space) of the observations and return a collection of Comparer objects that can be used for analysis.

cc1 = ms.match(o1, mr1)\ncc2 = ms.match(o2, mr2)\ncc = cc1 + cc2\n
"},{"location":"user-guide/overview/#analysis","title":"Analysis","text":"

Once the observations and model results are matched, the Comparer object can be used for analysis and plotting.

"},{"location":"user-guide/plotting/","title":"Plotting","text":""},{"location":"user-guide/plotting/#plotting-observations-and-model-results","title":"Plotting observations and model results","text":"

PointObservations and PointModelResults can be plotted using their plot accessor:

>>> o.plot.timeseries()\n>>> mr.plot.timeseries()\n>>> mr.plot.hist()\n

Only the observation time series is shown here:

"},{"location":"user-guide/plotting/#plotting-temporal-coverage","title":"Plotting temporal coverage","text":"

The temporal coverage of observations and model results can be plotted using the temporal_coverage function in the plotting module:

>>> o1 = ms.PointObservation('HKNA.dfs0', item=0, x=4.2420, y=52.6887)\n>>> o2 = ms.PointObservation('EPL.dfs0', item=0, x=3.2760, y=51.9990)\n>>> o3 = ms.TrackObservation(\"Alti_c2.dfs0\", item=3)\n>>> mr = ms.DfsuModelResult('HKZN_local.dfsu', item=0)\n>>> ms.plotting.temporal_coverage(obs=[o1, o2, o3], mod=mr)\n

"},{"location":"user-guide/plotting/#plotting-spatial-overview","title":"Plotting spatial overview","text":"

The spatial coverage of observations and model results can be plotted using the spatial_overview function in the plotting module:

>>> ms.plotting.spatial_overview([o1, o2, o3], mr)\n

"},{"location":"user-guide/plotting/#plotting-compared-data","title":"Plotting compared data","text":"

The plot accessor on a Comparer or ComparerCollection object can be used to plot the compared data:

>>> cmp.plot.timeseries()\n>>> cc.plot.timeseries()\n>>> cc.plot.scatter()\n
"},{"location":"user-guide/plotting/#plotting-taylor-diagrams","title":"Plotting Taylor diagrams","text":"

A Taylor diagram shows how well a model result matches an observation in terms of correlation, standard deviation and root mean square error. The taylor plot can be accessed through the Comparer plot accessor or the ComparerCollection plot accessor:

>>> cc = ms.match([o1, o2, o3], [mr_CMEMS, mr_ERA5, mr_MIKE21SW])\n>>> cc.plot.taylor()\n

The radial distance from the point to the observation point is the standard deviation ratio, the angle is the correlation coefficient and the distance from the observation point to the model point is the root mean square error ratio. The closer the model point is to the observation point, the better the model result matches the observation. The closer the model point is to the origin, the better the model result matches the observation in terms of standard deviation and root mean square error. The closer the model point is to the horizontal axis, the better the model result matches the observation in terms of correlation.

"},{"location":"user-guide/plotting/#plotting-directional-data-eg-wind-or-currents","title":"Plotting directional data (e.g. wind or currents)","text":"

Directional data can be plotted using the wind_rose function in the plotting module. The function takes an array-like structure with speed and direction as columns (from one or two sources) and plots a wind rose:

>>> df = pd.read_csv('wind.csv', index_col=0, parse_dates=True)\n>>> ms.plotting.wind_rose(df)\n

"},{"location":"user-guide/selecting-data/","title":"Selecting/filtering data","text":"

The primary data filtering method of ModelSkill is the sel() method which is accessible on most ModelSkill data structures. The sel() method is a wrapper around xarray.Dataset.sel() and can be used to select data based on time, location and/or variable. The sel() method returns a new data structure of the same type with the selected data.

"},{"location":"user-guide/selecting-data/#timeseries-data","title":"TimeSeries data","text":"

Point and track timeseries data of both observation and model result kinds are stored in TimeSeries objects which uses xarray.Dataset as data container. The sel() method can be used to select data based on time and returns a new TimeSeries object with the selected data.

>>> o = ms.observation('obs.nc', item='waterlevel')\n>>> o_1month = o.sel(time=slice('2018-01-01', '2018-02-01'))\n
"},{"location":"user-guide/selecting-data/#comparer-objects","title":"Comparer objects","text":"

Comparer and ComparerCollection contain matched data from observations and model results. The sel() method can be used to select data based on time, model, quantity or other criteria and returns a new comparer object with the selected data.

>>> cmp = ms.match(o, [m1, m2])\n>>> cmp_1month = cmp.sel(time=slice('2018-01-01', '2018-02-01'))\n>>> cmp_m1 = cmp.sel(model='m1')\n
"},{"location":"user-guide/selecting-data/#skill-objects","title":"Skill objects","text":"

The skill() and mean_skill() methods return a SkillTable object with skill scores from comparing observation and model result data using different metrics (e.g. root mean square error). The data of the SkillTable object is stored in a (MultiIndex) pandas.DataFrame which can be accessed via the data attribute. The sel() method can be used to select specific rows and returns a new SkillTable object with the selected data.

>>> sk = cmp.skill()\n>>> sk_m1 = sk.sel(model='m1')\n
"},{"location":"user-guide/skill/","title":"Skill","text":"

Matched data can be analysed statistically using the skill() function. The function returns a Skill object which contains the statistical results. The Skill object can be printed to the console or saved to a file using the save() function.

```python

"},{"location":"user-guide/terminology/","title":"Terminology","text":"

ModelSkill is a library for assessing the skill of numerical models. It provides tools for comparing model results with observations, plotting the results and calculating validation metrics. This page defines some of the key terms used in the documentation.

"},{"location":"user-guide/terminology/#skill","title":"Skill","text":"

Skill refers to the ability of a numerical model to accurately represent the real-world phenomenon it aims to simulate. It is a measure of how well the model performs in reproducing the observed system. Skill can be assessed using various metrics, such as accuracy, precision, and reliability, depending on the specific goals of the model and the nature of the data. In ModelSkill, skill is also a specific method on Comparer objects that returns a SkillTable with aggregated skill scores per observation and model for a list of selected metrics.

"},{"location":"user-guide/terminology/#validation","title":"Validation","text":"

Validation is the process of assessing the model's performance by comparing its output to real-world observations or data collected from the system being modeled. It helps ensure that the model accurately represents the system it simulates. Validation is typically performed before the model is used for prediction or decision-making.

"},{"location":"user-guide/terminology/#calibration","title":"Calibration","text":"

Calibration is the process of adjusting the model's parameters or settings to improve its performance. It involves fine-tuning the model to better match observed data. Calibration aims to reduce discrepancies between model predictions and actual measurements. At the end of the calibration process, the calibrated model should be validated with independent data.

"},{"location":"user-guide/terminology/#performance","title":"Performance","text":"

Performance is a measure of how well a numerical model operates in reproducing the observed system. It can be assessed using various metrics, such as accuracy, precision, and reliability, depending on the specific goals of the model and the nature of the data. In this context, performance is synonymous with skill.

"},{"location":"user-guide/terminology/#timeseries","title":"Timeseries","text":"

A timeseries is a sequence of data points in time. In ModelSkill, the data can either be from observations or model results. Timeseries can be univariate or multivariate; ModelSkill primarily supports univariate timeseries. Multivariate timeseries can be assessed one variable at a time. Timeseries can also have different spatial dimensions, such as point, track, line, or area.

"},{"location":"user-guide/terminology/#observation","title":"Observation","text":"

An observation refers to real-world data or measurements collected from the system you are modeling. Observations serve as a reference for assessing the model's performance. These data points are used to compare with the model's predictions during validation and calibration. Observations are usually based on field measurements or laboratory experiments, but for the purposes of model validation, they can also be derived from other models (e.g. a reference model). ModelSkill supports point and track observation types.

"},{"location":"user-guide/terminology/#measurement","title":"Measurement","text":"

A measurement is called an observation in ModelSkill.

"},{"location":"user-guide/terminology/#model-result","title":"Model result","text":"

A model result is the output of any type of numerical model. It is the data generated by the model during a simulation. Model results can be compared with observations to assess the model's performance. In the context of validation, the term \"model result\" is often used interchangeably with \"model output\" or \"model prediction\". ModelSkill supports point, track, dfsu and grid model result types.

"},{"location":"user-guide/terminology/#metric","title":"Metric","text":"

A metric is a quantitative measure (a mathematical expression) used to evaluate the performance of a numerical model. Metrics provide a standardized way to assess the model's accuracy, precision, and other attributes. A metric aggregates the skill of a model into a single number. See list of metrics supported by ModelSkill.

"},{"location":"user-guide/terminology/#score","title":"Score","text":"

A score is a numerical value that summarizes the model's performance based on chosen metrics. Scores can be used to rank or compare different models or model configurations. In the context of validation, the \"skill score\" or \"validation score\" often quantifies the model's overall performance. The score of a model is a single number, calculated as a weighted average for all time-steps, observations and variables. If you want to perform automated calibration, you can use the score as the objective function. In ModelSkill, score is also a specific method on Comparer objects that returns a single number aggregated score using a specific metric.

"},{"location":"user-guide/terminology/#matched-data","title":"Matched data","text":"

In ModelSkill, observations and model results are matched when they refer to the same positions in space and time. If the observations and model results are already matched, the from_matched function can be used to create a Comparer directly. Otherwise, the match function can be used to match the observations and model results in space and time.

"},{"location":"user-guide/terminology/#match","title":"match()","text":"

The function match is used to match a model result with observations. It returns a Comparer object or a ComparerCollection object.

"},{"location":"user-guide/terminology/#comparer","title":"Comparer","text":"

A Comparer is an object that stores the matched observation and model result data for a single observation. It is used to calculate validation metrics and generate plots. A Comparer can be created using the match function.

"},{"location":"user-guide/terminology/#comparercollection","title":"ComparerCollection","text":"

A ComparerCollection is a collection of Comparers. It is used to compare multiple observations with one or more model results. A ComparerCollection can be created using the match function or by passing a list of Comparers to the ComparerCollection constructor.

"},{"location":"user-guide/terminology/#connector","title":"Connector","text":"

In past versions of FMSkill/ModelSkill, the Connector class was used to connect observations and model results. This class has been deprecated and is no longer in use.

"},{"location":"user-guide/terminology/#abbreviations","title":"Abbreviations","text":"Abbreviation Meaning ms ModelSkill o or obs Observation mr or mod Model result cmp Comparer cc ComparerCollection sk SkillTable mtr Metric q Quantity"},{"location":"user-guide/vision/","title":"Vision","text":"

ModelSkill would like to be your modelling companion. It should be so indispensably good that you want to use it every time you do a MIKE simulation.

"},{"location":"user-guide/vision/#objective","title":"Objective","text":"

We want ModelSkill to make it easy to

  • assess the skill of a model by comparing with measurements
  • assess model skill also when result is split on several files (2d, 3d, yearly, ...)
  • compare the skill of different calibration runs
  • compare your model with other models
  • use a wide range of common evaluation metrics
  • create common plots such as time series, scatter and taylor diagrams
  • do aggregations - assess for all observations, geographic areas, monthly, ...
  • do filtering - assess for a subset of observations, geographic areas, ...
  • make fast comparisons (optimized code)

And it should be

  • Difficult to make mistakes by verifying input
  • Trustworthy by having >95% test coverage
  • Easy to install ($ pip install modelskill)
  • Easy to get started by providing many notebook examples and documentation
"},{"location":"user-guide/vision/#scope","title":"Scope","text":"

ModelSkill wants to balance general and specific needs:

  • It should be general enough to cover >90% of MIKE simulations

  • It should be general enough to cover generic modelling irrespective of software.

  • But specific enough to be useful

    • Support dfs files (using mikeio)
    • Handle circular variables such as wave direction
"},{"location":"user-guide/vision/#limitations","title":"Limitations","text":"

ModelSkill does not wish to cover

  • Extreme value analysis
  • Deterministic wave analysis such as crossing analysis
  • Rare alternative file types
  • Rarely used model result types
  • Rare observation types
  • Anything project specific
"},{"location":"user-guide/vision/#future","title":"Future","text":""},{"location":"user-guide/vision/#forecast-skill","title":"Forecast skill","text":"

It should be possible to compare forecasts with observations using forecast lead time as a dimension. Planned 2024.

"},{"location":"user-guide/vision/#better-support-for-3d-data","title":"Better support for 3D data","text":"

Currently 3D data is supported only as point data and only if data has already been extracted from model result files. It should be possible to extract data from 3D files directly. Furthermore, vertical column data should be supported as an observation type with z as a dimension. Planned 2024.

"},{"location":"user-guide/vision/#web-app","title":"Web app","text":"

Create a web app that wraps this library.

"},{"location":"user-guide/vision/#automatic-reports","title":"Automatic reports","text":"

Both static as markdown, docx, pptx and interactive as html.

"}]} \ No newline at end of file diff --git a/sitemap.xml b/sitemap.xml index 4f5c1fd5..9b73b0e1 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -2,167 +2,167 @@ https://dhi.github.io/modelskill/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/license/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/api/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/api/comparer/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/api/comparercollection/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/api/gridded_skill/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/api/matching/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/api/metrics/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/api/plotting/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/api/quantity/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/api/settings/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/api/skill/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/api/model/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/api/model/dfsu/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/api/model/dummy/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/api/model/grid/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/api/model/model_result/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/api/model/point/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/api/model/track/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/api/observation/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/api/observation/observation/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/api/observation/point/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/api/observation/track/ - 2024-10-25 + 
2024-11-13 daily https://dhi.github.io/modelskill/user-guide/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/user-guide/data-structures/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/user-guide/getting-started/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/user-guide/matching/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/user-guide/overview/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/user-guide/plotting/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/user-guide/selecting-data/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/user-guide/skill/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/user-guide/terminology/ - 2024-10-25 + 2024-11-13 daily https://dhi.github.io/modelskill/user-guide/vision/ - 2024-10-25 + 2024-11-13 daily \ No newline at end of file diff --git a/sitemap.xml.gz b/sitemap.xml.gz index b63e4ae86b99b9d41da3cf2071adfa3fcd688d7a..3c45b460e19aa09187a28cacf926511e7f8b8952 100644 GIT binary patch literal 433 zcmV;i0Z#rOiwFn+>@#Nq|8r?{Wo=<_E_iKh0M*#dQiCuM0N}e%(cunh)q^^e-h2W* zJ2Bb9PD6s(O>5uY&~_Zo9-J%(AR%PF3EBM6?!HI2Izl82ep_vtb+rO-f<@nNtM|9p z`l;G=cWsJ}fLFP&r)@PbPcKTHPN#;DVbl{fA@+*QkQ1bCnQGlUSKZwzA9DrQyUWqK zV7j>$%GKBbn;v;cpA7~z1`Cc3=$vZRTx|K4CeF{h`}O@}z1h^8hg{^Awr5CY9U%%~n&2E5&S6qGxQ}Qp*pJd!4Hd4OTwpH0F~5@bjC?MfnWwwCF-!mGBBFdwiZI$eB^l5(j%N|lc{&mS zy_`QHP;x8+JO%VJq#U(56fO;)1o5Z^Ebx|6nQ93=cj+y?|E5f>`-B!`AgyLy6USsY z}gjG%+s?{r_-q+WEk~CO^CfBGvoxRTc%n!PgQrf%Ew&6_3m=C zE|_kvg>p4^z@|qY(r1G~jlqJW13IT#H5XgHrHS+NZoA&z*PC^{eaJ;_d5dCZ(7uO# zgs;wObmvwra;_Zp>-m$+p#e`&uuxA}xXy%e(h;H%rU}l0;T$G~gZqfqg8e8xhGy%i z*$jE8DV+%%(NN*a$pz-}8}ln^&&cP(nR&XK8?*F}E+We3qzI$kQ<4Eq<9HS!ou?xa z(98KF0wu>Hz*9gkL&{N`L*df!Nf3`(zyfb6m8q7{bC=%I`)|tBx=(092GVNQHE~Ra zQyy?-p!@5BFuQeRurkmBHdqU7IOdQ;ILXio*l#Vg1ef0zxdYG9C&XO%=4L@