From ca133840e45a7c26da5feda0432577b358fd1268 Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Sat, 24 Jun 2023 13:25:24 +0200 Subject: [PATCH] distplot --- notebooks/wp3/uncertainty_densityplot.ipynb | 320 ++++++++++++++++++++ 1 file changed, 320 insertions(+) create mode 100644 notebooks/wp3/uncertainty_densityplot.ipynb diff --git a/notebooks/wp3/uncertainty_densityplot.ipynb b/notebooks/wp3/uncertainty_densityplot.ipynb new file mode 100644 index 0000000..fd3044a --- /dev/null +++ b/notebooks/wp3/uncertainty_densityplot.ipynb @@ -0,0 +1,320 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "43f58c96", + "metadata": {}, + "source": [ + "# Uncertainty in seasonal forecasts for a single model system" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "c7ce5c2e", + "metadata": {}, + "source": [ + "## Import packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "48246700", + "metadata": {}, + "outputs": [], + "source": [ + "import calendar\n", + "\n", + "import numpy as np\n", + "import plotly.figure_factory as ff\n", + "import xarray as xr\n", + "from c3s_eqc_automatic_quality_control import diagnostics, download, utils" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "5586e956", + "metadata": {}, + "source": [ + "## Define Parameters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a3dfa6f", + "metadata": {}, + "outputs": [], + "source": [ + "# Model\n", + "centre = \"ecmwf\"\n", + "system = \"51\"\n", + "\n", + "# Time\n", + "year_forecast = 2023\n", + "year_start_hindcast = 1993\n", + "year_stop_hindcast = 2016\n", + "month = 6\n", + "\n", + "# Region\n", + "region_name = \"Southern Norway\"\n", + "lat_slice = slice(64, 58)\n", + "lon_slice = slice(4, 14)\n", + "\n", + "# Download parameters\n", + "chunks = {\"year\": 1}\n", + "n_jobs = 1" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "62c65518", + "metadata": {}, + "source": [ + "## Define request" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9ca06bda", + "metadata": {}, + "outputs": [], + "source": [ + "collection_id = \"seasonal-monthly-single-levels\"\n", + "\n", + "request = {\n", + " \"format\": \"grib\",\n", + " \"originating_centre\": centre,\n", + " \"system\": system,\n", + " \"variable\": \"2m_temperature\",\n", + " \"product_type\": \"monthly_mean\",\n", + " \"leadtime_month\": list(map(str, range(1, 7))),\n", + " \"area\": [89.5, -179.5, -89.5, 179.5],\n", + " \"grid\": \"1/1\",\n", + " \"month\": f\"{month:02d}\",\n", + "}" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "51a4fdbf", + "metadata": {}, + "source": [ + "## Functions to cache" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b859c808-ea82-4f63-a6eb-bf58229c2e9d", + "metadata": {}, + "outputs": [], + "source": [ + "def regionalised_mean(ds, lon_slice, lat_slice, weights):\n", + " ds = utils.regionalise(ds, lon_slice=lon_slice, lat_slice=lat_slice)\n", + " ds = diagnostics.spatial_weighted_mean(ds, weights=weights)\n", + " with xr.set_options(keep_attrs=True):\n", + " ds[\"t2m\"] -= 273.15\n", + " ds[\"t2m\"].attrs[\"units\"] = \"°C\"\n", + " return ds" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "90606022", + "metadata": {}, + "source": [ + "## Download and transform" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2a939f1e-7e91-4501-9ffc-1eca70253a36", + "metadata": {}, + "outputs": [], + "source": [ + "datasets = {}\n", + "for model, years in {\n", + " \"hindcast\": range(year_start_hindcast, year_stop_hindcast + 1),\n", + " \"forecast\": [year_forecast],\n", + "}.items():\n", + " ds = download.download_and_transform(\n", + " collection_id,\n", + " request | {\"year\": list(map(str, years))},\n", + " chunks=chunks,\n", + " n_jobs=n_jobs,\n", + " transform_func=regionalised_mean,\n", + " transform_func_kwargs={\n", + " \"lon_slice\": lon_slice,\n", + " \"lat_slice\": lat_slice,\n", + " \"weights\": False,\n", + " },\n", + " backend_kwargs={\n", + " \"time_dims\": (\n", + " \"forecastMonth\",\n", + " \"indexing_time\" if centre in [\"ukmo\", \"jma\", \"ncep\"] else \"time\",\n", + " )\n", + " },\n", + " cached_open_mfdataset_kwargs={\n", + " \"combine\": \"nested\",\n", + " \"concat_dim\": \"forecast_reference_time\",\n", + " },\n", + " )\n", + " datasets[model] = ds" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "5ecce3e6", + "metadata": {}, + "source": [ + "## Density plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5b2d38a8", + "metadata": {}, + "outputs": [], + "source": [ + "def get_limits(data, xfactor, yfactor):\n", + " ylim = [0, max([max(d.y) for d in data]) * yfactor]\n", + " xlim = [func([func(d.x) for d in data]) for func in (min, max)]\n", + " xshift = abs(xlim[1] - xlim[0]) * xfactor\n", + " xlim = [x + xshift * sign for x, sign in zip(xlim, (-1, 1))]\n", + " return xlim, ylim\n", + "\n", + "\n", + "# Density plot for each lead time\n", + "for leadtime_month, ds_forecast in datasets[\"forecast\"].groupby(\"leadtime_month\"):\n", + " ds_hindcast = datasets[\"hindcast\"].sel(leadtime_month=leadtime_month)\n", + "\n", + " colors = [(26, 150, 65), (100, 50, 150)]\n", + " values = [\n", + " ds_hindcast[\"t2m\"].values.flatten(),\n", + " ds_forecast[\"t2m\"].values.flatten(),\n", + " ]\n", + " labels = [centre + \" climatology\", centre + \" forecast\"]\n", + " fig = ff.create_distplot(\n", + " values,\n", + " labels,\n", + " show_hist=False,\n", + " show_rug=True,\n", + " colors=[f\"rgb{color}\" for color in colors],\n", + " curve_type=\"kde\",\n", + " )\n", + " for color, data in zip(colors, fig.data):\n", + " # Fill area under distline\n", + " fig.add_scatter(\n", + " x=data.x,\n", + " y=data.y,\n", + " fill=\"tozeroy\",\n", + " mode=\"none\",\n", + " fillcolor=f\"rgba{color + (.4,)}\",\n", + " showlegend=False,\n", + " )\n", + "\n", + " xlim, ylim = get_limits(fig.data[:2], xfactor=0.03, yfactor=1.4)\n", + " quantiles = np.quantile(values[0], [1 / 3, 2 / 3]).tolist()\n", + " scatter_dicts = {\n", + " \"lower tercile\": {\n", + " \"color\": (0, 180, 250, 0.1),\n", + " \"text\": \"COLD\",\n", + " \"mask\": values[1] <= quantiles[0],\n", + " \"xlim\": [xlim[0], quantiles[0]],\n", + " },\n", + " \"middle tercile\": {\n", + " \"color\": (230, 230, 0, 0.1),\n", + " \"text\": \"NEAR AVERAGE\",\n", + " \"mask\": (values[1] > quantiles[0]) & (values[1] <= quantiles[1]),\n", + " \"xlim\": quantiles,\n", + " },\n", + " \"upper tercile\": {\n", + " \"color\": (250, 50, 0, 0.1),\n", + " \"text\": \"WARM\",\n", + " \"mask\": values[1] > quantiles[1],\n", + " \"xlim\": [quantiles[1], xlim[1]],\n", + " },\n", + " }\n", + " for i, (name, scatter_dict) in enumerate(scatter_dicts.items()):\n", + " # Add background color and text\n", + " fig.add_scatter(\n", + " x=scatter_dict[\"xlim\"],\n", + " y=[ylim[1]] * 2,\n", + " fill=\"tozeroy\",\n", + " mode=\"none\",\n", + " fillcolor=f\"rgba{scatter_dict['color']}\",\n", + " name=name,\n", + " )\n", + " percentage = 100 * scatter_dict[\"mask\"].sum() / values[1].size\n", + " text_color = tuple(c // 2 for c in scatter_dict[\"color\"][:-1]) + (0.4,)\n", + " fig.add_scatter(\n", + " x=[sum(scatter_dict[\"xlim\"]) / 2],\n", + " y=[ylim[1] * 0.98],\n", + " mode=\"text\",\n", + " name=\"\",\n", + " text=f\"{scatter_dict['text']}
{round(percentage)}%\",\n", + " textfont=dict(size=18, color=f\"rgba{text_color}\"),\n", + " textposition=\"bottom center\",\n", + " showlegend=False,\n", + " )\n", + "\n", + " # Title and labels\n", + " forecast_reference_time = ds_forecast[\"forecast_reference_time\"].dt.date.values\n", + " title = (\n", + " f\"Density plot of {ds_forecast['t2m'].attrs['long_name']}\"\n", + " f\" over {region_name} for {calendar.month_abbr[leadtime_month]} {year_forecast},\"\n", + " f\"
from {centre} with start time {forecast_reference_time}.\"\n", + " f\" Hindcast period from {year_start_hindcast} to {year_stop_hindcast}.\"\n", + " )\n", + " fig.update_layout(\n", + " title=dict(text=title, font={\"size\": 22}),\n", + " yaxis_range=ylim,\n", + " xaxis_range=xlim,\n", + " font_size=17,\n", + " autosize=False,\n", + " width=900,\n", + " height=500,\n", + " )\n", + " fig.update_xaxes(\n", + " title_text=(\n", + " f\"Mean {ds_forecast['t2m'].attrs['long_name']}\"\n", + " f\" ({ds_forecast['t2m'].attrs['units']})\"\n", + " )\n", + " )\n", + " fig.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}