From 8b340fb32df2c2a7bee7d26dbd0db95d7261401e Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Thu, 28 Mar 2024 16:46:17 +0100 Subject: [PATCH] cleanup --- notebooks/wp5/ozone_merged_uv.ipynb | 196 ++++++++++++---------------- 1 file changed, 84 insertions(+), 112 deletions(-) diff --git a/notebooks/wp5/ozone_merged_uv.ipynb b/notebooks/wp5/ozone_merged_uv.ipynb index e88a17c..f633b8e 100644 --- a/notebooks/wp5/ozone_merged_uv.ipynb +++ b/notebooks/wp5/ozone_merged_uv.ipynb @@ -20,15 +20,13 @@ "metadata": {}, "outputs": [], "source": [ - "import warnings\n", + "import calendar\n", "\n", "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "import xarray as xr\n", "from c3s_eqc_automatic_quality_control import diagnostics, download, plot, utils\n", "\n", - "warnings.filterwarnings(\"ignore\")\n", - "\n", "plt.style.use(\"seaborn-v0_8-notebook\")" ] }, @@ -45,26 +43,40 @@ "metadata": {}, "outputs": [], "source": [ + "# Time\n", + "year_start = 1996\n", + "year_stop = 2022\n", + "\n", + "# Regions\n", "region_slices = {\n", - " \"global\": {\"lat_slice\": slice(-90, 90), \"lon_slice\": slice(0, 360)},\n", - " \"tropics\": {\"lat_slice\": slice(-25, 25), \"lon_slice\": slice(0, 360)},\n", - " \"northern hemisphere mid-latitudes\": {\n", + " \"global\": {\n", + " \"lat_slice\": slice(-90, 90),\n", + " \"lon_slice\": slice(0, 360),\n", + " },\n", + " \"tropics\": {\n", + " \"lat_slice\": slice(-25, 25),\n", + " \"lon_slice\": slice(0, 360),\n", + " },\n", + " \"NH mid-latitudes\": {\n", " \"lat_slice\": slice(30, 60),\n", " \"lon_slice\": slice(0, 360),\n", " },\n", - " \"southern hemisphere mid-latitudes\": {\n", + " \"SH mid-latitudes\": {\n", " \"lat_slice\": slice(-30, -60),\n", " \"lon_slice\": slice(0, 360),\n", " },\n", - " \"northern hemisphere polar\": {\n", + " \"NH polar\": {\n", " \"lat_slice\": slice(60, 90),\n", " \"lon_slice\": slice(0, 360),\n", " },\n", - " \"southern hemisphere polar\": {\n", + " \"SH polar\": {\n", " \"lat_slice\": slice(-60, -90),\n", " \"lon_slice\": slice(0, 360),\n", " },\n", - "}" + "}\n", + "\n", + "# Variable to show\n", + "varname = \"total_ozone_column\"" ] }, { @@ -82,14 +94,6 @@ "outputs": [], "source": [ "collection_id = \"satellite-ozone-v1\"\n", - "\n", - "chunks = {\"year\": 1}\n", - "\n", - "year_start = 1996\n", - "year_stop = 2022\n", - "\n", - "varname = \"total_ozone_column\"\n", - "\n", "requests = {\n", " \"format\": \"zip\",\n", " \"processing_level\": \"level_3\",\n", @@ -115,6 +119,12 @@ "metadata": {}, "outputs": [], "source": [ + "def convert_source_to_time(ds):\n", + " # Naming convention: YYYYMM-*.nc\n", + " ds[\"source\"] = pd.to_datetime(ds[\"source\"].str.slice(None, 6), format=\"%Y%m\")\n", + " return ds.rename(source=\"time\")\n", + "\n", + "\n", "def spatial_weighted_mean(ds, lon_slice, lat_slice):\n", " ds = convert_source_to_time(ds)\n", " ds = utils.regionalise(ds, lon_slice=lon_slice, lat_slice=lat_slice)\n", @@ -123,13 +133,7 @@ "\n", "def time_weighted_mean(ds):\n", " ds = convert_source_to_time(ds)\n", - " return diagnostics.time_weighted_mean(ds)\n", - "\n", - "\n", - "def convert_source_to_time(ds):\n", - " # Naming convention: YYYYMM-*.nc\n", - " ds[\"source\"] = pd.to_datetime(ds[\"source\"].str.slice(None, 6), format=\"%Y%m\")\n", - " return ds.rename(source=\"time\")" + " return diagnostics.time_weighted_mean(ds)" ] }, { @@ -145,41 +149,33 @@ "metadata": {}, "outputs": [], "source": [ - "ds_map = download.download_and_transform(\n", - " collection_id,\n", - " requests,\n", - " transform_func=convert_source_to_time,\n", - " split_all=True,\n", - ")\n", + "kwargs = {\"collection_id\": collection_id, \"requests\": requests, \"chunks\": {\"year\": 1}}\n", "\n", - "dataarrays = []\n", - "for region, slices in region_slices.items():\n", + "# Timeseries\n", + "datasets = []\n", + "for region, transform_func_kwargs in region_slices.items():\n", + " print(f\"{region=}\")\n", " ds = download.download_and_transform(\n", - " collection_id,\n", - " requests,\n", + " **kwargs,\n", " transform_func=spatial_weighted_mean,\n", - " transform_func_kwargs=slices,\n", - " chunks=chunks,\n", + " transform_func_kwargs=transform_func_kwargs,\n", " drop_variables=\"time_bounds\",\n", " )\n", - " dataarrays.append(ds[varname].expand_dims(region=[region]))\n", - "da = xr.concat(dataarrays, \"region\")\n", + " datasets.append(ds.expand_dims(region=[region]))\n", + "ds_timeseries = xr.concat(datasets, \"region\")\n", "\n", - "DU_factor = ds_map[\"total_ozone_column\"].attrs[\"multiplication_factor_to_convert_to_DU\"]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Results" + "# Original data\n", + "ds = download.download_and_transform(\n", + " **kwargs,\n", + " transform_func=convert_source_to_time,\n", + ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Data Coverage" + "## Plot Data Coverage" ] }, { @@ -188,31 +184,22 @@ "metadata": {}, "outputs": [], "source": [ - "nobs_time = ds_map[\"total_ozone_column_number_of_observations\"].sum(dim=\"time\")\n", - "nobs_lat = ds_map[\"total_ozone_column_number_of_observations\"].sum(dim=\"longitude\")\n", - "nobs_lon = ds_map[\"total_ozone_column_number_of_observations\"].sum(dim=\"latitude\")\n", - "\n", - "plot.projected_map(nobs_time, show_stats=False)\n", - "\n", - "fig, axes = plt.subplots(2)\n", - "ax1 = axes[0].contourf(\n", - " nobs_lat[\"time\"], nobs_lat[\"latitude\"], nobs_lat.transpose(), extend=\"max\"\n", - ")\n", - "axes[0].set_ylabel(\"Latitude\")\n", - "axes[0].set_xlabel(\"Year\")\n", - "\n", - "ax2 = axes[1].contourf(\n", - " nobs_lon[\"time\"], nobs_lon[\"longitude\"], nobs_lon.transpose(), extend=\"max\"\n", - ")\n", - "axes[1].set_ylabel(\"Longitude\")\n", - "axes[1].set_xlabel(\"Year\")\n", - "\n", - "# plt.contourf(ds_obs_lat[\"time\"],ds_obs_lat[\"latitude\"],\n", - "# ds_obs_lat.transpose(),\n", - "# extend='both')\n", + "da_obs = ds[f\"{varname}_number_of_observations\"]\n", + "plot.projected_map(da_obs.sum(dim=\"time\", keep_attrs=True), show_stats=False)\n", "\n", - "cbar = fig.colorbar(ax1, ax=axes.ravel().tolist(), shrink=0.95)\n", - "cbar.set_label(\"Mean number of observations %\")" + "fig, axs = plt.subplots(2, 1)\n", + "for ax, dim in zip(axs.flatten(), (\"longitude\", \"latitude\")):\n", + " plot_obj = da_obs.sum(dim, keep_attrs=True).plot.contourf(\n", + " x=\"time\",\n", + " ax=ax,\n", + " levels=range(0, 10_500 + 1, 1_500),\n", + " extend=\"max\",\n", + " add_colorbar=False,\n", + " )\n", + "label = da_obs.attrs[\"long_name\"]\n", + "isplit = len(label) // 2\n", + "label = \"\\n\".join([label[:isplit], label[isplit:]])\n", + "_ = fig.colorbar(plot_obj, ax=axs.flatten(), label=label)" ] }, { @@ -228,13 +215,17 @@ "metadata": {}, "outputs": [], "source": [ + "da = ds[varname]\n", + "with xr.set_options(keep_attrs=True):\n", + " da = da * da.attrs[\"multiplication_factor_to_convert_to_DU\"]\n", + "da.attrs[\"units\"] = \"DU\"\n", + "\n", + "contour_kwargs = {\"levels\": range(160, 460, 10), \"extend\": \"both\", \"cmap\": \"RdGy_r\"}\n", "map = plot.projected_map(\n", - " ds_map[\"total_ozone_column\"].mean(\"time\") * DU_factor,\n", - " cmap=\"RdGy_r\",\n", - " levels=range(160, 460, 10),\n", - " robust=True,\n", - " extend=\"both\",\n", + " da.mean(\"time\", keep_attrs=True),\n", + " plot_func=\"contourf\",\n", " show_stats=False,\n", + " **contour_kwargs,\n", ")" ] }, @@ -251,38 +242,11 @@ "metadata": {}, "outputs": [], "source": [ - "climatology_lon = ds_map.groupby(\"time.month\").mean([\"time\", \"longitude\"])\n", - "\n", - "fig, ax = plt.subplots()\n", - "im = ax.contourf(\n", - " climatology_lon[\"total_ozone_column\"][\"month\"],\n", - " climatology_lon[\"total_ozone_column\"][\"latitude\"],\n", - " climatology_lon[\"total_ozone_column\"].transpose(\"latitude\", \"month\") * DU_factor,\n", - " cmap=\"RdGy_r\",\n", - " extend=\"both\",\n", - " levels=range(160, 460, 10),\n", - ")\n", - "ax.set_title(\"TCO [DU]\")\n", - "ax.set_ylabel(\"Latitude\")\n", - "ax.set_xticks([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])\n", - "ax.set_xticklabels(\n", - " [\n", - " \"JAN\",\n", - " \"FEB\",\n", - " \"MAR\",\n", - " \"APR\",\n", - " \"MAY\",\n", - " \"JUN\",\n", - " \"JUL\",\n", - " \"AUG\",\n", - " \"SEP\",\n", - " \"OCT\",\n", - " \"NOV\",\n", - " \"DEC\",\n", - " ],\n", - " size=10,\n", - ")\n", - "fig.colorbar(im)" + "da_annual_cycle = da.groupby(\"time.month\").mean([\"time\", \"longitude\"], keep_attrs=True)\n", + "da_annual_cycle[\"month\"] = [\n", + " calendar.month_abbr[m] for m in da_annual_cycle[\"month\"].values\n", + "]\n", + "_ = da_annual_cycle.plot.contourf(x=\"month\", **contour_kwargs)" ] }, { @@ -298,9 +262,17 @@ "metadata": {}, "outputs": [], "source": [ - "da_anomaly = DU_factor * (da.groupby(\"time.month\") - da.groupby(\"time.month\").mean())\n", - "\n", - "fig = da_anomaly.plot(row=\"region\", col_wrap=3)" + "with xr.set_options(keep_attrs=True):\n", + " da_anomaly = (\n", + " ds_timeseries[varname].groupby(\"time.month\")\n", + " - ds_timeseries[varname].groupby(\"time.month\").mean()\n", + " )\n", + " da_anomaly *= da_anomaly.attrs[\"multiplication_factor_to_convert_to_DU\"]\n", + "da_anomaly.attrs[\"units\"] = \"DU\"\n", + "facet = da_anomaly.plot(row=\"region\", col_wrap=2)\n", + "for ax in facet.axs.flatten():\n", + " ax.grid()\n", + "facet.fig.autofmt_xdate(rotation=45)" ] } ],