Skip to content

Commit

Permalink
add quality filter
Browse files — browse the repository at this point in the history
  • Loading branch information
malmans2 committed Jul 4, 2024
1 parent 8148aac commit 7996c02
Showing 1 changed file with 56 additions and 96 deletions.
152 changes: 56 additions & 96 deletions notebooks/wp5/lake_water_temperature_outlier_detection.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
"import cartopy.crs as ccrs\n",
"import matplotlib.cbook\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import xarray as xr\n",
"from c3s_eqc_automatic_quality_control import diagnostics, download, plot, utils\n",
Expand Down Expand Up @@ -53,14 +54,11 @@
"stop = \"2000-01\"\n",
"\n",
"# Region\n",
"lon_slice = slice(28, 41)\n",
"lat_slice = slice(-16, 4)\n",
"lon_slice = slice(-92.10, -84.80)\n",
"lat_slice = slice(46.30, 49.00)\n",
"\n",
"# Variable\n",
"varname = \"lake_surface_water_temperature\"\n",
"\n",
"# Lakes to show in the analysis\n",
"lakeids = [3, 7, 10]"
"varname = \"lake_surface_water_temperature\""
]
},
{
Expand Down Expand Up @@ -91,7 +89,7 @@
"id": "7",
"metadata": {},
"source": [
"## Define function to extract region and compute spatial weighted mean"
"## Download data"
]
},
{
Expand All @@ -101,31 +99,24 @@
"metadata": {},
"outputs": [],
"source": [
"def spatial_weighted_mean_of_region(ds, lon_slice, lat_slice, varname, lakeids):\n",
" ds = ds[[varname, \"lakeid\"]]\n",
" ds = ds.chunk({\"time\": 1, \"latitude\": 1_200, \"longitude\": 2_400})\n",
" ds = utils.regionalise(ds, lon_slice=lon_slice, lat_slice=lat_slice)\n",
" dataarrays = []\n",
" for lakeid in lakeids:\n",
" da = ds[varname].where(ds[\"lakeid\"] == lakeid)\n",
" da = diagnostics.spatial_weighted_mean(da)\n",
" dataarrays.append(da.expand_dims(lakeid=[lakeid]))\n",
" return xr.concat(dataarrays, \"lakeid\").to_dataset()\n",
"\n",
"\n",
"def get_lakeid(ds, lon_slice, lat_slice):\n",
" da = ds[\"lakeid\"].isel(time=0)\n",
" da = da.chunk({\"latitude\": 1_200, \"longitude\": 2_400})\n",
" da = utils.regionalise(da, lon_slice=lon_slice, lat_slice=lat_slice)\n",
" return da.to_dataset()"
"requests = download.update_request_date(\n",
" request, start=start, stop=stop, stringify_dates=True\n",
")\n",
"ds = download.download_and_transform(\n",
" collection_id,\n",
" requests,\n",
" chunks={\"year\": 1, \"month\": 1},\n",
" transform_func=utils.regionalise,\n",
" transform_func_kwargs={\"lon_slice\": lon_slice, \"lat_slice\": lat_slice},\n",
")"
]
},
{
"cell_type": "markdown",
"id": "9",
"metadata": {},
"source": [
"## Download data"
"## Plot lakeid"
]
},
{
Expand All @@ -135,72 +126,47 @@
"metadata": {},
"outputs": [],
"source": [
"chunks = {\"year\": 1, \"month\": 1}\n",
"requests = download.update_request_date(\n",
" request, start=start, stop=stop, stringify_dates=True\n",
")\n",
"ds = download.download_and_transform(\n",
" collection_id,\n",
" requests,\n",
" chunks=chunks,\n",
" transform_func=spatial_weighted_mean_of_region,\n",
" transform_func_kwargs={\n",
" \"lon_slice\": lon_slice,\n",
" \"lat_slice\": lat_slice,\n",
" \"varname\": varname,\n",
" \"lakeids\": lakeids,\n",
" },\n",
")\n",
"da = ds[varname].compute()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "11",
"metadata": {},
"outputs": [],
"source": [
"# We use one of the request previously cached\n",
"single_request = {\n",
" k: v if isinstance(v, str) or k not in chunks else v[0]\n",
" for k, v in requests[0].items()\n",
"}\n",
"da_map = download.download_and_transform(\n",
" collection_id,\n",
" single_request,\n",
" chunks=chunks,\n",
" transform_func=get_lakeid,\n",
" transform_func_kwargs={\n",
" \"lon_slice\": lon_slice,\n",
" \"lat_slice\": lat_slice,\n",
" },\n",
")[\"lakeid\"]"
"_ = plot.projected_map(\n",
" ds[\"lakeid\"].isel(time=0), projection=ccrs.PlateCarree(), show_stats=False\n",
")"
]
},
{
"cell_type": "markdown",
"id": "12",
"id": "11",
"metadata": {},
"source": [
"## Plot projected map of lake IDs"
"## Reindex"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "13",
"id": "12",
"metadata": {},
"outputs": [],
"source": [
"for da_to_plot in [da_map, da_map.where(da_map.isin(lakeids))]:\n",
" _ = plot.projected_map(da_to_plot, projection=ccrs.PlateCarree(), show_stats=False)\n",
" plt.show()"
"# Reindex using lakeids\n",
"lakeid = (\n",
" xr.DataArray(np.unique(ds[\"lakeid\"].values), dims=(\"lakeid\"))\n",
" .dropna(\"lakeid\")\n",
" .astype(int)\n",
")\n",
"ds = ds.drop_vars(\"lakeid\").where(ds[\"lakeid\"] == lakeid).assign_coords(lakeid=lakeid)\n",
"\n",
"# Reindex using min_quality_level\n",
"min_quality_level = xr.DataArray([0, 4], dims=(\"min_quality_level\"))\n",
"ds = ds.where(ds[\"quality_level\"] >= min_quality_level).assign_coords(\n",
" min_quality_level=min_quality_level\n",
")\n",
"\n",
"# Spatial weighted mean\n",
"da = diagnostics.spatial_weighted_mean(ds[varname])"
]
},
{
"cell_type": "markdown",
"id": "14",
"id": "13",
"metadata": {},
"source": [
"## Plot spatial weighted mean"
Expand All @@ -209,27 +175,24 @@
{
"cell_type": "code",
"execution_count": null,
"id": "15",
"id": "14",
"metadata": {},
"outputs": [],
"source": [
"for lakeid, da_lakeid in da.groupby(\"lakeid\"):\n",
" da_lakeid.dropna(\"time\").plot(label=lakeid)\n",
"plt.legend(title=\"lake ID\")\n",
"da.dropna(\"time\").plot(hue=\"min_quality_level\", col=\"lakeid\", figsize=(10, 6))\n",
"plt.grid()\n",
"plt.title(\"Spatial weighted mean\")\n",
"plt.show()\n",
"\n",
"# Print missing values\n",
"missings = da.isnull().sum(\"time\") / da.sizes[\"time\"] * 100\n",
"id_digits = max(map(len, da[\"lakeid\"].astype(str).values))\n",
"for lakeid, missing in missings.groupby(\"lakeid\"):\n",
" print(f\"Missing values of lake ID {lakeid:<{id_digits}}: {missing.values:.2f} %\")"
"missings = da.sel(min_quality_level=0).isnull().sum(\"time\") / da.sizes[\"time\"] * 100\n",
"for lakeid, missing in missings.groupby(\"lakeid\", squeeze=False):\n",
" print(f\"Missing values of lake ID {lakeid}: {float(missing.squeeze()):.2f} %\")"
]
},
{
"cell_type": "markdown",
"id": "16",
"id": "15",
"metadata": {},
"source": [
"## Boxplot"
Expand All @@ -238,31 +201,28 @@
{
"cell_type": "code",
"execution_count": null,
"id": "17",
"id": "16",
"metadata": {},
"outputs": [],
"source": [
"df = da.to_dataframe()\n",
"df.boxplot(by=\"lakeid\")\n",
"df.boxplot(by=([\"lakeid\", \"min_quality_level\"]))\n",
"plt.ylabel(f\"{da.attrs['long_name']} [{da.attrs['units']}]\")\n",
"plt.show()\n",
"\n",
"# Print statistics\n",
"boxplot_stats = {\n",
" lakeid: matplotlib.cbook.boxplot_stats(df_lakeid.dropna().values.squeeze())\n",
" for lakeid, df_lakeid in df.groupby(\"lakeid\")\n",
"}\n",
"for lakeid, df_lakeid in df.groupby(\"lakeid\"):\n",
" values = df_lakeid.dropna().values.squeeze()\n",
" (boxplot_stats[lakeid],) = matplotlib.cbook.boxplot_stats(values)\n",
"boxplot_stats = pd.DataFrame(boxplot_stats)\n",
"boxplot_stats"
"# Show stats\n",
"stats = {}\n",
"for label, grouped_df in df.groupby([\"lakeid\", \"min_quality_level\"]):\n",
" (stats[label],) = matplotlib.cbook.boxplot_stats(\n",
" grouped_df.dropna().values.squeeze()\n",
" )\n",
"pd.DataFrame(stats)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
Expand All @@ -276,7 +236,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down

0 comments on commit 7996c02

Please sign in to comment.