From b32f058413fc6be46aac945d45a04b219bbf5c1c Mon Sep 17 00:00:00 2001 From: ayoubft <63267601+ayoubft@users.noreply.github.com> Date: Sat, 14 Oct 2023 21:14:10 +0200 Subject: [PATCH 01/16] add notebook for chunking methods --- docs/chunking.ipynb | 891 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 891 insertions(+) create mode 100644 docs/chunking.ipynb diff --git a/docs/chunking.ipynb b/docs/chunking.ipynb new file mode 100644 index 00000000..c171dea6 --- /dev/null +++ b/docs/chunking.ipynb @@ -0,0 +1,891 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e515b4bd-a302-45a9-8464-56b67a73a46c", + "metadata": {}, + "source": [ + "## Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "96e9149e-fc6d-4048-8e45-a29966e5c6b8", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from itertools import product\n", + "import numpy as np\n", + "\n", + "import xarray as xr\n", + "import xbitinfo as xb" + ] + }, + { + "cell_type": "markdown", + "id": "b64e0873-0a27-4757-947a-4a559a102288", + "metadata": {}, + "source": [ + "## Data loading" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "320224c9-06e2-428a-8614-8ed0d15eee82", + "metadata": {}, + "outputs": [], + "source": [ + "# load data\n", + "ds = xr.tutorial.load_dataset(\"air_temperature\") \n", + "chunks = {'lat':5,'lon':10} # Defining chunks that will be used for the reading/bitrounding/writing\n", + "ds = ds.chunk(chunks) # Apply chunking" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f3120040-79f1-4a7f-a61f-afec9fb3ca5b", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
<xarray.Dataset>\n", + "Dimensions: (lat: 25, time: 2920, lon: 53)\n", + "Coordinates:\n", + " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", + " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", + " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", + "Data variables:\n", + " air (time, lat, lon) float32 dask.array<chunksize=(2920, 5, 10), meta=np.ndarray>\n", + "Attributes:\n", + " Conventions: COARDS\n", + " title: 4x daily NMC reanalysis (1948)\n", + " description: Data is from NMC initialized reanalysis\\n(4x/day). These a...\n", + " platform: Model\n", + " references: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...
<xarray.Dataset>\n", + "Dimensions: (lat: 25, time: 2920, lon: 53)\n", + "Coordinates:\n", + " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", + " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", + " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", + "Data variables:\n", + " air (time, lat, lon) float32 dask.array<chunksize=(2920, 5, 10), meta=np.ndarray>\n", + "Attributes:\n", + " Conventions: COARDS\n", + " title: 4x daily NMC reanalysis (1948)\n", + " description: Data is from NMC initialized reanalysis\\n(4x/day). These a...\n", + " platform: Model\n", + " references: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...