From 09a4409607609463b32058a69b5b0d37f105a608 Mon Sep 17 00:00:00 2001 From: Julius Busecke Date: Tue, 5 Nov 2024 16:01:57 +0000 Subject: [PATCH 1/6] rad-irf experiments - [ ] Closes #182 --- feedstock/iids_pr.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/feedstock/iids_pr.yaml b/feedstock/iids_pr.yaml index f6a0fa6..f41446b 100644 --- a/feedstock/iids_pr.yaml +++ b/feedstock/iids_pr.yaml @@ -1 +1,2 @@ - - "CMIP6.*.*.[CNRM-CM6-1,CanESM5].historical.r1i1p1f1.Omon.[tos, so].*.*" + # - "CMIP6.*.*.[CNRM-CM6-1,CanESM5].historical.r1i1p1f1.Omon.[tos, so].*.*" + - 'CMIP6.RFMIP.*.*.rad-irf.*.Efx.*.*.*' From a463ce2c13bc26ad5c994d841f1921e360c7bd64 Mon Sep 17 00:00:00 2001 From: Julius Busecke Date: Tue, 5 Nov 2024 16:13:52 +0000 Subject: [PATCH 2/6] fix tests for datasets without time. --- feedstock/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/feedstock/requirements.txt b/feedstock/requirements.txt index cef01a8..9c7be31 100644 --- a/feedstock/requirements.txt +++ b/feedstock/requirements.txt @@ -1,4 +1,5 @@ -leap-data-management-utils==0.0.12 +git+https://github.com/leap-stc/leap-data-management-utils.git@allow-no-time-in-tests +#leap-data-management-utils==0.0.12 git+https://github.com/jbusecke/pangeo-forge-esgf.git@new-request-scheme dynamic-chunks==0.0.3 #git+https://github.com/pangeo-forge/pangeo-forge-recipes@feature/concurrency-control From bccf7b036eae48f8b4ec4bf16aab832f9a448e12 Mon Sep 17 00:00:00 2001 From: Julius Busecke Date: Tue, 5 Nov 2024 17:10:07 +0000 Subject: [PATCH 3/6] Update recipe.py --- feedstock/recipe.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/feedstock/recipe.py b/feedstock/recipe.py index c225243..8ee3952 100644 --- a/feedstock/recipe.py +++ b/feedstock/recipe.py @@ -158,6 +158,13 @@ async def get_recipe_inputs(): for iid, data in recipe_data.items(): urls = get_sorted_http_urls_from_iid_dict(data) pattern = pattern_from_file_sequence(urls, 
concat_dim="time") + + # to accomodate single file we cannot parse target chunks (https://github.com/pangeo-forge/pangeo-forge-recipes/issues/275) + if len(urls)>1: + chunk_fn = dynamic_chunking_func + else: + chunk_fn = None + recipes[iid] = ( f"Creating {iid}" >> beam.Create(pattern.items()) # | CheckpointFileTransfer( @@ -177,7 +184,7 @@ async def get_recipe_inputs(): | StoreToZarr( store_name=f"{iid}.zarr", combine_dims=pattern.combine_dim_keys, - dynamic_chunking_fn=dynamic_chunking_func, + dynamic_chunking_fn=chunk_fn, ) | InjectAttrs({"pangeo_forge_api_responses": data}) | ConsolidateDimensionCoordinates() From ed01e98664874588ae9415167231560d83bea7e9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 5 Nov 2024 17:10:15 +0000 Subject: [PATCH 4/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- feedstock/recipe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/feedstock/recipe.py b/feedstock/recipe.py index 8ee3952..994a4ce 100644 --- a/feedstock/recipe.py +++ b/feedstock/recipe.py @@ -160,11 +160,11 @@ async def get_recipe_inputs(): pattern = pattern_from_file_sequence(urls, concat_dim="time") # to accomodate single file we cannot parse target chunks (https://github.com/pangeo-forge/pangeo-forge-recipes/issues/275) - if len(urls)>1: + if len(urls) > 1: chunk_fn = dynamic_chunking_func else: chunk_fn = None - + recipes[iid] = ( f"Creating {iid}" >> beam.Create(pattern.items()) # | CheckpointFileTransfer( From d3cbe358f1a8baf66d90a0c2743279946c3bca9b Mon Sep 17 00:00:00 2001 From: Julius Busecke Date: Tue, 5 Nov 2024 19:02:36 +0000 Subject: [PATCH 5/6] Update requirements.txt --- feedstock/requirements.txt | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/feedstock/requirements.txt b/feedstock/requirements.txt index 9c7be31..54a2e2b 100644 --- a/feedstock/requirements.txt +++ 
b/feedstock/requirements.txt @@ -2,13 +2,7 @@ git+https://github.com/leap-stc/leap-data-management-utils.git@allow-no-time-in- #leap-data-management-utils==0.0.12 git+https://github.com/jbusecke/pangeo-forge-esgf.git@new-request-scheme dynamic-chunks==0.0.3 -#git+https://github.com/pangeo-forge/pangeo-forge-recipes@feature/concurrency-control -git+https://github.com/ranchodeluxe/xarray@ranchodeluxe-patch-1#egg=xarray -git+https://github.com/ranchodeluxe/rioxarray -git+https://github.com/ranchodeluxe/datatree@main#egg=xarray-datatree -git+https://github.com/pangeo-forge/pangeo-forge-recipes@jb/xarray-hack #see @gc/cached_disabled but with cache -#git+https://github.com/moradology/httpfs-sync.git@feature/pool-reuse -#httpfs-sync>=0.0.2 +pangeo-forge-recipes==0.10.8 zarr==2.16.1 gcsfs apache-beam[gcp] From 1512078ba3f35fd526dc44cf9a95ed8632149511 Mon Sep 17 00:00:00 2001 From: Julius Busecke Date: Wed, 6 Nov 2024 11:53:48 +0000 Subject: [PATCH 6/6] Update recipe.py --- feedstock/recipe.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/feedstock/recipe.py b/feedstock/recipe.py index 994a4ce..c426832 100644 --- a/feedstock/recipe.py +++ b/feedstock/recipe.py @@ -162,8 +162,10 @@ async def get_recipe_inputs(): # to accomodate single file we cannot parse target chunks (https://github.com/pangeo-forge/pangeo-forge-recipes/issues/275) if len(urls) > 1: chunk_fn = dynamic_chunking_func + combine_dims = pattern.combine_dim_keys else: chunk_fn = None + combine_dims = [] recipes[iid] = ( f"Creating {iid}" >> beam.Create(pattern.items()) @@ -183,7 +185,7 @@ async def get_recipe_inputs(): | Preprocessor() | StoreToZarr( store_name=f"{iid}.zarr", - combine_dims=pattern.combine_dim_keys, + combine_dims=combine_dims, dynamic_chunking_fn=chunk_fn, ) | InjectAttrs({"pangeo_forge_api_responses": data})