Use a temporary file for Zarr group JSON #135

Merged
merged 1 commit into from Sep 14, 2023
58 changes: 26 additions & 32 deletions activestorage/netcdf_to_zarr.py
@@ -4,40 +4,31 @@
 import ujson
 import fsspec
 import s3fs
+import tempfile
 
 from activestorage.config import *
 from kerchunk.hdf import SingleHdf5ToZarr
 
 
-def gen_json(file_url, fs, fs2, varname, **so):
+def gen_json(file_url, fs, fs2, outf, **so):
     """Generate a json file that contains the kerchunk-ed data for Zarr."""
-    # set some name for the output json file
-    fname = os.path.splitext(file_url)[0]
-    if "s3:" in fname:
-        fname = os.path.basename(fname)
-    outf = f'{fname}_{varname}.json'  # vanilla file name
-
-    # write it out if it's not there
-    if not os.path.isfile(outf):
-        with fs.open(file_url, **so) as infile:
-            # FIXME need to disentangle HDF5 errors if not OSError (most are)
-            try:
-                h5chunks = SingleHdf5ToZarr(infile, file_url, inline_threshold=0)
-            except OSError as exc:
-                raiser_1 = f"Unable to open file {file_url}. "
-                raiser_2 = "Check if file is netCDF3 or netCDF-classic"
-                print(raiser_1 + raiser_2)
-                raise exc
-
-            # inline threshold adjusts the Size below which binary blocks are
-            # included directly in the output
-            # a higher inline threshold can result in a larger json file but
-            # faster loading time
-            # for active storage, we don't want anything inline
-            # fname = os.path.splitext(file_url)[0]
-            # outf = f'{fname}_{varname}.json'  # vanilla file name
-            with fs2.open(outf, 'wb') as f:
-                f.write(ujson.dumps(h5chunks.translate()).encode())
+    with fs.open(file_url, **so) as infile:
+        # FIXME need to disentangle HDF5 errors if not OSError (most are)
+        try:
+            h5chunks = SingleHdf5ToZarr(infile, file_url, inline_threshold=0)
+        except OSError as exc:
+            raiser_1 = f"Unable to open file {file_url}. "
+            raiser_2 = "Check if file is netCDF3 or netCDF-classic"
+            print(raiser_1 + raiser_2)
+            raise exc
+
+        # inline threshold adjusts the Size below which binary blocks are
+        # included directly in the output
+        # a higher inline threshold can result in a larger json file but
+        # faster loading time
+        # for active storage, we don't want anything inline
+        with fs2.open(outf, 'wb') as f:
+            f.write(ujson.dumps(h5chunks.translate()).encode())
 
     return outf

@@ -85,10 +76,13 @@ def load_netcdf_zarr_generic(fileloc, varname, storage_type, build_dummy=True):
     so = {}
 
     fs2 = fsspec.filesystem('')  # local file system to save final json to
-    out_json = gen_json(fileloc, fs, fs2, varname)
 
-    # open this monster
-    print(f"Attempting to open and convert {fileloc}.")
-    ref_ds = open_zarr_group(out_json, varname)
+    # Write the Zarr group JSON to a temporary file.
+    with tempfile.NamedTemporaryFile() as out_json:
+        gen_json(fileloc, fs, fs2, out_json.name)
+
+        # open this monster
+        print(f"Attempting to open and convert {fileloc}.")
+        ref_ds = open_zarr_group(out_json.name, varname)
 
     return ref_ds
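
For context, the pattern this PR adopts can be sketched in isolation as below. This is an illustrative example, not code from the repository: write_zarr_json is a hypothetical stand-in for gen_json's fsspec-based write, and the payload is a dummy kerchunk-style reference dict.

import json
import tempfile

def write_zarr_json(payload, outf):
    # Stand-in for gen_json: serialise kerchunk-style references to the path `outf`.
    with open(outf, "w") as f:
        json.dump(payload, f)
    return outf

# NamedTemporaryFile provides a real path (`.name`) that other code can open,
# and the file is removed automatically when the context manager exits, so no
# per-variable JSON files accumulate next to the source data.
# (On Windows, reopening the file while it is still open requires delete=False.)
with tempfile.NamedTemporaryFile(suffix=".json") as out_json:
    write_zarr_json({"version": 1, "refs": {}}, out_json.name)
    with open(out_json.name) as f:
        refs = json.load(f)

print(refs["version"])  # -> 1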