Change document structure to place channel name inside object

- This change means channel names are stored inside the channel objects themselves, rather than being the keys to those objects. This makes it easier for both frontend and backend code to model these structures and access data via channel name.
- I've also gone through a number of files to clean up little things, sort out a couple of quick TODOs, etc.
ral-facilities · Jun 6, 2022 · d0324ac · d0324ac
1 parent 888a49e
commit d0324ac
Show file tree

Hide file tree

Showing 6 changed files with 44 additions and 42 deletions.
diff --git a/operationsgateway_api/src/data_encoding.py b/operationsgateway_api/src/data_encoding.py
@@ -43,6 +43,13 @@ def data_conversion(value):
                 new_new_value = DataEncoding.data_conversion(inner_value)
                 value[inner_key] = new_new_value
             new_value = value
+        elif isinstance(value, list):
+            # For channel data
+            new_list = []
+            for inner_value in value:
+                new_new_value = DataEncoding.data_conversion(inner_value)
+                new_list.append(new_new_value)
+            new_value = new_list
         elif isinstance(value, np.int64) or isinstance(value, np.uint64):
             new_value = int(value)
         elif isinstance(value, np.float64):

diff --git a/operationsgateway_api/src/hdf_handler.py b/operationsgateway_api/src/hdf_handler.py
@@ -38,7 +38,7 @@ def extract_hdf_data(file_path=None, hdf_file=None):
 
         record_id = ObjectId()
 
-        record = {"_id": record_id, "metadata": {}, "channels": {}}
+        record = {"_id": record_id, "metadata": {}, "channels": []}
         waveforms = []
         images = {}
 
@@ -48,29 +48,28 @@ def extract_hdf_data(file_path=None, hdf_file=None):
             # Adding metadata of shot
             record["metadata"][metadata_key] = metadata_value
 
-        for column_name, value in hdf_file.items():
+        for channel_name, value in hdf_file.items():
+            channel_data = {"name": channel_name}
+
             if value.attrs["channel_dtype"] == "image":
                 # TODO - should we use a directory per ID? Will need a bit of code added
                 # to create directories for each ID to prevent a FileNotFoundError when
                 # saving the images
                 # TODO - put as a constant/put elsewhere?
                 image_path = (
                     f"{Config.config.mongodb.image_store_directory}/"
-                    f"{record['metadata']['shotnum']}_{column_name}.png"
+                    f"{record['metadata']['shotnum']}_{channel_name}.png"
                 )
                 image_data = value["data"][()]
                 images[image_path] = image_data
 
-                record["channels"][column_name] = {
-                    "metadata": {},
-                    "image_path": image_path,
-                }
+                channel_data["image_path"] = image_path
             elif value.attrs["channel_dtype"] == "rgb-image":
                 # TODO - when we don't want random noise anymore, we could probably
                 # combine this code with greyscale images, it's the same implementation
                 image_path = (
                     f"{Config.config.mongodb.image_store_directory}/"
-                    f"{record['metadata']['shotnum']}_{column_name}.png"
+                    f"{record['metadata']['shotnum']}_{channel_name}.png"
                 )
 
                 # Gives random noise, where only example RGB I have sends full black
@@ -81,16 +80,11 @@ def extract_hdf_data(file_path=None, hdf_file=None):
                     size=(300, 400, 3),
                     dtype=np.uint8,
                 )
-                # image_data = value["data"][()]
                 images[image_path] = image_data
 
-                record["channels"][column_name] = {
-                    "metadata": {},
-                    "image_path": image_path,
-                }
+                channel_data["image_path"] = image_path
             elif value.attrs["channel_dtype"] == "scalar":
-                record["channels"][column_name] = {"metadata": {}, "data": None}
-                record["channels"][column_name]["data"] = value["data"][()]
+                channel_data["data"] = value["data"][()]
             elif value.attrs["channel_dtype"] == "waveform":
                 # Create an object ID here so it can be assigned to the waveform document
                 # and the record before data insertion. This way, we can send the data
@@ -99,20 +93,17 @@ def extract_hdf_data(file_path=None, hdf_file=None):
                 # which wouldn't be as efficient
                 waveform_id = ObjectId()
                 log.debug("Waveform ID: %s", waveform_id)
-                record["channels"][column_name] = {
-                    "metadata": {},
-                    "waveform_id": waveform_id,
-                }
+                channel_data["waveform_id"] = waveform_id
 
                 waveforms.append(
                     {"_id": waveform_id, "x": value["x"][()], "y": value["y"][()]},
                 )
 
             # Adding channel metadata
-            for column_metadata_key, column_metadata_value in value.attrs.items():
-                record["channels"][column_name]["metadata"][
-                    column_metadata_key
-                ] = column_metadata_value
+            channel_data["metadata"] = dict(value.attrs)
+
+            # Adding the processed channel to the record
+            record["channels"].append(channel_data)
 
         return record, waveforms, images
 

diff --git a/operationsgateway_api/src/helpers.py b/operationsgateway_api/src/helpers.py
@@ -87,14 +87,14 @@ def create_image_thumbnails(image_paths):
 
 
 def store_image_thumbnails(record, thumbnails):
-    # TODO - need to store thumbnails from waveforms - need to create them first
     for image_path, thumbnail in thumbnails.items():
         # TODO - extracting the channel name from the path should be thoroughly unit
         # tested. Probably best to put this code into its own function
         shot_channel = image_path.split("/")[-1].split("_")[1:]
         channel_name = "_".join(shot_channel).split(".")[0]
 
-        record["channels"][channel_name]["thumbnail"] = thumbnail
+        channel = search_for_channel(record, channel_name)
+        channel["thumbnail"] = thumbnail
 
 
 def create_image_plot(x, y, buf):
@@ -115,14 +115,15 @@ def convert_image_to_base64(image: Image.Image) -> bytes:
 
 
 def create_waveform_thumbnails(waveforms):
-    # TODO - could probably combine image and waveform thumbnail creation into a single
-    # function if the path was given directly/abstracted away
     thumbnails = {}
+
     for waveform in waveforms:
         with BytesIO() as plot_buf:
             # TODO - S307 linting error
             create_image_plot(
-                list(eval(waveform["x"])), list(eval(waveform["y"])), plot_buf,
+                list(eval(waveform["x"])),
+                list(eval(waveform["y"])),
+                plot_buf,
             )
             waveform_image = Image.open(plot_buf)
             create_thumbnail(waveform_image, (100, 100))
@@ -137,21 +138,27 @@ def store_waveform_thumbnails(record, thumbnails):
     # function really. Or combining it with the image version of this function so we can
     # abstract fetching the channel name
     for _id, thumbnail in thumbnails.items():
-        for channel_name, value in record["channels"].items():
+        for channel in record["channels"]:
             try:
-                if ObjectId(_id) == value["waveform_id"]:
-                    record["channels"][channel_name]["thumbnail"] = thumbnail
+                if ObjectId(_id) == channel["waveform_id"]:
+                    channel["thumbnail"] = thumbnail
             except KeyError:
                 # A KeyError here will be because the channel isn't a waveform. This is
                 # normal behaviour and is acceptable to pass
                 pass
 
 
 def truncate_thumbnail_output(record):
-    for value in record["channels"].values():
+    for channel in record["channels"]:
         try:
-            value["thumbnail"] = value["thumbnail"][:50]
+            channel["thumbnail"] = channel["thumbnail"][:50]
         except KeyError:
             # If there's no thumbnails (e.g. if channel isn't an image or waveform) then
             # a KeyError will be raised. This is normal behaviour, so acceptable to pass
             pass
+
+
+def search_for_channel(record, channel_name):
+    for channel in record["channels"]:
+        if channel_name in channel["name"]:
+            return channel
diff --git a/operationsgateway_api/src/mongo/interface.py b/operationsgateway_api/src/mongo/interface.py
@@ -21,7 +21,6 @@ def get_collection_object(collection_name):
 
         return getattr(ConnectionInstance.db_connection, collection_name)
 
-    # TODO - might need to make .records more generic if we use more than one collection
     @staticmethod
     def find(
         collection_name="images",
@@ -32,7 +31,7 @@ def find(
         projection=None,  # noqa: B006
     ):
         """
-        Creates a query to find documents in the records collection based on filters
+        Creates a query to find documents in a collection based on filters
 
         Due to Motor being asynchronous, the query is executed in `query_to_list()`, not
         in this function
@@ -117,11 +116,10 @@ async def update_one(collection_name, filter_={}, update={}):  # noqa: B006
             update,
         )
 
-    # TODO - insert_many() needed?
     @staticmethod
     async def insert_one(collection_name, data):
         """
-        Using the input data, insert a single document into the records collection
+        Using the input data, insert a single document into a given collection
         """
 
         log.info("Sending insert_one() to MongoDB, collection: %s", collection_name)
@@ -141,11 +139,10 @@ async def insert_many(collection_name, data):
         return await collection.insert_many(data)
 
     @staticmethod
-    # TODO - collection could be better named
-    def get_inserted_id(collection):
+    def get_inserted_id(document):
         """
         Get the ID of the newly added document. This is likely to be used in
         correspondence with `insert_one()`
         """
 
-        return str(collection.inserted_id)
+        return str(document.inserted_id)
diff --git a/operationsgateway_api/src/routes/images.py b/operationsgateway_api/src/routes/images.py
@@ -17,7 +17,6 @@ async def get_full_image(
     string_response: bool = False,
 ):
     if string_response:
-        # TODO - could we make this async? Benefits?
         with open(
             f"{Config.config.mongodb.image_store_directory}/{shot_num}_{channel_name}"
             ".png",

diff --git a/operationsgateway_api/src/routes/waveforms.py b/operationsgateway_api/src/routes/waveforms.py
@@ -20,5 +20,6 @@ async def get_waveform_by_id(id_: str):
         {"_id": DataEncoding.encode_object_id(id_)},
     )
 
-    # TODO - need to make that model more generic, not specific to records
+    # TODO - need to make that model more generic, not specific to records. Or make a
+    # separate model probably
     return Record.construct(waveform.keys(), **waveform)