Skip to content

Commit

Permalink
Change document structure to place channel name inside object
Browse files Browse the repository at this point in the history
- This change means channel names are stored inside the channel objects themselves, rather than being the keys that map to those objects. This makes it easier for both frontend and backend code to model these structures and access data via channel name.
- I've also gone through a number of files to clean up little things, sort out a couple of quick TODOs etc.
  • Loading branch information
MRichards99 committed Jun 6, 2022
1 parent 888a49e commit d0324ac
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 42 deletions.
7 changes: 7 additions & 0 deletions operationsgateway_api/src/data_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,13 @@ def data_conversion(value):
new_new_value = DataEncoding.data_conversion(inner_value)
value[inner_key] = new_new_value
new_value = value
elif isinstance(value, list):
# For channel data
new_list = []
for inner_value in value:
new_new_value = DataEncoding.data_conversion(inner_value)
new_list.append(new_new_value)
new_value = new_list
elif isinstance(value, np.int64) or isinstance(value, np.uint64):
new_value = int(value)
elif isinstance(value, np.float64):
Expand Down
37 changes: 14 additions & 23 deletions operationsgateway_api/src/hdf_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def extract_hdf_data(file_path=None, hdf_file=None):

record_id = ObjectId()

record = {"_id": record_id, "metadata": {}, "channels": {}}
record = {"_id": record_id, "metadata": {}, "channels": []}
waveforms = []
images = {}

Expand All @@ -48,29 +48,28 @@ def extract_hdf_data(file_path=None, hdf_file=None):
# Adding metadata of shot
record["metadata"][metadata_key] = metadata_value

for column_name, value in hdf_file.items():
for channel_name, value in hdf_file.items():
channel_data = {"name": channel_name}

if value.attrs["channel_dtype"] == "image":
# TODO - should we use a directory per ID? Will need a bit of code added
# to create directories for each ID to prevent a FileNotFoundError when
# saving the images
# TODO - put as a constant/put elsewhere?
image_path = (
f"{Config.config.mongodb.image_store_directory}/"
f"{record['metadata']['shotnum']}_{column_name}.png"
f"{record['metadata']['shotnum']}_{channel_name}.png"
)
image_data = value["data"][()]
images[image_path] = image_data

record["channels"][column_name] = {
"metadata": {},
"image_path": image_path,
}
channel_data["image_path"] = image_path
elif value.attrs["channel_dtype"] == "rgb-image":
# TODO - when we don't want random noise anymore, we could probably
# combine this code with greyscale images, its the same implementation
image_path = (
f"{Config.config.mongodb.image_store_directory}/"
f"{record['metadata']['shotnum']}_{column_name}.png"
f"{record['metadata']['shotnum']}_{channel_name}.png"
)

# Gives random noise, where only example RGB I have sends full black
Expand All @@ -81,16 +80,11 @@ def extract_hdf_data(file_path=None, hdf_file=None):
size=(300, 400, 3),
dtype=np.uint8,
)
# image_data = value["data"][()]
images[image_path] = image_data

record["channels"][column_name] = {
"metadata": {},
"image_path": image_path,
}
channel_data["image_path"] = image_path
elif value.attrs["channel_dtype"] == "scalar":
record["channels"][column_name] = {"metadata": {}, "data": None}
record["channels"][column_name]["data"] = value["data"][()]
channel_data["data"] = value["data"][()]
elif value.attrs["channel_dtype"] == "waveform":
# Create a object ID here so it can be assigned to the waveform document
# and the record before data insertion. This way, we can send the data
Expand All @@ -99,20 +93,17 @@ def extract_hdf_data(file_path=None, hdf_file=None):
# which wouldn't be as efficient
waveform_id = ObjectId()
log.debug("Waveform ID: %s", waveform_id)
record["channels"][column_name] = {
"metadata": {},
"waveform_id": waveform_id,
}
channel_data["waveform_id"] = waveform_id

waveforms.append(
{"_id": waveform_id, "x": value["x"][()], "y": value["y"][()]},
)

# Adding channel metadata
for column_metadata_key, column_metadata_value in value.attrs.items():
record["channels"][column_name]["metadata"][
column_metadata_key
] = column_metadata_value
channel_data["metadata"] = dict(value.attrs)

# Adding the processed channel to the record
record["channels"].append(channel_data)

return record, waveforms, images

Expand Down
27 changes: 17 additions & 10 deletions operationsgateway_api/src/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,14 +87,14 @@ def create_image_thumbnails(image_paths):


def store_image_thumbnails(record, thumbnails):
# TODO - need to store thumbnails from waveforms - need to create them first
for image_path, thumbnail in thumbnails.items():
# TODO - extracting the channel name from the path should be thoroughly unit
# tested. Probably best to put this code into its own function
shot_channel = image_path.split("/")[-1].split("_")[1:]
channel_name = "_".join(shot_channel).split(".")[0]

record["channels"][channel_name]["thumbnail"] = thumbnail
channel = search_for_channel(record, channel_name)
channel["thumbnail"] = thumbnail


def create_image_plot(x, y, buf):
Expand All @@ -115,14 +115,15 @@ def convert_image_to_base64(image: Image.Image) -> bytes:


def create_waveform_thumbnails(waveforms):
# TODO - could probably combine image and waveform thumbnail creation into a single
# function if the path was given directly/abstracted away
thumbnails = {}

for waveform in waveforms:
with BytesIO() as plot_buf:
# TODO - S307 linting error
create_image_plot(
list(eval(waveform["x"])), list(eval(waveform["y"])), plot_buf,
list(eval(waveform["x"])),
list(eval(waveform["y"])),
plot_buf,
)
waveform_image = Image.open(plot_buf)
create_thumbnail(waveform_image, (100, 100))
Expand All @@ -137,21 +138,27 @@ def store_waveform_thumbnails(record, thumbnails):
# function really. Or combining it with the image version of this function so we can
# abstract fetching the channel name
for _id, thumbnail in thumbnails.items():
for channel_name, value in record["channels"].items():
for channel in record["channels"]:
try:
if ObjectId(_id) == value["waveform_id"]:
record["channels"][channel_name]["thumbnail"] = thumbnail
if ObjectId(_id) == channel["waveform_id"]:
channel["thumbnail"] = thumbnail
except KeyError:
# A KeyError here will be because the channel isn't a waveform. This is
# normal behaviour and is acceptable to pass
pass


def truncate_thumbnail_output(record):
for value in record["channels"].values():
for channel in record["channels"]:
try:
value["thumbnail"] = value["thumbnail"][:50]
channel["thumbnail"] = channel["thumbnail"][:50]
except KeyError:
# If there's no thumbnails (e.g. if channel isn't an image or waveform) then
# a KeyError will be raised. This is normal behaviour, so acceptable to pass
pass


def search_for_channel(record, channel_name):
    """
    Find the channel with the given name inside a record

    :param record: Record whose `channels` entry is a list of channel dictionaries,
        each storing its own name under the `name` key
    :param channel_name: Name of the channel to look for
    :return: The first channel dictionary whose name equals `channel_name`, or `None`
        if no channel in the record has that name
    """

    for channel in record["channels"]:
        # Compare for equality - a substring test (`in`) would let a search for
        # "CH_1" wrongly return a channel called "CH_10"
        if channel["name"] == channel_name:
            return channel

    return None
11 changes: 4 additions & 7 deletions operationsgateway_api/src/mongo/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ def get_collection_object(collection_name):

return getattr(ConnectionInstance.db_connection, collection_name)

# TODO - might need to make .records more generic if we use more than one collection
@staticmethod
def find(
collection_name="images",
Expand All @@ -32,7 +31,7 @@ def find(
projection=None, # noqa: B006
):
"""
Creates a query to find documents in the records collection based on filters
Creates a query to find documents in a collection based on filters
Due to Motor being asynchronous, the query is executed in `query_to_list()`, not
in this function
Expand Down Expand Up @@ -117,11 +116,10 @@ async def update_one(collection_name, filter_={}, update={}): # noqa: B006
update,
)

# TODO - insert_many() needed?
@staticmethod
async def insert_one(collection_name, data):
"""
Using the input data, insert a single document into the records collection
Using the input data, insert a single document into a given collection
"""

log.info("Sending insert_one() to MongoDB, collection: %s", collection_name)
Expand All @@ -141,11 +139,10 @@ async def insert_many(collection_name, data):
return await collection.insert_many(data)

@staticmethod
# TODO - collection could be better named
def get_inserted_id(collection):
def get_inserted_id(document):
"""
Get the ID of the newly added document. This is likely to be used in
conjunction with `insert_one()`
"""

return str(collection.inserted_id)
return str(document.inserted_id)
1 change: 0 additions & 1 deletion operationsgateway_api/src/routes/images.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ async def get_full_image(
string_response: bool = False,
):
if string_response:
# TODO - could we make this async? Benefits?
with open(
f"{Config.config.mongodb.image_store_directory}/{shot_num}_{channel_name}"
".png",
Expand Down
3 changes: 2 additions & 1 deletion operationsgateway_api/src/routes/waveforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,6 @@ async def get_waveform_by_id(id_: str):
{"_id": DataEncoding.encode_object_id(id_)},
)

# TODO - need to make that model more generic, not specific to records
# TODO - need to make that model more generic, not specific to records. Or make a
# separate model probably
return Record.construct(waveform.keys(), **waveform)

0 comments on commit d0324ac

Please sign in to comment.