From a14719e3da6a6dd19de239b96f7d722aea8cb668 Mon Sep 17 00:00:00 2001 From: munrojm Date: Wed, 28 Feb 2024 13:53:45 -0800 Subject: [PATCH] Fix s3 decoding --- mp_api/client/core/client.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/mp_api/client/core/client.py b/mp_api/client/core/client.py index d3d78210c..07529cd85 100644 --- a/mp_api/client/core/client.py +++ b/mp_api/client/core/client.py @@ -394,12 +394,10 @@ def _query_open_data( ) if "jsonl" in key: - data = str([doc.strip() for doc in file.read().strip().split("\n")]) - data = data.replace("'", "").replace("\\", "\\\\") + decoded_data = [decoder(jline) for jline in file.read().splitlines()] else: - data = file.read() + decoded_data = decoder(file.read()) - decoded_data = decoder(data) decoded_data = ( [decoded_data] if not isinstance(decoded_data, list) else decoded_data ) @@ -466,6 +464,7 @@ def _query_resource( ) and num_chunks is None and "substrates" not in self.suffix + and "phonon" not in self.suffix ) if fields: @@ -504,7 +503,11 @@ def _query_resource( f"{suffix}" if is_tasks else f"collections/{db_version}/{suffix}" ) objects = self.s3_client.list_objects_v2(Bucket=bucket, Prefix=prefix) - keys = [doc["Key"] for doc in objects["Contents"]] + keys = ( + [doc["Key"] for doc in objects["Contents"]] + if "Contents" in objects + else [] + ) decoder = MontyDecoder().decode if self.monty_decode else json.loads