From 548b1376c1aa65aa20bc203d83c21b28ddb1083c Mon Sep 17 00:00:00 2001
From: Andreas Motl
Date: Wed, 9 Oct 2024 07:33:20 +0200
Subject: [PATCH] MongoDB: Fixed BSON decoding of `{"$date": 1180690093000}` timestamps

---
 src/commons_codec/transform/mongodb.py | 18 ++++++++++--------
 .../transform/mongodb/test_mongodb_convert.py | 10 ++++++++--
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/src/commons_codec/transform/mongodb.py b/src/commons_codec/transform/mongodb.py
index 744ed54..0220033 100644
--- a/src/commons_codec/transform/mongodb.py
+++ b/src/commons_codec/transform/mongodb.py
@@ -121,15 +121,17 @@ def decode_extended_json(self, value: t.Dict[str, t.Any]) -> t.Any:
 
         # Special handling for datetime representation in NUMBERLONG format (emulated depth-first).
         type_ = next(iter(value))  # Get key of first item in dictionary.
-        is_date_numberlong = type_ == "$date" and "$numberLong" in value["$date"]
-        if is_date_numberlong:
-            try:
-                out = dt.datetime.fromtimestamp(int(value["$date"]["$numberLong"]) / 1000, tz=dt.timezone.utc)
-            except ValueError as ex:
-                logger.error(f"Decoding legacy timestamp failed: {ex}. value={value}")
-                out = 0
-        else:
+        if type_ == "$date" and isinstance(value["$date"], dict):
+            value = {"$date": int(value["$date"]["$numberLong"])}
+
+        # Invoke BSON decoder.
+        try:
             out = object_hook(value)
+        except bson.errors.InvalidBSON as ex:
+            logger.error(f"Decoding BSON value failed: {ex}. value={value}")
+            out = None
+            if "Python int too large to convert to C int" in str(ex):
+                out = 0
 
         is_bson = isinstance(out, all_bson_types())
 
diff --git a/tests/transform/mongodb/test_mongodb_convert.py b/tests/transform/mongodb/test_mongodb_convert.py
index a3e5afa..9e6b298 100644
--- a/tests/transform/mongodb/test_mongodb_convert.py
+++ b/tests/transform/mongodb/test_mongodb_convert.py
@@ -104,7 +104,7 @@ class DateConversionCase:
     DateConversionCase(
         converter=MongoDBCrateDBConverter(),
         data_in={"$date": {"$numberLong": "1655210544987"}},
-        data_out=dt.datetime(2022, 6, 14, 12, 42, 24, 987000, tzinfo=dt.timezone.utc),
+        data_out=dt.datetime(2022, 6, 14, 12, 42, 24, 987000),
     ),
     DateConversionCase(
         converter=MongoDBCrateDBConverter(timestamp_to_epoch=True, timestamp_use_milliseconds=True),
@@ -124,7 +124,12 @@ class DateConversionCase:
     DateConversionCase(
         converter=MongoDBCrateDBConverter(timestamp_to_iso8601=True),
         data_in={"$date": {"$numberLong": "1655210544987"}},
-        data_out="2022-06-14T12:42:24.987000+00:00",
+        data_out="2022-06-14T12:42:24.987000",
+    ),
+    DateConversionCase(
+        converter=MongoDBCrateDBConverter(timestamp_to_iso8601=True),
+        data_in={"$date": 1180690093000},
+        data_out="2007-06-01T09:28:13",
     ),
 ]
 
@@ -136,6 +141,7 @@ class DateConversionCase:
     "epochms-$date-legacy",
     "iso8601-$date-canonical",
     "iso8601-$date-legacy",
+    "iso8601-$date-ultra-legacy",
 ]
 
 