From 05e5ffe27008d69e5f4c1b968e5cc6ba314ccd96 Mon Sep 17 00:00:00 2001 From: eric Date: Tue, 11 Feb 2025 09:50:12 -0700 Subject: [PATCH] ignore type --- .../data_imports/pipelines/sql_database/arrow_helpers.py | 4 +++- .../pipelines/sql_database/test/test_arrow_helpers.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/posthog/temporal/data_imports/pipelines/sql_database/arrow_helpers.py b/posthog/temporal/data_imports/pipelines/sql_database/arrow_helpers.py index 8b00de98415ba..1feb778955271 100644 --- a/posthog/temporal/data_imports/pipelines/sql_database/arrow_helpers.py +++ b/posthog/temporal/data_imports/pipelines/sql_database/arrow_helpers.py @@ -123,10 +123,12 @@ def row_tuples_to_arrow(rows: Sequence[RowAny], columns: TTableSchemaColumns, tz if issubclass(py_type, bytes) or issubclass(py_type, str): # For bytes/str columns, ensure any dict values are serialized to JSON strings - columnar_known_types[field.name] = [ + # Convert to numpy array after processing + processed_values = [ None if x is None else json_dumps(x) if isinstance(x, dict | list) else x for x in columnar_known_types[field.name] ] + columnar_known_types[field.name] = np.array(processed_values, dtype=object) # If there are unknown type columns, first create a table to infer their types if columnar_unknown_types: diff --git a/posthog/temporal/data_imports/pipelines/sql_database/test/test_arrow_helpers.py b/posthog/temporal/data_imports/pipelines/sql_database/test/test_arrow_helpers.py index 141179ee8ab6e..5e5264eb3b692 100644 --- a/posthog/temporal/data_imports/pipelines/sql_database/test/test_arrow_helpers.py +++ b/posthog/temporal/data_imports/pipelines/sql_database/test/test_arrow_helpers.py @@ -29,7 +29,7 @@ def test_row_tuples_to_arrow_string_column_with_dict(): columns = {"string_col": {"name": "string_col", "data_type": "text", "nullable": True}} # This should now succeed and serialize the dictionary to JSON - table = row_tuples_to_arrow(rows, columns, "UTC") + table = row_tuples_to_arrow(rows, columns, "UTC") # type: ignore # Verify the results assert table.column("string_col")[0].as_py() == ""