From 4ce312f3879b14e741da5b9c48165945a05d135b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= <16805946+edgarrmondragon@users.noreply.github.com> Date: Fri, 6 Sep 2024 12:34:17 -0600 Subject: [PATCH] feat(taps): SQL taps now emit schemas with `maxLength` when applicable (#2651) --- singer_sdk/connectors/sql.py | 7 +++---- singer_sdk/streams/sql.py | 2 -- tests/core/test_connector_sql.py | 1 - tests/samples/conftest.py | 5 +++-- tests/samples/test_tap_sqlite.py | 14 +++++++++----- 5 files changed, 15 insertions(+), 14 deletions(-) diff --git a/singer_sdk/connectors/sql.py b/singer_sdk/connectors/sql.py index 697a74cb2..949ec523f 100644 --- a/singer_sdk/connectors/sql.py +++ b/singer_sdk/connectors/sql.py @@ -166,15 +166,14 @@ def float_to_jsonschema(self, column_type: sa.types.Numeric) -> dict: # noqa: A return th.NumberType.type_dict # type: ignore[no-any-return] @to_jsonschema.register - def string_to_jsonschema(self, column_type: sa.types.String) -> dict: # noqa: ARG002, PLR6301 + def string_to_jsonschema(self, column_type: sa.types.String) -> dict: # noqa: PLR6301 """Return a JSON Schema representation of a generic string type. Args: column_type (:column_type:`String`): The column type. """ - # TODO: Enable support for maxLength. - # if sa_type.length: - # return StringType(max_length=sa_type.length).type_dict # noqa: ERA001 + if column_type.length: + return th.StringType(max_length=column_type.length).type_dict # type: ignore[no-any-return] return th.StringType.type_dict # type: ignore[no-any-return] @to_jsonschema.register diff --git a/singer_sdk/streams/sql.py b/singer_sdk/streams/sql.py index 2877a505b..de6447cd2 100644 --- a/singer_sdk/streams/sql.py +++ b/singer_sdk/streams/sql.py @@ -210,8 +210,6 @@ def get_records(self, context: Context | None) -> t.Iterable[dict[str, t.Any]]: with self.connector._connect() as conn: # noqa: SLF001 for record in conn.execute(query).mappings(): - # TODO: Standardize record mapping type - # https://github.com/meltano/sdk/issues/2096 transformed_record = self.post_process(dict(record)) if transformed_record is None: # Record filtered out during post_process() diff --git a/tests/core/test_connector_sql.py b/tests/core/test_connector_sql.py index 5b866366a..f76f30525 100644 --- a/tests/core/test_connector_sql.py +++ b/tests/core/test_connector_sql.py @@ -405,7 +405,6 @@ def test_fully_qualified_name_empty_error(): pytest.param( sa.types.VARCHAR(length=127), {"type": ["string"], "maxLength": 127}, - marks=pytest.mark.xfail, id="varchar-length", ), pytest.param(sa.types.TEXT(), {"type": ["string"]}, id="text"), diff --git a/tests/samples/conftest.py b/tests/samples/conftest.py index 90cb80dec..6580c0d0c 100644 --- a/tests/samples/conftest.py +++ b/tests/samples/conftest.py @@ -29,14 +29,15 @@ def _sqlite_sample_db(sqlite_connector): f""" CREATE TABLE t{t} ( c1 int PRIMARY KEY NOT NULL, - c2 varchar(10) NOT NULL + c2 varchar(10) NOT NULL, + c3 text NOT NULL ) """ ), ) for x in range(100): conn.execute( - sa.text(f"INSERT INTO t{t} VALUES ({x}, 'x={x}')"), # noqa: S608 + sa.text(f"INSERT INTO t{t} VALUES ({x}, 'x={x}', 'y={x}')"), # noqa: S608 ) diff --git a/tests/samples/test_tap_sqlite.py b/tests/samples/test_tap_sqlite.py index b019d9449..ff83de338 100644 --- a/tests/samples/test_tap_sqlite.py +++ b/tests/samples/test_tap_sqlite.py @@ -70,7 +70,7 @@ def test_sqlite_discovery(sqlite_sample_tap: SQLTap): sqlite_sample_tap.sync_all() stream = t.cast(SQLStream, sqlite_sample_tap.streams["main-t1"]) schema = stream.schema - assert len(schema["properties"]) == 2 + assert len(schema["properties"]) == 3 assert stream.name == stream.tap_stream_id == "main-t1" md_map = MetadataMapping.from_iterable(stream.catalog_entry["metadata"]) @@ -90,13 +90,17 @@ def test_sqlite_discovery(sqlite_sample_tap: SQLTap): def test_sqlite_input_catalog(sqlite_sample_tap: SQLTap): sqlite_sample_tap.sync_all() stream = t.cast(SQLStream, sqlite_sample_tap.streams["main-t1"]) - assert len(stream.schema["properties"]) == 2 - assert len(stream.stream_maps[0].transformed_schema["properties"]) == 2 + assert len(stream.schema["properties"]) == 3 + assert len(stream.stream_maps[0].transformed_schema["properties"]) == 3 for schema in [stream.schema, stream.stream_maps[0].transformed_schema]: - assert len(schema["properties"]) == 2 + assert len(schema["properties"]) == 3 assert schema["properties"]["c1"] == {"type": ["integer"]} - assert schema["properties"]["c2"] == {"type": ["string", "null"]} + assert schema["properties"]["c2"] == { + "type": ["string", "null"], + "maxLength": 10, + } + assert schema["properties"]["c3"] == {"type": ["string", "null"]} assert stream.name == stream.tap_stream_id == "main-t1" md_map = MetadataMapping.from_iterable(stream.catalog_entry["metadata"])