From 57380842df10e82cad204cbc60878e29dc5ec72a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez-Mondrag=C3=B3n?= Date: Thu, 19 Sep 2024 16:31:13 -0600 Subject: [PATCH] Use JSON schema `title` for setting labels --- hub_utils/meltano_util.py | 3 +- tests/data/tap_with_rich_config_schema.json | 33 ++++++++++++ tests/test_meltano_utils.py | 56 +++++++++++++++------ 3 files changed, 77 insertions(+), 15 deletions(-) create mode 100644 tests/data/tap_with_rich_config_schema.json diff --git a/hub_utils/meltano_util.py b/hub_utils/meltano_util.py index f3b8de6..d4e1b55 100644 --- a/hub_utils/meltano_util.py +++ b/hub_utils/meltano_util.py @@ -277,6 +277,7 @@ def _parse_sdk_about_settings(sdk_about_dict, enforce_desc=False): base_required = settings_raw.get("required", []) for settings in MeltanoUtil._traverse_schema_properties(settings_raw): name = settings.get("name") + title = settings.get("title") description = MeltanoUtil._handle_description( MeltanoUtil._clean_description(settings.get("description")), name, @@ -284,7 +285,7 @@ def _parse_sdk_about_settings(sdk_about_dict, enforce_desc=False): ) setting_details = { "name": name, - "label": MeltanoUtil._get_label(name), + "label": title or MeltanoUtil._get_label(name), "description": description, } kind = MeltanoUtil._get_kind_from_type( diff --git a/tests/data/tap_with_rich_config_schema.json b/tests/data/tap_with_rich_config_schema.json new file mode 100644 index 0000000..dbff7dc --- /dev/null +++ b/tests/data/tap_with_rich_config_schema.json @@ -0,0 +1,33 @@ +{ + "name": "tap-example", + "description": "Singer.io tap for extracting data from example", + "version": "0.1.0", + "sdk_version": "0.40.0", + "capabilities": [ + "catalog", + "state", + "discover", + "about", + "stream-maps", + "schema-flattening" + ], + "settings": { + "type": "object", + "properties": { + "username": { + "type": "string", + "description": "The username to use when authenticating with the API" + }, + "password": { + "type": "string", + "description": "The password to use when authenticating with the API" + }, + "cmo": { + "type": ["string", "null"], + "title": "Client Management Organization", + "description": "The client management organization to use when authenticating with the API" + } + }, + "required": ["username", "password"] + } +} diff --git a/tests/test_meltano_utils.py b/tests/test_meltano_utils.py index f49172a..1f489ea 100644 --- a/tests/test_meltano_utils.py +++ b/tests/test_meltano_utils.py @@ -124,6 +124,34 @@ def test_sdk_about_parsing_2(): ] +def test_sdk_about_parsing_3(): + sdk_about_dict = _read_data('tap_with_rich_config_schema.json') + + settings, _, _ = MeltanoUtil._parse_sdk_about_settings(sdk_about_dict) + + assert settings == [ + { + "name": "username", + "label": "Username", + "description": "The username to use when authenticating with the API", + "kind": "string" + }, + { + "name": "password", + "label": "Password", + "description": "The password to use when authenticating with the API", + "kind": "password", + "sensitive": True, + }, + { + "name": "cmo", + "label": "Client Management Organization", + "description": "The client management organization to use when authenticating with the API", + "kind": "string" + }, + ] + + def test_sdk_about_parsing_airbyte(): sdk_about_dict = _read_data('airbyte_s3_about.json') @@ -145,39 +173,39 @@ def test_sdk_about_parsing_airbyte(): }, { "name": "connector_config.dataset", - "label": "Connector Config Dataset", + "label": "Output Stream Name", "description": "The name of the stream you would like this source to output. Can contain letters, numbers, or underscores.", "kind": "string" }, { "name": "connector_config.path_pattern", - "label": "Connector Config Path Pattern", + "label": "Pattern of files to replicate", "description": "A regular expression which tells the connector which files to replicate. All files which match this pattern will be replicated. Use | to separate multiple patterns. See this page to understand pattern syntax (GLOBSTAR and SPLIT flags are enabled). Use pattern ** to pick up all files.", "kind": "string" }, # Format { "name": "connector_config.format.filetype", - "label": "Connector Config Format Filetype", + "label": "Filetype", "description": "Csv, Parquet", "kind": "string" }, { "name": "connector_config.format.delimiter", - "label": "Connector Config Format Delimiter", + "label": "Delimiter", "description": "The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\\t'.", "kind": "string", "value": "," }, { "name": "connector_config.format.columns", - "label": "Connector Config Format Columns", + "label": "Selected Columns", "description": "If you only want to sync a subset of the columns from the file(s), add the columns you want here as a comma-delimited list. Leave it empty to sync all columns.", "kind": "array" }, { "name": "connector_config.format.buffer_size", - "label": "Connector Config Format Buffer Size", + "label": "Buffer Size", "description": "Perform read buffering when deserializing individual column chunks. By default every group column will be loaded fully to memory. This option can help avoid out-of-memory errors if your data is particularly wide.", "kind": "integer", "value": 2 @@ -185,35 +213,35 @@ def test_sdk_about_parsing_airbyte(): # End after tweaks { "name": "connector_config.schema", - "label": "Connector Config Schema", + "label": "Manually enforced data schema", "description": "Optionally provide a schema to enforce, as a valid JSON string. Ensure this is a mapping of { \"column\" : \"type\" }, where types are valid JSON Schema datatypes. Leave as {} to auto-infer the schema.", "kind": "string", "value": "{}" }, { "name": "connector_config.provider.bucket", - "label": "Connector Config Provider Bucket", + "label": "Bucket", "description": "Name of the S3 bucket where the file(s) exist.", "kind": "password", "sensitive": True, }, { "name": "connector_config.provider.aws_access_key_id", - "label": "Connector Config Provider AWS Access Key ID", + "label": "AWS Access Key ID", "description": "In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.", "kind": "password", "sensitive": True, }, { "name": "connector_config.provider.aws_secret_access_key", - "label": "Connector Config Provider AWS Secret Access Key", + "label": "AWS Secret Access Key", "description": "In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.", "kind": "password", "sensitive": True, }, { "name": "connector_config.provider.path_prefix", - "label": "Connector Config Provider Path Prefix", + "label": "Path Prefix", "description": "By providing a path-like prefix (e.g. myFolder/thisTable/) under which all the relevant files sit, we can optimize finding these in S3. This is optional but recommended if your bucket contains many folders/files which you don't need to replicate.", "kind": "password", "sensitive": True, @@ -221,7 +249,7 @@ def test_sdk_about_parsing_airbyte(): }, { "name": "connector_config.provider.endpoint", - "label": "Connector Config Provider Endpoint", + "label": "Endpoint", "description": "Endpoint to an S3 compatible service. Leave empty to use AWS.", "kind": "password", "sensitive": True, @@ -294,7 +322,7 @@ def test_airbyte_array_enum_array(): assert settings == [ { "name": "connector_config.metrics", - "label": "Connector Config Metrics", + "label": "Metrics to ingest", "description": "Select at least one metric to query.", "kind": "array", } @@ -316,7 +344,7 @@ def test_airbyte_array_enum_string(): assert settings == [ { "name": "connector_config.region", - "label": "Connector Config Region", + "label": "AWS Region", "description": "AWS Region of the SQS Queue", "kind": "options", "options": [