From 1ae695b558a4accdfe34599cc6cc6a082234a686 Mon Sep 17 00:00:00 2001 From: Colin Rogers <111200756+colin-rogers-dbt@users.noreply.github.com> Date: Mon, 21 Oct 2024 19:53:46 -0700 Subject: [PATCH] implement choice of delimiter for seed files (#1122) (#1380) * implement choice of delimiter for seed files * adding change log entry * implementation of test of TestBigQuerySeedWithUniqueDelimiter * Update dbt/adapters/bigquery/impl.py Co-authored-by: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> * Update dbt/include/bigquery/macros/materializations/seed.sql Co-authored-by: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> * Update dbt/include/bigquery/macros/materializations/seed.sql Co-authored-by: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> * Update dbt/adapters/bigquery/impl.py Co-authored-by: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> * Update .changes/unreleased/Fixes-20240226-233024.yaml --------- Co-authored-by: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Co-authored-by: Colin Rogers <111200756+colin-rogers-dbt@users.noreply.github.com> (cherry picked from commit 0d37ba6032d1c1d4d1661ebe53af5469ba9e6bd9) Co-authored-by: salimmoulouel <36620917+salimmoulouel@users.noreply.github.com> --- .../unreleased/Fixes-20240226-233024.yaml | 6 ++++ dbt/adapters/bigquery/impl.py | 6 ++-- .../bigquery/macros/materializations/seed.sql | 2 +- tests/functional/adapter/test_simple_seed.py | 36 ++++++++++++++++++- 4 files changed, 46 insertions(+), 4 deletions(-) create mode 100644 .changes/unreleased/Fixes-20240226-233024.yaml diff --git a/.changes/unreleased/Fixes-20240226-233024.yaml b/.changes/unreleased/Fixes-20240226-233024.yaml new file mode 100644 index 000000000..efb1b077c --- /dev/null +++ b/.changes/unreleased/Fixes-20240226-233024.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: make seed delimiter configurable via `delimiter` in model config +time: 2024-02-26T23:30:24.141213+01:00 +custom: + Author: 
salimmoulouel + Issue: "1119" diff --git a/dbt/adapters/bigquery/impl.py b/dbt/adapters/bigquery/impl.py index 700c03719..36222f44f 100644 --- a/dbt/adapters/bigquery/impl.py +++ b/dbt/adapters/bigquery/impl.py @@ -654,7 +654,9 @@ def alter_table_add_columns(self, relation, columns): client.update_table(new_table, ["schema"]) @available.parse_none - def load_dataframe(self, database, schema, table_name, agate_table, column_override): + def load_dataframe( + self, database, schema, table_name, agate_table, column_override, field_delimiter + ): bq_schema = self._agate_to_schema(agate_table, column_override) conn = self.connections.get_thread_connection() client = conn.handle @@ -664,7 +666,7 @@ def load_dataframe(self, database, schema, table_name, agate_table, column_overr load_config = google.cloud.bigquery.LoadJobConfig() load_config.skip_leading_rows = 1 load_config.schema = bq_schema - + load_config.field_delimiter = field_delimiter with open(agate_table.original_abspath, "rb") as f: job = client.load_table_from_file(f, table_ref, rewind=True, job_config=load_config) diff --git a/dbt/include/bigquery/macros/materializations/seed.sql b/dbt/include/bigquery/macros/materializations/seed.sql index 6ac7337f3..c89d00598 100644 --- a/dbt/include/bigquery/macros/materializations/seed.sql +++ b/dbt/include/bigquery/macros/materializations/seed.sql @@ -11,7 +11,7 @@ {%- set column_override = model['config'].get('column_types', {}) -%} {{ adapter.load_dataframe(model['database'], model['schema'], model['alias'], - agate_table, column_override) }} + agate_table, column_override, model['config']['delimiter']) }} {% call statement() %} alter table {{ this.render() }} set {{ bigquery_table_options(config, model) }} diff --git a/tests/functional/adapter/test_simple_seed.py b/tests/functional/adapter/test_simple_seed.py index b01f99346..5ec19d420 100644 --- a/tests/functional/adapter/test_simple_seed.py +++ b/tests/functional/adapter/test_simple_seed.py @@ -5,7 +5,6 @@ from 
dbt.tests.adapter.simple_seed.test_seed import BaseTestEmptySeed from dbt.tests.adapter.utils.base_utils import run_dbt - _SEED_CONFIGS_CSV = """ seed_id,stuff 1,a @@ -156,3 +155,38 @@ def test__bigquery_seed_table_with_labels_config_bigquery(self, project): class TestBigQueryEmptySeed(BaseTestEmptySeed): pass + + +class TestBigQuerySeedWithUniqueDelimiter(TestSimpleSeedConfigs): + @pytest.fixture(scope="class") + def seeds(self): + return { + "seed_enabled.csv": seeds__enabled_in_config_csv.replace(",", "|"), + "seed_tricky.csv": seeds__tricky_csv.replace(",", "\t"), + "seed_configs.csv": _SEED_CONFIGS_CSV, + } + + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "config-version": 2, + "seeds": { + "test": { + "enabled": False, + "quote_columns": True, + "seed_enabled": { + "enabled": True, + "+column_types": self.seed_enabled_types(), + "delimiter": "|", + }, + "seed_tricky": { + "enabled": True, + "+column_types": self.seed_tricky_types(), + "delimiter": "\t", + }, + "seed_configs": { + "enabled": True, + }, + }, + }, + }