diff --git a/docs/content/faq/execution.md b/docs/content/faq/execution.md index d60a075..eae7d3d 100644 --- a/docs/content/faq/execution.md +++ b/docs/content/faq/execution.md @@ -12,7 +12,7 @@ Please refer to the [Getting Started]({{%relref "getting_started/_index.md" %}}) **Q. How do I create metadata DLT-META ?** DLT-META needs following metadata files: -- [Onboarding File](https://github.com/databrickslabs/dlt-meta/blob/main/examples/onboarding.json) captures input/output metadata +- [Onboarding File](https://github.com/databrickslabs/dlt-meta/blob/main/examples/onboarding.template) captures input/output metadata - [Data Quality Rules File](https://github.com/databrickslabs/dlt-meta/tree/main/examples/dqe) captures data quality rules - [Silver transformation File](https://github.com/databrickslabs/dlt-meta/blob/main/examples/silver_transformations.json) captures processing logic as sql @@ -28,7 +28,7 @@ DLT-META uses data_flow_group to launch DLT pipelines, so all the tables belongs **Q. Can we run onboarding for bronze layer only?** Yes! Please follow below steps: -1. Bronze Metadata preparation ([example](https://github.com/databrickslabs/dlt-meta/blob/main/integration_tests/conf/cloudfiles-onboarding_A2.template)) +1. Bronze Metadata preparation ([example](https://github.com/databrickslabs/dlt-meta/blob/main/examples/bronze_onboarding.template)) 2. Onboarding Job - Option#1: [DLT-META CLI](https://databrickslabs.github.io/dlt-meta/getting_started/dltmeta_cli/#onboardjob) - Option#2: [Manual Job](https://databrickslabs.github.io/dlt-meta/getting_started/dltmeta_manual/#onboardjob) @@ -37,7 +37,7 @@ Yes! Please follow below steps: { "onboard_layer": "bronze", "database": "dlt_demo", - "onboarding_file_path": "dbfs:/onboarding_files/users_onboarding.json", + "onboarding_file_path": "dbfs:/dlt-meta/conf/onboarding.json", "bronze_dataflowspec_table": "bronze_dataflowspec_table", "import_author": "Ravi", "version": "v1", @@ -50,7 +50,7 @@ Yes! 
Please follow below steps: ``` onboarding_params_map = { "database": "uc_name.dlt_demo", - "onboarding_file_path": "dbfs:/onboarding_files/users_onboarding.json", + "onboarding_file_path": "dbfs:/dlt-meta/conf/onboarding.json", "bronze_dataflowspec_table": "bronze_dataflowspec_table", "overwrite": "True", "env": "dev", diff --git a/docs/content/getting_started/dltmeta_manual.md b/docs/content/getting_started/dltmeta_manual.md index 0d8f4e1..0b64804 100644 --- a/docs/content/getting_started/dltmeta_manual.md +++ b/docs/content/getting_started/dltmeta_manual.md @@ -33,7 +33,7 @@ draft: false { "onboard_layer": "bronze_silver", "database": "dlt_demo", - "onboarding_file_path": "dbfs:/onboarding_files/users_onboarding.json", + "onboarding_file_path": "dbfs:/dlt-meta/conf/onboarding.json", "silver_dataflowspec_table": "silver_dataflowspec_table", "silver_dataflowspec_path": "dbfs:/onboarding_tables_cdc/silver", "bronze_dataflowspec_table": "bronze_dataflowspec_table", @@ -51,7 +51,7 @@ draft: false { "onboard_layer": "bronze_silver", "database": "uc_name.dlt_demo", - "onboarding_file_path": "dbfs:/onboarding_files/users_onboarding.json", + "onboarding_file_path": "dbfs:/dlt-meta/conf/onboarding.json", "silver_dataflowspec_table": "silver_dataflowspec_table", "bronze_dataflowspec_table": "bronze_dataflowspec_table", "import_author": "Ravi", @@ -78,7 +78,7 @@ Alternatly you can enter keyword arguments, click + Add and enter a key and valu ```python onboarding_params_map = { "database": "dlt_demo", - "onboarding_file_path": "dbfs:/onboarding_files/users_onboarding.json", + "onboarding_file_path": "dbfs:/dlt-meta/conf/onboarding.json", "bronze_dataflowspec_table": "bronze_dataflowspec_table", "bronze_dataflowspec_path": "dbfs:/onboarding_tables_cdc/bronze", "silver_dataflowspec_table": "silver_dataflowspec_table", @@ -96,7 +96,7 @@ OnboardDataflowspec(spark, onboarding_params_map).onboard_dataflow_specs() ```python onboarding_params_map = { "database": "uc_name.dlt_demo", - 
"onboarding_file_path": "dbfs:/onboarding_files/users_onboarding.json", + "onboarding_file_path": "dbfs:/dlt-meta/conf/onboarding.json", "bronze_dataflowspec_table": "bronze_dataflowspec_table", "silver_dataflowspec_table": "silver_dataflowspec_table", "overwrite": "True", diff --git a/examples/bronze_onboarding.json b/examples/bronze_onboarding.template similarity index 57% rename from examples/bronze_onboarding.json rename to examples/bronze_onboarding.template index a6ebae6..60aa73c 100644 --- a/examples/bronze_onboarding.json +++ b/examples/bronze_onboarding.template @@ -7,7 +7,7 @@ "source_details": { "source_database": "APP", "source_table": "CUSTOMERS", - "source_path_it": "dbfs:/tmp/DLT-META//eedf48c2a67647f2b438ba4a3a757e7a/integration_tests/resources/data/customers_delta", + "source_path_it": "{dbfs_path}/resources/data/customers_delta", "source_metadata": { "include_autoloader_metadata_column": "True", "autoloader_metadata_col_name": "source_metadata", @@ -16,24 +16,24 @@ "input_file_path": "_metadata.file_path" } }, - "source_schema_path": "dbfs:/tmp/DLT-META//eedf48c2a67647f2b438ba4a3a757e7a/integration_tests/resources/customers.ddl" + "source_schema_path": "{dbfs_path}/resources/customers.ddl" }, - "bronze_database_it": "ravi_dlt_meta_uc.dlt_meta_bronze_it_eedf48c2a67647f2b438ba4a3a757e7a", + "bronze_database_it": "{uc_catalog_name}.{bronze_schema}", "bronze_table": "customers_delta", "bronze_reader_options": { "cloudFiles.format": "json", "cloudFiles.inferColumnTypes": "true", "cloudFiles.rescuedDataColumn": "_rescued_data" }, - "bronze_table_path_it": "dbfs:/tmp/DLT-META//eedf48c2a67647f2b438ba4a3a757e7a/data/bronze/customers_delta", + "bronze_table_path_it": "{dbfs_path}/data/bronze/customers_delta", "bronze_table_properties": { "pipelines.autoOptimize.managed": "true", "pipelines.autoOptimize.zOrderCols": "id, email" }, - "bronze_data_quality_expectations_json_it": 
"dbfs:/tmp/DLT-META//eedf48c2a67647f2b438ba4a3a757e7a/integration_tests/conf/dqe/customers/bronze_data_quality_expectations.json", - "bronze_database_quarantine_it": "ravi_dlt_meta_uc.dlt_meta_bronze_it_eedf48c2a67647f2b438ba4a3a757e7a", + "bronze_data_quality_expectations_json_it": "{dbfs_path}/conf/dqe/customers/bronze_data_quality_expectations.json", + "bronze_database_quarantine_it": "{uc_catalog_name}.{bronze_schema}", "bronze_quarantine_table": "customers_delta_quarantine", - "bronze_quarantine_table_path_it": "dbfs:/tmp/DLT-META//eedf48c2a67647f2b438ba4a3a757e7a/data/bronze/customers_quarantine_delta", + "bronze_quarantine_table_path_it": "{dbfs_path}/data/bronze/customers_quarantine_delta", "bronze_quarantine_table_properties": { "pipelines.reset.allowed": "false", "pipelines.autoOptimize.zOrderCols": "id, email" diff --git a/examples/onboarding.json b/examples/onboarding.template similarity index 100% rename from examples/onboarding.json rename to examples/onboarding.template diff --git a/integration_tests/conf/onboarding_A2.json b/integration_tests/conf/onboarding_A2.json deleted file mode 100644 index 832a61a..0000000 --- a/integration_tests/conf/onboarding_A2.json +++ /dev/null @@ -1 +0,0 @@ -[{"data_flow_id": "103", "data_flow_group": "A2", "source_system": "MYSQL", "source_format": "cloudFiles", "source_details": {"source_database": "APP", "source_table": "CUSTOMERS", "source_path_it": "dbfs:/tmp/DLT-META//eedf48c2a67647f2b438ba4a3a757e7a/integration_tests/resources/data/customers_delta", "source_metadata": {"include_autoloader_metadata_column": "True", "autoloader_metadata_col_name": "source_metadata", "select_metadata_cols": {"input_file_name": "_metadata.file_name", "input_file_path": "_metadata.file_path"}}, "source_schema_path": "dbfs:/tmp/DLT-META//eedf48c2a67647f2b438ba4a3a757e7a/integration_tests/resources/customers.ddl"}, "bronze_database_it": "ravi_dlt_meta_uc.dlt_meta_bronze_it_eedf48c2a67647f2b438ba4a3a757e7a", "bronze_table": 
"customers_delta", "bronze_reader_options": {"cloudFiles.format": "json", "cloudFiles.inferColumnTypes": "true", "cloudFiles.rescuedDataColumn": "_rescued_data"}, "bronze_table_path_it": "dbfs:/tmp/DLT-META//eedf48c2a67647f2b438ba4a3a757e7a/data/bronze/customers_delta", "bronze_table_properties": {"pipelines.autoOptimize.managed": "true", "pipelines.autoOptimize.zOrderCols": "id, email"}, "bronze_data_quality_expectations_json_it": "dbfs:/tmp/DLT-META//eedf48c2a67647f2b438ba4a3a757e7a/integration_tests/conf/dqe/customers/bronze_data_quality_expectations.json", "bronze_database_quarantine_it": "ravi_dlt_meta_uc.dlt_meta_bronze_it_eedf48c2a67647f2b438ba4a3a757e7a", "bronze_quarantine_table": "customers_delta_quarantine", "bronze_quarantine_table_path_it": "dbfs:/tmp/DLT-META//eedf48c2a67647f2b438ba4a3a757e7a/data/bronze/customers_quarantine_delta", "bronze_quarantine_table_properties": {"pipelines.reset.allowed": "false", "pipelines.autoOptimize.zOrderCols": "id, email"}}] \ No newline at end of file