diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 20bed6099cdae3..0382f821a31ca3 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -307,6 +307,7 @@ module.exports = { }, { "DataHub Cloud Release History": [ + "docs/managed-datahub/release-notes/v_0_3_5", "docs/managed-datahub/release-notes/v_0_3_4", "docs/managed-datahub/release-notes/v_0_3_3", "docs/managed-datahub/release-notes/v_0_3_2", diff --git a/docs/managed-datahub/release-notes/v_0_3_5.md b/docs/managed-datahub/release-notes/v_0_3_5.md new file mode 100644 index 00000000000000..468f2bd59a918b --- /dev/null +++ b/docs/managed-datahub/release-notes/v_0_3_5.md @@ -0,0 +1,33 @@ +# v0.3.5 +--- + +Release Availability Date +--- +02-Sep-2024 + +Recommended CLI/SDK +--- +- `v0.14.0.2` with release notes at https://github.com/acryldata/datahub/releases/tag/v0.14.0.2 + +If you are using an older CLI/SDK version, please upgrade it. This applies to all CLI/SDK usage, whether through your terminal, GitHub Actions, Airflow, the Python SDK, the Java SDK, etc. We strongly recommend upgrading, as we keep pushing fixes to the CLI, and it helps us support you better. 
+ +## Release Changelog +--- + +- All changes in https://github.com/datahub-project/datahub/releases/tag/v0.14.0.2 + - Note the breaking changes: https://datahubproject.io/docs/how/updating-datahub/#0140 + +- Product changes + - Misc fixes and improvements for the Snowflake Tag Propagation Automation (Beta) + - Misc fixes and improvements for the Glossary Term Tag Propagation Automation (Beta) + - Misc fixes and improvements for the Column Docs Propagation Automation (Beta) + - Minor UX improvements on the groups profile page + - Add 'Explore All' button to search dropdown permanently + - Add toggle to filter out transformations in lineage viz + - Misc fixes and minor improvements around the subscriptions and Slack integrations experience + - The new Slack ingestion source enables one-click subscriptions for your users by automatically hydrating users' memberIDs. New users who sign up will also automatically have their memberIDs hydrated. + - Please reach out to the Acryl team to get assistance in setting this up. + - Installing or re-installing the Slack bot will now enable the `/datahub` command and ensure your `botToken` is compatible with the new Slack ingestion source. You can reach out to the Acryl team for assistance with re-installation. 
+ +- Ingestion changes + - New configuration for dbt lineage, "merges" sources with their sibling in viz: set `prefer_sql_parser_lineage` and `skip_sources_in_lineage` in ingestion; set flag `HIDE_DBT_SOURCE_IN_LINEAGE=true` in gms diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 22ff8025aa0a06..cbe3a6c250c1e7 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -301,6 +301,7 @@ databricks = { # 0.1.11 appears to have authentication issues with azure databricks + # 0.22.0 has support for `include_browse` in metadata list apis "databricks-sdk>=0.30.0", "pyspark~=3.3.0", "requests", diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/hive_metastore_proxy.py b/metadata-ingestion/src/datahub/ingestion/source/unity/hive_metastore_proxy.py index c99fe3b09c5bb5..eea10d940bd1c8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/hive_metastore_proxy.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/hive_metastore_proxy.py @@ -135,8 +135,8 @@ def get_table_names(self, schema_name: str) -> List[str]: def get_view_names(self, schema_name: str) -> List[str]: try: rows = self._execute_sql(f"SHOW VIEWS FROM `{schema_name}`") - # 3 columns - database, tableName, isTemporary - return [row.tableName for row in rows] + # 4 columns - namespace, viewName, isTemporary, isMaterialized + return [row.viewName for row in rows] except Exception as e: self.report.report_warning("Failed to get views for schema", schema_name) logger.warning( diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py index 112acd8101297f..bd987c2da7c764 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py @@ -109,7 +109,7 @@ def __init__( self.hive_metastore_proxy = hive_metastore_proxy def check_basic_connectivity(self) -> bool: - return 
bool(self._workspace_client.catalogs.list()) + return bool(self._workspace_client.catalogs.list(include_browse=True)) def assigned_metastore(self) -> Optional[Metastore]: response = self._workspace_client.metastores.summary() @@ -119,7 +119,7 @@ def catalogs(self, metastore: Optional[Metastore]) -> Iterable[Catalog]: if self.hive_metastore_proxy: yield self.hive_metastore_proxy.hive_metastore_catalog(metastore) - response = self._workspace_client.catalogs.list() + response = self._workspace_client.catalogs.list(include_browse=True) if not response: logger.info("Catalogs not found") return @@ -131,7 +131,9 @@ def catalogs(self, metastore: Optional[Metastore]) -> Iterable[Catalog]: def catalog( self, catalog_name: str, metastore: Optional[Metastore] ) -> Optional[Catalog]: - response = self._workspace_client.catalogs.get(catalog_name) + response = self._workspace_client.catalogs.get( + catalog_name, include_browse=True + ) if not response: logger.info(f"Catalog {catalog_name} not found") return None @@ -148,7 +150,9 @@ def schemas(self, catalog: Catalog) -> Iterable[Schema]: ): yield from self.hive_metastore_proxy.hive_metastore_schemas(catalog) return - response = self._workspace_client.schemas.list(catalog_name=catalog.name) + response = self._workspace_client.schemas.list( + catalog_name=catalog.name, include_browse=True + ) if not response: logger.info(f"Schemas not found for catalog {catalog.id}") return @@ -166,7 +170,9 @@ def tables(self, schema: Schema) -> Iterable[Table]: return with patch("databricks.sdk.service.catalog.TableInfo", TableInfoWithGeneration): response = self._workspace_client.tables.list( - catalog_name=schema.catalog.name, schema_name=schema.name + catalog_name=schema.catalog.name, + schema_name=schema.name, + include_browse=True, ) if not response: logger.info(f"Tables not found for schema {schema.id}") diff --git a/metadata-ingestion/tests/integration/unity/test_unity_catalog_ingest.py 
b/metadata-ingestion/tests/integration/unity/test_unity_catalog_ingest.py index 22a48efdec41d7..c078f1b77fd1be 100644 --- a/metadata-ingestion/tests/integration/unity/test_unity_catalog_ingest.py +++ b/metadata-ingestion/tests/integration/unity/test_unity_catalog_ingest.py @@ -276,6 +276,9 @@ def register_mock_data(workspace_client): TableEntry = namedtuple("TableEntry", ["database", "tableName", "isTemporary"]) +ViewEntry = namedtuple( + "ViewEntry", ["namespace", "viewName", "isTemporary", "isMaterialized"] +) def mock_hive_sql(query): @@ -418,7 +421,7 @@ def mock_hive_sql(query): TableEntry("bronze_kambi", "view1", False), ] elif query == "SHOW VIEWS FROM `bronze_kambi`": - return [TableEntry("bronze_kambi", "view1", False)] + return [ViewEntry("bronze_kambi", "view1", False, False)] return []