feat(throttle): extend throttling to API requests #11325

Merged: 1 commit, merged on Sep 12, 2024
1 change: 1 addition & 0 deletions datahub-upgrade/src/main/resources/application.properties
@@ -3,3 +3,4 @@ management.health.neo4j.enabled=false
ingestion.enabled=false
spring.main.allow-bean-definition-overriding=true
entityClient.impl=restli
metadataChangeProposal.throttle.updateIntervalMs=0
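
The property added above sets the throttle refresh interval to zero for the `datahub-upgrade` job, which does not serve API traffic. As a minimal sketch of how a zero interval can be read as "disable the periodic refresh" (the class and method below are hypothetical illustrations, not DataHub's actual implementation):

```java
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

// Hypothetical sketch: a scheduler that treats updateIntervalMs <= 0 as "throttle refresh disabled".
public class ThrottleRefreshScheduler {
  private final ScheduledExecutorService executor = Executors.newSingleThreadScheduledExecutor();

  public void start(long updateIntervalMs, Runnable refreshThrottleState) {
    if (updateIntervalMs <= 0) {
      // Matches the intent of metadataChangeProposal.throttle.updateIntervalMs=0 above:
      // no periodic throttle-state refresh is scheduled for datahub-upgrade.
      return;
    }
    executor.scheduleAtFixedRate(refreshThrottleState, 0, updateIntervalMs, TimeUnit.MILLISECONDS);
  }
}
```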
UpgradeCliApplicationTest.java (datahub-upgrade)
@@ -1,12 +1,18 @@
package com.linkedin.datahub.upgrade;

import static org.testng.AssertJUnit.*;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertNotNull;
import static org.testng.Assert.assertTrue;

import com.linkedin.datahub.upgrade.restoreindices.RestoreIndices;
import com.linkedin.datahub.upgrade.system.BlockingSystemUpgrade;
import com.linkedin.metadata.dao.throttle.NoOpSensor;
import com.linkedin.metadata.dao.throttle.ThrottleSensor;
import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder;
import javax.inject.Named;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.ActiveProfiles;
import org.springframework.test.context.testng.AbstractTestNGSpringContextTests;
@@ -28,6 +34,10 @@ public class UpgradeCliApplicationTest extends AbstractTestNGSpringContextTests

@Autowired private ESIndexBuilder esIndexBuilder;

@Qualifier("kafkaThrottle")
@Autowired
private ThrottleSensor kafkaThrottle;

@Test
public void testRestoreIndicesInit() {
/*
@@ -46,4 +56,10 @@ public void testBuildIndicesInit() {
assertFalse(
esIndexBuilder.getElasticSearchConfiguration().getBuildIndices().isAllowDocCountMismatch());
}

@Test
public void testNoThrottle() {
assertEquals(
new NoOpSensor(), kafkaThrottle, "No kafka throttle controls expected in datahub-upgrade");
}
}
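
The added `testNoThrottle` case asserts that the bean qualified as `kafkaThrottle` in `datahub-upgrade` is a `NoOpSensor`, i.e. the upgrade job wires no Kafka-lag throttle controls. A hedged sketch of Spring wiring that would satisfy this assertion (the configuration class below is an assumption for illustration, not the PR's actual factory):

```java
import com.linkedin.metadata.dao.throttle.NoOpSensor;
import com.linkedin.metadata.dao.throttle.ThrottleSensor;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

// Hypothetical fallback wiring: expose a no-op throttle sensor when Kafka throttling is not configured.
@Configuration
public class NoOpThrottleConfiguration {

  @Bean(name = "kafkaThrottle")
  public ThrottleSensor kafkaThrottle() {
    // The test above compares the injected bean to a fresh NoOpSensor instance.
    return new NoOpSensor();
  }
}
```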
37 changes: 19 additions & 18 deletions docs/authorization/policies.md
@@ -146,15 +146,15 @@ These privileges are for DataHub operators to access & manage the administrative

#### Access & Credentials

| Platform Privileges | Description |
|---------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| Generate Personal Access Tokens | Allow actor to generate personal access tokens for use with DataHub APIs. |
| Manage Policies | Allow actor to create and remove access control policies. Be careful - Actors with this privilege are effectively super users. |
| Manage Secrets | Allow actor to create & remove Secrets stored inside DataHub. |
| Manage Users & Groups | Allow actor to create, remove, and update users and groups on DataHub. |
| Manage All Access Tokens | Allow actor to create, list and revoke access tokens on behalf of users in DataHub. Be careful - Actors with this privilege are effectively super users that can impersonate other users. |
| Manage User Credentials         | Allow actor to manage credentials for native DataHub users, including inviting new users and resetting passwords.                                                                          |
| Manage Connections | Allow actor to manage connections to external DataHub platforms. |

#### Product Features

@@ -191,15 +191,16 @@ These privileges are for DataHub operators to access & manage the administrative

#### System Management

| Platform Privileges | Description |
|-----------------------------------------------|------------------------------------------------------------------------|
| Restore Indices API[^1]                       | Allow actor to use the Restore Indices API.                              |
| Get Timeseries index sizes API[^1] | Allow actor to use the get Timeseries indices size API. |
| Truncate timeseries aspect index size API[^1] | Allow actor to use the API to truncate a timeseries index. |
| Get ES task status API[^1] | Allow actor to use the get task status API for an ElasticSearch task. |
| Enable/Disable Writeability API[^1] | Allow actor to enable or disable GMS writeability for data migrations. |
| Apply Retention API[^1] | Allow actor to apply retention using the API. |
| Analytics API access[^1] | Allow actor to use API read access to raw analytics data. |
| Manage System Operations | Allow actor to manage system operation controls. |

[^1]: Only active if REST_API_AUTHORIZATION_ENABLED is true
[^2]: DataHub Cloud only
22 changes: 11 additions & 11 deletions docs/deploy/environment-vars.md
@@ -14,21 +14,21 @@ DataHub works.
| `UI_INGESTION_ENABLED` | `true` | boolean | [`GMS`, `MCE Consumer`] | Enable UI based ingestion. |
| `DATAHUB_ANALYTICS_ENABLED` | `true` | boolean | [`Frontend`, `GMS`] | Collect DataHub usage to populate the analytics dashboard. |
| `BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE` | `true` | boolean | [`GMS`, `MCE Consumer`, `MAE Consumer`] | Do not wait for the `system-update` to complete before starting. This should typically only be disabled during development. |
| `ER_MODEL_RELATIONSHIP_FEATURE_ENABLED`          | `false` | boolean | [`Frontend`, `GMS`]                     | Enable ER Model Relation Feature that shows Relationships Tab within a Dataset UI.                                          |


## Ingestion

| Variable | Default | Unit/Type | Components | Description |
|-----------------------------------|---------|-----------|-------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `ASYNC_INGEST_DEFAULT` | `false` | boolean | [`GMS`] | Asynchronously process ingestProposals by writing the ingestion MCP to Kafka. Typically enabled with standalone consumers. |
| `MCP_CONSUMER_ENABLED` | `true` | boolean | [`GMS`, `MCE Consumer`] | When running in standalone mode, disabled on `GMS` and enabled on separate `MCE Consumer`. |
| `MCL_CONSUMER_ENABLED` | `true` | boolean | [`GMS`, `MAE Consumer`] | When running in standalone mode, disabled on `GMS` and enabled on separate `MAE Consumer`. |
| `PE_CONSUMER_ENABLED` | `true` | boolean | [`GMS`, `MAE Consumer`] | When running in standalone mode, disabled on `GMS` and enabled on separate `MAE Consumer`. |
| `ES_BULK_REQUESTS_LIMIT` | 1000 | docs | [`GMS`, `MAE Consumer`] | Number of bulk documents to index. `MAE Consumer` if standalone. |
| `ES_BULK_FLUSH_PERIOD` | 1 | seconds | [`GMS`, `MAE Consumer`] | How frequently indexed documents are made available for query. |
| `ALWAYS_EMIT_CHANGE_LOG`          | `false` | boolean   | [`GMS`]                 | Enables always emitting an MCL even when no changes are detected. Used for Time Based Lineage when no changes occur.                                                                 |
| `GRAPH_SERVICE_DIFF_MODE_ENABLED` | `true`  | boolean   | [`GMS`]                 | Enables diff mode for graph writes, using a code path that computes a diff from previous to next state and writes only the changed relationships instead of wholesale deleting and re-adding edges. |

## Caching

Expand Down
1 change: 1 addition & 0 deletions lombok.config
@@ -1,2 +1,3 @@
config.stopBubbling = true
lombok.addLombokGeneratedAnnotation = true
lombok.copyableAnnotations += org.springframework.beans.factory.annotation.Qualifier
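
The added `copyableAnnotations` entry makes Lombok copy `@Qualifier` from annotated fields onto the parameters of its generated constructors, so beans built with `@RequiredArgsConstructor` still receive the correctly qualified dependency (such as the `kafkaThrottle` sensor above). A small illustrative example of the pattern this enables (the service class below is not part of the PR):

```java
import com.linkedin.metadata.dao.throttle.ThrottleSensor;
import lombok.RequiredArgsConstructor;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.stereotype.Component;

// Illustrative only: with copyableAnnotations, @Qualifier is carried onto the generated constructor parameter.
@Component
@RequiredArgsConstructor
public class ThrottleAwareService {

  // Without the lombok.config entry, the qualifier would remain on the field and be ignored
  // during constructor injection, so Spring could inject the wrong ThrottleSensor bean.
  @Qualifier("kafkaThrottle")
  private final ThrottleSensor kafkaThrottle;
}
```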