From 738eaed6f13860d4705269cbe2d406f767983163 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Thu, 12 Sep 2024 09:52:20 -0500 Subject: [PATCH] feat(throttle): extend throttling to API requests (#11325) --- .../src/main/resources/application.properties | 1 + .../upgrade/UpgradeCliApplicationTest.java | 18 +- docs/authorization/policies.md | 37 ++-- docs/deploy/environment-vars.md | 22 +-- lombok.config | 1 + .../KafkaThrottleSensor.java} | 157 ++++++++++------- .../KafkaThrottleSensorTest.java} | 101 +++++------ metadata-io/build.gradle | 2 + .../metadata/dao/throttle/APIThrottle.java | 82 +++++++++ .../dao/throttle/APIThrottleException.java | 20 +++ .../metadata/dao/throttle/NoOpSensor.java | 12 ++ .../dao/throttle/ThrottleControl.java | 31 ++++ .../metadata/dao/throttle/ThrottleEvent.java | 96 +++++++++++ .../metadata/dao/throttle/ThrottleSensor.java | 7 + .../metadata/dao/throttle/ThrottleType.java | 7 + .../metadata/entity/EntityServiceImpl.java | 25 +++ .../dao/throttle/APIThrottleTest.java | 162 ++++++++++++++++++ .../MetadataChangeProposalsProcessor.java | 57 +++++- .../context/TestOperationContexts.java | 6 + .../config/MetadataChangeProposalConfig.java | 20 +++ .../src/main/resources/application.yaml | 17 +- .../gms/factory/common/CacheConfig.java | 65 +++++-- .../factory/entity/EntityServiceFactory.java | 40 ++++- .../entity/throttle/ManualThrottleSensor.java | 117 +++++++++++++ ...Factory.java => KafkaThrottleFactory.java} | 57 +++--- .../GlobalControllerExceptionHandler.java | 15 ++ .../throttle/ThrottleController.java | 113 ++++++++++++ .../resources/entity/AspectResource.java | 14 +- .../entity/BatchIngestionRunResource.java | 8 +- .../resources/entity/EntityResource.java | 53 +++--- .../resources/entity/EntityV2Resource.java | 6 +- .../entity/EntityVersionedV2Resource.java | 4 +- .../resources/lineage/Relationships.java | 6 +- .../operations/OperationsResource.java | 10 +- 
.../resources/platform/PlatformResource.java | 4 +- .../resources/restli/RestliUtils.java | 48 +++++- .../metadata/resources/usage/UsageStats.java | 8 +- .../war/src/main/resources/boot/policies.json | 3 +- .../authorization/PoliciesConfig.java | 9 +- .../linkedin/metadata/restli/RestliUtil.java | 101 ----------- 40 files changed, 1189 insertions(+), 373 deletions(-) rename metadata-dao-impl/kafka-producer/src/main/java/com/datahub/metadata/dao/{producer/KafkaProducerThrottle.java => throttle/KafkaThrottleSensor.java} (61%) rename metadata-dao-impl/kafka-producer/src/test/java/com/datahub/metadata/dao/{producer/KafkaProducerThrottleTest.java => throttle/KafkaThrottleSensorTest.java} (80%) create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/APIThrottle.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/APIThrottleException.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/NoOpSensor.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/ThrottleControl.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/ThrottleEvent.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/ThrottleSensor.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/ThrottleType.java create mode 100644 metadata-io/src/test/java/com/linkedin/metadata/dao/throttle/APIThrottleTest.java create mode 100644 metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/throttle/ManualThrottleSensor.java rename metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/throttle/{KafkaProducerThrottleFactory.java => KafkaThrottleFactory.java} (59%) create mode 100644 metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/throttle/ThrottleController.java delete mode 100644 
metadata-utils/src/main/java/com/linkedin/metadata/restli/RestliUtil.java diff --git a/datahub-upgrade/src/main/resources/application.properties b/datahub-upgrade/src/main/resources/application.properties index b884c92f74bd48..847c264dfac38c 100644 --- a/datahub-upgrade/src/main/resources/application.properties +++ b/datahub-upgrade/src/main/resources/application.properties @@ -3,3 +3,4 @@ management.health.neo4j.enabled=false ingestion.enabled=false spring.main.allow-bean-definition-overriding=true entityClient.impl=restli +metadataChangeProposal.throttle.updateIntervalMs=0 \ No newline at end of file diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTest.java index dc4c3073ee351c..8b6899b4c78866 100644 --- a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTest.java +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTest.java @@ -1,12 +1,18 @@ package com.linkedin.datahub.upgrade; -import static org.testng.AssertJUnit.*; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; import com.linkedin.datahub.upgrade.restoreindices.RestoreIndices; import com.linkedin.datahub.upgrade.system.BlockingSystemUpgrade; +import com.linkedin.metadata.dao.throttle.NoOpSensor; +import com.linkedin.metadata.dao.throttle.ThrottleSensor; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; import javax.inject.Named; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.boot.test.context.SpringBootTest; import org.springframework.test.context.ActiveProfiles; import 
org.springframework.test.context.testng.AbstractTestNGSpringContextTests; @@ -28,6 +34,10 @@ public class UpgradeCliApplicationTest extends AbstractTestNGSpringContextTests @Autowired private ESIndexBuilder esIndexBuilder; + @Qualifier("kafkaThrottle") + @Autowired + private ThrottleSensor kafkaThrottle; + @Test public void testRestoreIndicesInit() { /* @@ -46,4 +56,10 @@ public void testBuildIndicesInit() { assertFalse( esIndexBuilder.getElasticSearchConfiguration().getBuildIndices().isAllowDocCountMismatch()); } + + @Test + public void testNoThrottle() { + assertEquals( + new NoOpSensor(), kafkaThrottle, "No kafka throttle controls expected in datahub-upgrade"); + } } diff --git a/docs/authorization/policies.md b/docs/authorization/policies.md index 45d0b59e408337..5c99241f75190f 100644 --- a/docs/authorization/policies.md +++ b/docs/authorization/policies.md @@ -146,15 +146,15 @@ These privileges are for DataHub operators to access & manage the administrative #### Access & Credentials -| Platform Privileges | Description | -|--------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Generate Personal Access Tokens | Allow actor to generate personal access tokens for use with DataHub APIs. | -| Manage Policies | Allow actor to create and remove access control policies. Be careful - Actors with this privilege are effectively super users. | -| Manage Secrets | Allow actor to create & remove Secrets stored inside DataHub. | -| Manage Users & Groups | Allow actor to create, remove, and update users and groups on DataHub. | -| Manage All Access Tokens | Allow actor to create, list and revoke access tokens on behalf of users in DataHub. Be careful - Actors with this privilege are effectively super users that can impersonate other users. 
| -| Manage User Credentials | Allow actor to manage credentials for native DataHub users, including inviting new users and resetting passwords | | -| Manage Connections | Allow actor to manage connections to external DataHub platforms. | +| Platform Privileges | Description | +|---------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Generate Personal Access Tokens | Allow actor to generate personal access tokens for use with DataHub APIs. | +| Manage Policies | Allow actor to create and remove access control policies. Be careful - Actors with this privilege are effectively super users. | +| Manage Secrets | Allow actor to create & remove Secrets stored inside DataHub. | +| Manage Users & Groups | Allow actor to create, remove, and update users and groups on DataHub. | +| Manage All Access Tokens | Allow actor to create, list and revoke access tokens on behalf of users in DataHub. Be careful - Actors with this privilege are effectively super users that can impersonate other users. | +| Manage User Credentials | Allow actor to manage credentials for native DataHub users, including inviting new users and resetting passwords | | +| Manage Connections | Allow actor to manage connections to external DataHub platforms. | #### Product Features @@ -191,15 +191,16 @@ These privileges are for DataHub operators to access & manage the administrative #### System Management -| Platform Privileges | Description | -|-----------------------------------------------|--------------------------------------------------------------------------| -| Restore Indices API[^1] | Allow actor to use the Restore Indices API. | | -| Get Timeseries index sizes API[^1] | Allow actor to use the get Timeseries indices size API. 
| -| Truncate timeseries aspect index size API[^1] | Allow actor to use the API to truncate a timeseries index. | -| Get ES task status API[^1] | Allow actor to use the get task status API for an ElasticSearch task. | -| Enable/Disable Writeability API[^1] | Allow actor to enable or disable GMS writeability for data migrations. | -| Apply Retention API[^1] | Allow actor to apply retention using the API. | -| Analytics API access[^1] | Allow actor to use API read access to raw analytics data. | +| Platform Privileges | Description | +|-----------------------------------------------|------------------------------------------------------------------------| +| Restore Indices API[^1] | Allow actor to use the Restore Indices API. | | +| Get Timeseries index sizes API[^1] | Allow actor to use the get Timeseries indices size API. | +| Truncate timeseries aspect index size API[^1] | Allow actor to use the API to truncate a timeseries index. | +| Get ES task status API[^1] | Allow actor to use the get task status API for an ElasticSearch task. | +| Enable/Disable Writeability API[^1] | Allow actor to enable or disable GMS writeability for data migrations. | +| Apply Retention API[^1] | Allow actor to apply retention using the API. | +| Analytics API access[^1] | Allow actor to use API read access to raw analytics data. | +| Manage System Operations | Allow actor to manage system operation controls. | [^1]: Only active if REST_API_AUTHORIZATION_ENABLED is true [^2]: DataHub Cloud only diff --git a/docs/deploy/environment-vars.md b/docs/deploy/environment-vars.md index 21ed738e878f88..6429996c088b4a 100644 --- a/docs/deploy/environment-vars.md +++ b/docs/deploy/environment-vars.md @@ -14,21 +14,21 @@ DataHub works. | `UI_INGESTION_ENABLED` | `true` | boolean | [`GMS`, `MCE Consumer`] | Enable UI based ingestion. | | `DATAHUB_ANALYTICS_ENABLED` | `true` | boolean | [`Frontend`, `GMS`] | Collect DataHub usage to populate the analytics dashboard. 
| | `BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE` | `true` | boolean | [`GMS`, `MCE Consumer`, `MAE Consumer`] | Do not wait for the `system-update` to complete before starting. This should typically only be disabled during development. | -| `ER_MODEL_RELATIONSHIP_FEATURE_ENABLED` | `false` | boolean | [`Frontend`, `GMS`] | Enable ER Model Relation Feature that shows Relationships Tab within a Dataset UI. | +| `ER_MODEL_RELATIONSHIP_FEATURE_ENABLED` | `false` | boolean | [`Frontend`, `GMS`] | Enable ER Model Relation Feature that shows Relationships Tab within a Dataset UI. | ## Ingestion -| Variable | Default | Unit/Type | Components | Description | -|------------------------------------|---------|-----------|-------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `ASYNC_INGEST_DEFAULT` | `false` | boolean | [`GMS`] | Asynchronously process ingestProposals by writing the ingestion MCP to Kafka. Typically enabled with standalone consumers. | -| `MCP_CONSUMER_ENABLED` | `true` | boolean | [`GMS`, `MCE Consumer`] | When running in standalone mode, disabled on `GMS` and enabled on separate `MCE Consumer`. | -| `MCL_CONSUMER_ENABLED` | `true` | boolean | [`GMS`, `MAE Consumer`] | When running in standalone mode, disabled on `GMS` and enabled on separate `MAE Consumer`. | -| `PE_CONSUMER_ENABLED` | `true` | boolean | [`GMS`, `MAE Consumer`] | When running in standalone mode, disabled on `GMS` and enabled on separate `MAE Consumer`. | -| `ES_BULK_REQUESTS_LIMIT` | 1000 | docs | [`GMS`, `MAE Consumer`] | Number of bulk documents to index. `MAE Consumer` if standalone. | -| `ES_BULK_FLUSH_PERIOD` | 1 | seconds | [`GMS`, `MAE Consumer`] | How frequently indexed documents are made available for query. 
| -| `ALWAYS_EMIT_CHANGE_LOG` | `false` | boolean | [`GMS`] | Enables always emitting a MCL even when no changes are detected. Used for Time Based Lineage when no changes occur. | | -| `GRAPH_SERVICE_DIFF_MODE_ENABLED` | `true` | boolean | [`GMS`] | Enables diff mode for graph writes, uses a different code path that produces a diff from previous to next to write relationships instead of wholesale deleting edges and reading. | +| Variable | Default | Unit/Type | Components | Description | +|-----------------------------------|---------|-----------|-------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `ASYNC_INGEST_DEFAULT` | `false` | boolean | [`GMS`] | Asynchronously process ingestProposals by writing the ingestion MCP to Kafka. Typically enabled with standalone consumers. | +| `MCP_CONSUMER_ENABLED` | `true` | boolean | [`GMS`, `MCE Consumer`] | When running in standalone mode, disabled on `GMS` and enabled on separate `MCE Consumer`. | +| `MCL_CONSUMER_ENABLED` | `true` | boolean | [`GMS`, `MAE Consumer`] | When running in standalone mode, disabled on `GMS` and enabled on separate `MAE Consumer`. | +| `PE_CONSUMER_ENABLED` | `true` | boolean | [`GMS`, `MAE Consumer`] | When running in standalone mode, disabled on `GMS` and enabled on separate `MAE Consumer`. | +| `ES_BULK_REQUESTS_LIMIT` | 1000 | docs | [`GMS`, `MAE Consumer`] | Number of bulk documents to index. `MAE Consumer` if standalone. | +| `ES_BULK_FLUSH_PERIOD` | 1 | seconds | [`GMS`, `MAE Consumer`] | How frequently indexed documents are made available for query. | +| `ALWAYS_EMIT_CHANGE_LOG` | `false` | boolean | [`GMS`] | Enables always emitting a MCL even when no changes are detected. Used for Time Based Lineage when no changes occur. 
| | +| `GRAPH_SERVICE_DIFF_MODE_ENABLED` | `true` | boolean | [`GMS`] | Enables diff mode for graph writes, uses a different code path that produces a diff from previous to next to write relationships instead of wholesale deleting edges and reading. | ## Caching diff --git a/lombok.config b/lombok.config index df71bb6a0fb878..7324b9265c5203 100644 --- a/lombok.config +++ b/lombok.config @@ -1,2 +1,3 @@ config.stopBubbling = true lombok.addLombokGeneratedAnnotation = true +lombok.copyableAnnotations += org.springframework.beans.factory.annotation.Qualifier diff --git a/metadata-dao-impl/kafka-producer/src/main/java/com/datahub/metadata/dao/producer/KafkaProducerThrottle.java b/metadata-dao-impl/kafka-producer/src/main/java/com/datahub/metadata/dao/throttle/KafkaThrottleSensor.java similarity index 61% rename from metadata-dao-impl/kafka-producer/src/main/java/com/datahub/metadata/dao/producer/KafkaProducerThrottle.java rename to metadata-dao-impl/kafka-producer/src/main/java/com/datahub/metadata/dao/throttle/KafkaThrottleSensor.java index 8fbb34b1eacd6f..2adf2543aa2f77 100644 --- a/metadata-dao-impl/kafka-producer/src/main/java/com/datahub/metadata/dao/producer/KafkaProducerThrottle.java +++ b/metadata-dao-impl/kafka-producer/src/main/java/com/datahub/metadata/dao/throttle/KafkaThrottleSensor.java @@ -1,20 +1,31 @@ -package com.datahub.metadata.dao.producer; +package com.datahub.metadata.dao.throttle; + +import static com.linkedin.metadata.dao.throttle.ThrottleType.MCL_TIMESERIES_LAG; +import static com.linkedin.metadata.dao.throttle.ThrottleType.MCL_VERSIONED_LAG; import com.codahale.metrics.Gauge; import com.google.common.annotations.VisibleForTesting; import com.linkedin.metadata.config.MetadataChangeProposalConfig; +import com.linkedin.metadata.dao.throttle.ThrottleControl; +import com.linkedin.metadata.dao.throttle.ThrottleEvent; +import com.linkedin.metadata.dao.throttle.ThrottleSensor; +import com.linkedin.metadata.dao.throttle.ThrottleType; import 
com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.utils.metrics.MetricUtils; import com.linkedin.util.Pair; +import java.util.ArrayList; import java.util.Collection; +import java.util.Comparator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; -import java.util.function.Consumer; +import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; @@ -27,23 +38,43 @@ import org.springframework.util.backoff.BackOffExecution; import org.springframework.util.backoff.ExponentialBackOff; +/** + * This class is designed to monitor MCL consumption by a specific consumer group and provide + * throttling hooks. + * + *

Initially this was designed for throttling the async mcp processor `mce-consumer`, however it + * also handles throttling synchronous requests via rest.li, graphql, and openapi for non-browser + * based requests. + */ @Slf4j @Builder(toBuilder = true) -public class KafkaProducerThrottle { +public class KafkaThrottleSensor implements ThrottleSensor { + private static final Set SUPPORTED_THROTTLE_TYPES = + Set.of(MCL_VERSIONED_LAG, MCL_TIMESERIES_LAG); @Nonnull private final EntityRegistry entityRegistry; @Nonnull private final Admin kafkaAdmin; @Nonnull private final MetadataChangeProposalConfig.ThrottlesConfig config; @Nonnull private final String mclConsumerGroupId; @Nonnull private final String versionedTopicName; @Nonnull private final String timeseriesTopicName; - @Nonnull private final Consumer pauseConsumer; + + /** A list of throttle event listeners to execute when throttling occurs and ceases */ + @Builder.Default @Nonnull + private final List> throttleCallbacks = + new ArrayList<>(); private final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1); - private final Map medianLag = new ConcurrentHashMap<>(); - private final Map backoffMap = new ConcurrentHashMap<>(); + private final Map medianLag = new ConcurrentHashMap<>(); + private final Map backoffMap = new ConcurrentHashMap<>(); + + @Override + public KafkaThrottleSensor addCallback(Function callback) { + throttleCallbacks.add(callback); + return this; + } /** Update lag information at a given rate */ - public KafkaProducerThrottle start() { + public KafkaThrottleSensor start() { if ((config.getVersioned().isEnabled() || config.getTimeseries().isEnabled()) && config.getUpdateIntervalMs() > 0) { scheduler.scheduleAtFixedRate( @@ -79,13 +110,13 @@ public void stop() { * @return median lag per mcl topic */ @VisibleForTesting - public Map getLag() { + public Map getLag() { return medianLag.entrySet().stream() .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); } 
@VisibleForTesting - public boolean isThrottled(MclType mclType) { + public boolean isThrottled(ThrottleType mclType) { if (getThrottleConfig(mclType).isEnabled() && medianLag.containsKey(mclType)) { return medianLag.get(mclType) > getThrottleConfig(mclType).getThreshold(); } @@ -93,7 +124,7 @@ public boolean isThrottled(MclType mclType) { } @VisibleForTesting - public long computeNextBackOff(MclType mclType) { + public long computeNextBackOff(ThrottleType mclType) { if (isThrottled(mclType)) { BackOffExecution backOffExecution = backoffMap.computeIfAbsent( @@ -115,54 +146,61 @@ public long computeNextBackOff(MclType mclType) { @VisibleForTesting public void throttle() throws InterruptedException { - for (MclType mclType : MclType.values()) { - if (isThrottled(mclType)) { - long backoffWaitMs = computeNextBackOff(mclType); - - if (backoffWaitMs > 0) { - log.warn( - "Throttled producer Topic: {} Duration: {} ms MedianLag: {}", - getTopicName(mclType), - backoffWaitMs, - medianLag.get(mclType)); - MetricUtils.gauge( - this.getClass(), - String.format("%s_throttled", getTopicName(mclType)), - () -> (Gauge) () -> 1); - MetricUtils.counter( - this.getClass(), String.format("%s_throttledCount", getTopicName(mclType))) - .inc(); - - log.info("Pausing MCE consumer for {} ms.", backoffWaitMs); - pauseConsumer.accept(true); - Thread.sleep(backoffWaitMs); - log.info("Resuming MCE consumer."); - pauseConsumer.accept(false); - - // if throttled for one topic, skip remaining - return; - } else { - // no throttle or exceeded configuration limits - log.info("MCE consumer throttle exponential backoff reset."); - backoffMap.remove(mclType); - MetricUtils.gauge( - this.getClass(), - String.format("%s_throttled", getTopicName(mclType)), - () -> (Gauge) () -> 0); - } - } else { + + Map throttled = new LinkedHashMap<>(); + + for (ThrottleType mclType : SUPPORTED_THROTTLE_TYPES) { + long backoffWaitMs = computeNextBackOff(mclType); + + if (backoffWaitMs <= 0) { // not throttled, remove 
backoff tracking - log.info("MCE consumer throttle exponential backoff reset."); + log.info("Throttle exponential backoff reset."); backoffMap.remove(mclType); MetricUtils.gauge( this.getClass(), String.format("%s_throttled", getTopicName(mclType)), () -> (Gauge) () -> 0); + } else { + throttled.put(mclType, backoffWaitMs); + } + } + + // handle throttled + if (!throttled.isEmpty()) { + long maxBackoffWaitMs = throttled.values().stream().max(Comparator.naturalOrder()).get(); + log.warn( + "Throttled Topic: {} Duration: {} ms MedianLag: {}", + throttled.keySet().stream().map(this::getTopicName).collect(Collectors.toList()), + maxBackoffWaitMs, + throttled.keySet().stream().map(medianLag::get).collect(Collectors.toList())); + + throttled.keySet().stream() + .forEach( + mclType -> { + MetricUtils.gauge( + this.getClass(), + String.format("%s_throttled", getTopicName(mclType)), + () -> (Gauge) () -> 1); + MetricUtils.counter( + this.getClass(), String.format("%s_throttledCount", getTopicName(mclType))) + .inc(); + }); + + log.info("Throttling {} callbacks for {} ms.", throttleCallbacks.size(), maxBackoffWaitMs); + final ThrottleEvent throttleEvent = ThrottleEvent.throttle(throttled); + List throttleControls = + throttleCallbacks.stream().map(callback -> callback.apply(throttleEvent)).toList(); + + if (throttleControls.stream().anyMatch(ThrottleControl::hasCallback)) { + Thread.sleep(maxBackoffWaitMs); + log.info("Resuming {} callbacks after wait.", throttleControls.size()); + throttleControls.forEach( + control -> control.execute(ThrottleEvent.clearThrottle(throttleEvent))); } } } - private Map getMedianLag() { + private Map getMedianLag() { try { Map mclConsumerOffsets = kafkaAdmin @@ -183,11 +221,11 @@ private Map getMedianLag() { .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); return Stream.of( - Pair.of(MclType.VERSIONED, versionedTopicName), - Pair.of(MclType.TIMESERIES, timeseriesTopicName)) + Pair.of(MCL_VERSIONED_LAG, versionedTopicName), + 
Pair.of(MCL_TIMESERIES_LAG, timeseriesTopicName)) .map( topic -> { - MclType mclType = topic.getFirst(); + ThrottleType mclType = topic.getFirst(); String topicName = topic.getSecond(); Map topicOffsets = @@ -212,22 +250,22 @@ private Map getMedianLag() { .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); } catch (ExecutionException | InterruptedException e) { log.error("Error fetching consumer group offsets.", e); - return Map.of(MclType.VERSIONED, 0L, MclType.TIMESERIES, 0L); + return Map.of(MCL_VERSIONED_LAG, 0L, MCL_TIMESERIES_LAG, 0L); } } - private MetadataChangeProposalConfig.ThrottleConfig getThrottleConfig(MclType mclType) { + private MetadataChangeProposalConfig.ThrottleConfig getThrottleConfig(ThrottleType mclType) { MetadataChangeProposalConfig.ThrottleConfig throttleConfig; switch (mclType) { - case VERSIONED -> throttleConfig = config.getVersioned(); - case TIMESERIES -> throttleConfig = config.getTimeseries(); + case MCL_VERSIONED_LAG -> throttleConfig = config.getVersioned(); + case MCL_TIMESERIES_LAG -> throttleConfig = config.getTimeseries(); default -> throw new IllegalStateException(); } return throttleConfig; } - private String getTopicName(MclType mclType) { - return MclType.TIMESERIES.equals(mclType) ? timeseriesTopicName : versionedTopicName; + private String getTopicName(ThrottleType mclType) { + return MCL_TIMESERIES_LAG.equals(mclType) ? 
timeseriesTopicName : versionedTopicName; } private static Double getMedian(Collection listValues) { @@ -238,9 +276,4 @@ private static Double getMedian(Collection listValues) { else median = values[values.length / 2]; return median; } - - public enum MclType { - TIMESERIES, - VERSIONED - } } diff --git a/metadata-dao-impl/kafka-producer/src/test/java/com/datahub/metadata/dao/producer/KafkaProducerThrottleTest.java b/metadata-dao-impl/kafka-producer/src/test/java/com/datahub/metadata/dao/throttle/KafkaThrottleSensorTest.java similarity index 80% rename from metadata-dao-impl/kafka-producer/src/test/java/com/datahub/metadata/dao/producer/KafkaProducerThrottleTest.java rename to metadata-dao-impl/kafka-producer/src/test/java/com/datahub/metadata/dao/throttle/KafkaThrottleSensorTest.java index ce6104ee2ca7dc..6f82ad86852992 100644 --- a/metadata-dao-impl/kafka-producer/src/test/java/com/datahub/metadata/dao/producer/KafkaProducerThrottleTest.java +++ b/metadata-dao-impl/kafka-producer/src/test/java/com/datahub/metadata/dao/throttle/KafkaThrottleSensorTest.java @@ -1,4 +1,4 @@ -package com.datahub.metadata.dao.producer; +package com.datahub.metadata.dao.throttle; import static org.mockito.ArgumentMatchers.anyMap; import static org.mockito.ArgumentMatchers.anyString; @@ -14,6 +14,8 @@ import static org.testng.Assert.assertTrue; import com.linkedin.metadata.config.MetadataChangeProposalConfig; +import com.linkedin.metadata.dao.throttle.ThrottleControl; +import com.linkedin.metadata.dao.throttle.ThrottleType; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.mxe.Topics; import com.linkedin.util.Pair; @@ -34,7 +36,7 @@ import org.apache.kafka.common.TopicPartition; import org.testng.annotations.Test; -public class KafkaProducerThrottleTest { +public class KafkaThrottleSensorTest { private static final List STANDARD_TOPICS = List.of(Topics.METADATA_CHANGE_LOG_VERSIONED, Topics.METADATA_CHANGE_LOG_TIMESERIES); private static final String 
STANDARD_MCL_CONSUMER_GROUP_ID = "generic-mae-consumer-job-client"; @@ -54,16 +56,16 @@ public void testLagCalculation() throws ExecutionException, InterruptedException topicPart -> ((long) topicPart.partition() + 1) * 2, 3)); - KafkaProducerThrottle test = - KafkaProducerThrottle.builder() + KafkaThrottleSensor test = + KafkaThrottleSensor.builder() .config(noSchedulerConfig().getThrottle()) .kafkaAdmin(mockAdmin) .versionedTopicName(STANDARD_TOPICS.get(0)) .timeseriesTopicName(STANDARD_TOPICS.get(1)) .entityRegistry(mock(EntityRegistry.class)) .mclConsumerGroupId(STANDARD_MCL_CONSUMER_GROUP_ID) - .pauseConsumer(mock(Consumer.class)) - .build(); + .build() + .addCallback((throttleEvent -> ThrottleControl.NONE)); // Refresh calculations test.refresh(); @@ -71,8 +73,8 @@ public void testLagCalculation() throws ExecutionException, InterruptedException assertEquals( test.getLag(), Map.of( - KafkaProducerThrottle.MclType.VERSIONED, 2L, - KafkaProducerThrottle.MclType.TIMESERIES, 2L)); + ThrottleType.MCL_VERSIONED_LAG, 2L, + ThrottleType.MCL_TIMESERIES_LAG, 2L)); } @Test @@ -111,45 +113,52 @@ public void testThrottle() throws ExecutionException, InterruptedException { Consumer pauseFunction = mock(Consumer.class); - KafkaProducerThrottle test = - KafkaProducerThrottle.builder() + KafkaThrottleSensor test = + KafkaThrottleSensor.builder() .config(noThrottleConfig) .kafkaAdmin(mockAdmin) .versionedTopicName(STANDARD_TOPICS.get(0)) .timeseriesTopicName(STANDARD_TOPICS.get(1)) .entityRegistry(mock(EntityRegistry.class)) .mclConsumerGroupId(STANDARD_MCL_CONSUMER_GROUP_ID) - .pauseConsumer(pauseFunction) - .build(); + .build() + .addCallback( + (throttleEvent -> { + pauseFunction.accept(throttleEvent.isThrottled()); + return ThrottleControl.builder() + .callback( + throttleResume -> pauseFunction.accept(throttleResume.isThrottled())) + .build(); + })); // Refresh calculations test.refresh(); assertEquals( test.getLag(), Map.of( - KafkaProducerThrottle.MclType.VERSIONED, 2L, - 
KafkaProducerThrottle.MclType.TIMESERIES, 2L)); + ThrottleType.MCL_VERSIONED_LAG, 2L, + ThrottleType.MCL_TIMESERIES_LAG, 2L)); assertFalse( - test.isThrottled(KafkaProducerThrottle.MclType.VERSIONED), + test.isThrottled(ThrottleType.MCL_VERSIONED_LAG), "Expected not throttling, lag is below threshold"); - assertFalse(test.isThrottled(KafkaProducerThrottle.MclType.TIMESERIES)); + assertFalse(test.isThrottled(ThrottleType.MCL_TIMESERIES_LAG)); test.throttle(); verifyNoInteractions(pauseFunction); reset(pauseFunction); - KafkaProducerThrottle test2 = test.toBuilder().config(throttleConfig).build(); + KafkaThrottleSensor test2 = test.toBuilder().config(throttleConfig).build(); // Refresh calculations test2.refresh(); assertEquals( test2.getLag(), Map.of( - KafkaProducerThrottle.MclType.VERSIONED, 2L, - KafkaProducerThrottle.MclType.TIMESERIES, 2L)); + ThrottleType.MCL_VERSIONED_LAG, 2L, + ThrottleType.MCL_TIMESERIES_LAG, 2L)); assertTrue( - test2.isThrottled(KafkaProducerThrottle.MclType.VERSIONED), + test2.isThrottled(ThrottleType.MCL_VERSIONED_LAG), "Expected throttling, lag is above threshold."); assertFalse( - test2.isThrottled(KafkaProducerThrottle.MclType.TIMESERIES), + test2.isThrottled(ThrottleType.MCL_TIMESERIES_LAG), "Expected not throttling. 
Timeseries is disabled"); test2.throttle(); @@ -183,56 +192,48 @@ public void testBackOff() throws ExecutionException, InterruptedException { topicPart -> ((long) topicPart.partition() + 1) * 2, 3)); - KafkaProducerThrottle test = - KafkaProducerThrottle.builder() + KafkaThrottleSensor test = + KafkaThrottleSensor.builder() .config(throttleConfig) .kafkaAdmin(mockAdmin) .versionedTopicName(STANDARD_TOPICS.get(0)) .timeseriesTopicName(STANDARD_TOPICS.get(1)) .entityRegistry(mock(EntityRegistry.class)) .mclConsumerGroupId(STANDARD_MCL_CONSUMER_GROUP_ID) - .pauseConsumer(mock(Consumer.class)) - .build(); + .build() + .addCallback((throttleEvent -> ThrottleControl.NONE)); // Refresh calculations test.refresh(); assertEquals( test.getLag(), Map.of( - KafkaProducerThrottle.MclType.VERSIONED, 2L, - KafkaProducerThrottle.MclType.TIMESERIES, 2L)); + ThrottleType.MCL_VERSIONED_LAG, 2L, + ThrottleType.MCL_TIMESERIES_LAG, 2L)); assertTrue( - test.isThrottled(KafkaProducerThrottle.MclType.VERSIONED), + test.isThrottled(ThrottleType.MCL_VERSIONED_LAG), "Expected throttling, lag is above threshold."); assertFalse( - test.isThrottled(KafkaProducerThrottle.MclType.TIMESERIES), + test.isThrottled(ThrottleType.MCL_TIMESERIES_LAG), "Expected no throttling. Timeseries is disabled"); assertEquals( - test.computeNextBackOff(KafkaProducerThrottle.MclType.TIMESERIES), + test.computeNextBackOff(ThrottleType.MCL_TIMESERIES_LAG), 0L, "Expected no backoff. 
Timeseries is disabled."); + assertEquals(test.computeNextBackOff(ThrottleType.MCL_VERSIONED_LAG), 1L, "Expected initial 1"); assertEquals( - test.computeNextBackOff(KafkaProducerThrottle.MclType.VERSIONED), 1L, "Expected initial 1"); + test.computeNextBackOff(ThrottleType.MCL_VERSIONED_LAG), 2L, "Expected second 2^1"); + assertEquals(test.computeNextBackOff(ThrottleType.MCL_VERSIONED_LAG), 4L, "Expected third 2^2"); assertEquals( - test.computeNextBackOff(KafkaProducerThrottle.MclType.VERSIONED), - 2L, - "Expected second 2^1"); + test.computeNextBackOff(ThrottleType.MCL_VERSIONED_LAG), 8L, "Expected fourth 2^3"); assertEquals( - test.computeNextBackOff(KafkaProducerThrottle.MclType.VERSIONED), 4L, "Expected third 2^2"); - assertEquals( - test.computeNextBackOff(KafkaProducerThrottle.MclType.VERSIONED), - 8L, - "Expected fourth 2^3"); - assertEquals( - test.computeNextBackOff(KafkaProducerThrottle.MclType.VERSIONED), + test.computeNextBackOff(ThrottleType.MCL_VERSIONED_LAG), 8L, "Expected fifth max interval at 8"); assertEquals( - test.computeNextBackOff(KafkaProducerThrottle.MclType.VERSIONED), - -1L, - "Expected max attempts"); + test.computeNextBackOff(ThrottleType.MCL_VERSIONED_LAG), -1L, "Expected max attempts"); } @Test @@ -253,16 +254,16 @@ public void testScheduler() throws ExecutionException, InterruptedException { AdminClient mockAdmin = mockKafka(generateLag(STANDARD_TOPICS, topicPart -> 1L, topicPart -> 2L, 1)); - KafkaProducerThrottle test = - KafkaProducerThrottle.builder() + KafkaThrottleSensor test = + KafkaThrottleSensor.builder() .config(throttlesConfig) .kafkaAdmin(mockAdmin) .versionedTopicName(STANDARD_TOPICS.get(0)) .timeseriesTopicName(STANDARD_TOPICS.get(1)) .entityRegistry(mock(EntityRegistry.class)) .mclConsumerGroupId(STANDARD_MCL_CONSUMER_GROUP_ID) - .pauseConsumer(mock(Consumer.class)) - .build(); + .build() + .addCallback((throttleEvent -> ThrottleControl.NONE)); try { test.start(); @@ -270,8 +271,8 @@ public void testScheduler() 
throws ExecutionException, InterruptedException { assertEquals( test.getLag(), Map.of( - KafkaProducerThrottle.MclType.VERSIONED, 1L, - KafkaProducerThrottle.MclType.TIMESERIES, 1L), + ThrottleType.MCL_VERSIONED_LAG, 1L, + ThrottleType.MCL_TIMESERIES_LAG, 1L), "Expected lag updated"); } finally { test.stop(); diff --git a/metadata-io/build.gradle b/metadata-io/build.gradle index 9f5fc109eea7f6..7e72767c08b79c 100644 --- a/metadata-io/build.gradle +++ b/metadata-io/build.gradle @@ -27,6 +27,8 @@ dependencies { implementation externalDependency.guava implementation externalDependency.reflections + // https://mvnrepository.com/artifact/nl.basjes.parse.useragent/yauaa + implementation 'nl.basjes.parse.useragent:yauaa:7.27.0' api(externalDependency.dgraph4j) { exclude group: 'com.google.guava', module: 'guava' diff --git a/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/APIThrottle.java b/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/APIThrottle.java new file mode 100644 index 00000000000000..542eb5f3869c01 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/APIThrottle.java @@ -0,0 +1,82 @@ +package com.linkedin.metadata.dao.throttle; + +import static com.linkedin.metadata.dao.throttle.ThrottleType.MANUAL; +import static com.linkedin.metadata.dao.throttle.ThrottleType.MCL_TIMESERIES_LAG; +import static com.linkedin.metadata.dao.throttle.ThrottleType.MCL_VERSIONED_LAG; + +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.context.RequestContext; +import java.util.Comparator; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import nl.basjes.parse.useragent.UserAgent; +import nl.basjes.parse.useragent.UserAgentAnalyzer; + +public class APIThrottle { + private static final Set AGENT_EXEMPTIONS = Set.of("Browser"); + private static final UserAgentAnalyzer UAA = + 
UserAgentAnalyzer.newBuilder() + .hideMatcherLoadStats() + .withField(UserAgent.AGENT_CLASS) + .withCache(1000) + .build(); + + private APIThrottle() {} + + /** + * This method is expected to be called on sync ingest requests for both timeseries or versioned + * aspects. + * + *

1. Async requests are never expected to be throttled here. 2. UI requests are not expected + * to be throttled, so we'll try to detect browser vs non-browser activity. 3. Throttling + * exceptions are expected to be caught by the API implementation and converted to a 429 http + * status code + * + * @param opContext the operation context + * @param throttleEvents the throttle state + * @param isTimeseries whether the operation is for timeseries or not (throttled separately) + */ + public static void evaluate( + @Nonnull OperationContext opContext, + @Nullable Set throttleEvents, + boolean isTimeseries) { + + Set eventMatchMaxWaitMs = eventMatchMaxWaitMs(throttleEvents, isTimeseries); + + if (!eventMatchMaxWaitMs.isEmpty() && !isExempt(opContext.getRequestContext())) { + throw new APIThrottleException( + eventMatchMaxWaitMs.stream().max(Comparator.naturalOrder()).orElse(-1L), + "Throttled due to " + throttleEvents); + } + } + + private static boolean isExempt(@Nullable RequestContext requestContext) { + // Exclude internal calls + if (requestContext == null + || requestContext.getUserAgent() == null + || requestContext.getUserAgent().isEmpty()) { + return true; + } + + UserAgent ua = UAA.parse(requestContext.getUserAgent()); + return AGENT_EXEMPTIONS.contains(ua.get(UserAgent.AGENT_CLASS).getValue()); + } + + private static Set eventMatchMaxWaitMs( + @Nullable Set throttleEvents, boolean isTimeseries) { + if (throttleEvents == null) { + return Set.of(); + } + + return throttleEvents.stream() + .map( + e -> + e.getActiveThrottleMaxWaitMs( + Set.of(MANUAL, isTimeseries ? 
MCL_TIMESERIES_LAG : MCL_VERSIONED_LAG))) + .filter(Objects::nonNull) + .collect(Collectors.toSet()); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/APIThrottleException.java b/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/APIThrottleException.java new file mode 100644 index 00000000000000..6f1a5fcd1af220 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/APIThrottleException.java @@ -0,0 +1,20 @@ +package com.linkedin.metadata.dao.throttle; + +import java.util.concurrent.TimeUnit; + +public class APIThrottleException extends RuntimeException { + private final long durationMs; + + public APIThrottleException(long durationMs, String message) { + super(message); + this.durationMs = durationMs; + } + + public long getDurationMs() { + return durationMs; + } + + public long getDurationSeconds() { + return TimeUnit.MILLISECONDS.toSeconds(durationMs); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/NoOpSensor.java b/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/NoOpSensor.java new file mode 100644 index 00000000000000..29692ff86d805f --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/NoOpSensor.java @@ -0,0 +1,12 @@ +package com.linkedin.metadata.dao.throttle; + +import java.util.function.Function; +import lombok.EqualsAndHashCode; + +@EqualsAndHashCode +public class NoOpSensor implements ThrottleSensor { + @Override + public ThrottleSensor addCallback(Function callback) { + return this; + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/ThrottleControl.java b/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/ThrottleControl.java new file mode 100644 index 00000000000000..b08c43078e79ba --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/ThrottleControl.java @@ -0,0 +1,31 @@ +package com.linkedin.metadata.dao.throttle; + +import 
java.util.function.Consumer; +import javax.annotation.Nullable; +import lombok.AccessLevel; +import lombok.Builder; +import lombok.Getter; +import lombok.Value; +import lombok.experimental.Accessors; + +@Value +@Accessors(fluent = true) +@Builder +public class ThrottleControl { + public static final ThrottleControl NONE = ThrottleControl.builder().build(); + + // optional hook run by execute() after the pause/sleep; null means nothing to do + @Getter(AccessLevel.NONE) + @Nullable + Consumer callback; + + public boolean hasCallback() { + return callback != null; + } + + public void execute(ThrottleEvent throttleEvent) { + if (callback != null) { + callback.accept(throttleEvent); + } + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/ThrottleEvent.java b/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/ThrottleEvent.java new file mode 100644 index 00000000000000..d382c87d6b546a --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/ThrottleEvent.java @@ -0,0 +1,96 @@ +package com.linkedin.metadata.dao.throttle; + +import java.util.Comparator; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import javax.annotation.Nullable; +import lombok.Builder; +import lombok.Value; +import lombok.experimental.Accessors; + +@Value +@Accessors(fluent = true) +@Builder +public class ThrottleEvent { + public static ThrottleEvent throttle(Map backoffWaitMs) { + return ThrottleEvent.builder() + .backoffWaitMs(backoffWaitMs) + .throttled( + backoffWaitMs.entrySet().stream() + .filter(entry -> entry.getValue() > 0) + .map(entry -> Map.entry(entry.getKey(), true)) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))) + .build(); + } + + public static ThrottleEvent clearThrottle(ThrottleEvent throttleEvent) { + return clearThrottle(throttleEvent.getActiveThrottles()); + } + + public static ThrottleEvent clearThrottle(Set clear) { + return ThrottleEvent.builder() + .throttled( + clear.stream() +
.map(t -> Map.entry(t, false)) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))) + .build(); + } + + Map throttled; + Map backoffWaitMs; + + public Set getActiveThrottles() { + return streamTypes().filter(this::isThrottled).collect(Collectors.toSet()); + } + + /** + * Return the suggested wait time in milliseconds for the active throttles matching the filter. + * + * @param filterTypes throttle types to consider; empty for no filters + * @return suggested wait time in milliseconds, negative if no suggestion is possible, null if no + * wait + */ + @Nullable + public Long getActiveThrottleMaxWaitMs(Set filterTypes) { + Set activeThrottles = + getActiveThrottles().stream() + .filter(a -> filterTypes.isEmpty() || filterTypes.contains(a)) + .collect(Collectors.toSet()); + + if (activeThrottles.isEmpty()) { + return null; + } + + if (!activeThrottles.contains(ThrottleType.MANUAL) && backoffWaitMs != null) { + return activeThrottles.stream() + .map(t -> backoffWaitMs.getOrDefault(t, -1L)) + .max(Comparator.naturalOrder()) + .orElse(-1L); + } + + return -1L; + } + + public Set getDisabledThrottles() { + return streamTypes().filter(t -> !isThrottled(t)).collect(Collectors.toSet()); + } + + public boolean isThrottled() { + return (throttled != null && throttled.values().stream().anyMatch(b -> b)) + || (backoffWaitMs != null && backoffWaitMs.values().stream().anyMatch(wait -> wait > 0)); + } + + private boolean isThrottled(ThrottleType throttleType) { + return (throttled != null && throttled.getOrDefault(throttleType, false)) + || (backoffWaitMs != null && backoffWaitMs.getOrDefault(throttleType, 0L) > 0); + } + + private Stream streamTypes() { + return Stream.concat( + throttled != null ? throttled.keySet().stream() : Stream.empty(), + backoffWaitMs != null ?
backoffWaitMs.keySet().stream() : Stream.empty()) + .distinct(); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/ThrottleSensor.java b/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/ThrottleSensor.java new file mode 100644 index 00000000000000..d92defe5edbcb7 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/ThrottleSensor.java @@ -0,0 +1,7 @@ +package com.linkedin.metadata.dao.throttle; + +import java.util.function.Function; + +public interface ThrottleSensor { + ThrottleSensor addCallback(Function callback); +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/ThrottleType.java b/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/ThrottleType.java new file mode 100644 index 00000000000000..ac6d13a58cd079 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/ThrottleType.java @@ -0,0 +1,7 @@ +package com.linkedin.metadata.dao.throttle; + +public enum ThrottleType { + MCL_TIMESERIES_LAG, + MCL_VERSIONED_LAG, + MANUAL +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index 4b83ea40f722db..c584b8ac4d7a27 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -51,6 +51,10 @@ import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; import com.linkedin.metadata.aspect.utils.DefaultAspectsUtil; import com.linkedin.metadata.config.PreProcessHooks; +import com.linkedin.metadata.dao.throttle.APIThrottle; +import com.linkedin.metadata.dao.throttle.ThrottleControl; +import com.linkedin.metadata.dao.throttle.ThrottleEvent; +import com.linkedin.metadata.dao.throttle.ThrottleType; import com.linkedin.metadata.entity.ebean.EbeanAspectV2; import 
com.linkedin.metadata.entity.ebean.PartitionedStream; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; @@ -96,6 +100,7 @@ import java.util.Objects; import java.util.Optional; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; @@ -159,6 +164,9 @@ public class EntityServiceImpl implements EntityService { private final Integer ebeanMaxTransactionRetry; private final boolean enableBrowseV2; + @Getter + private final Map, ThrottleEvent> throttleEvents = new ConcurrentHashMap<>(); + public EntityServiceImpl( @Nonnull final AspectDao aspectDao, @Nonnull final EventProducer producer, @@ -194,6 +202,17 @@ public void setUpdateIndicesService(@Nullable SearchIndicesService updateIndices this.updateIndicesService = updateIndicesService; } + public ThrottleControl handleThrottleEvent(ThrottleEvent throttleEvent) { + final Set activeEvents = throttleEvent.getActiveThrottles(); + // store throttle event + throttleEvents.put(activeEvents, throttleEvent); + + return ThrottleControl.builder() + // clear throttle event + .callback(clearThrottle -> throttleEvents.remove(clearThrottle.getDisabledThrottles())) + .build(); + } + @Override public RecordTemplate getLatestAspect( @Nonnull OperationContext opContext, @Nonnull Urn urn, @Nonnull String aspectName) { @@ -769,6 +788,9 @@ public List ingestAspects( return Collections.emptyList(); } + // Handle throttling + APIThrottle.evaluate(opContext, new HashSet<>(throttleEvents.values()), false); + List ingestResults = ingestAspectsToLocalDB(opContext, aspectsBatch, overwrite); @@ -1183,6 +1205,9 @@ private Stream ingestTimeseriesProposal( } if (!async) { + // Handle throttling + APIThrottle.evaluate(opContext, new HashSet<>(throttleEvents.values()), true); + // Create default non-timeseries aspects for timeseries aspects List timeseriesKeyAspects = aspectsBatch.getMCPItems().stream() 
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/dao/throttle/APIThrottleTest.java b/metadata-io/src/test/java/com/linkedin/metadata/dao/throttle/APIThrottleTest.java new file mode 100644 index 00000000000000..c86d80be2d7fd2 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/dao/throttle/APIThrottleTest.java @@ -0,0 +1,162 @@ +package com.linkedin.metadata.dao.throttle; + +import static com.linkedin.metadata.dao.throttle.ThrottleType.MANUAL; +import static com.linkedin.metadata.dao.throttle.ThrottleType.MCL_TIMESERIES_LAG; +import static com.linkedin.metadata.dao.throttle.ThrottleType.MCL_VERSIONED_LAG; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.context.RequestContext; +import io.datahubproject.test.metadata.context.TestOperationContexts; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.testng.Assert; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class APIThrottleTest { + private static final ThrottleEvent MANUAL_THROTTLED_EVENT = + ThrottleEvent.builder().throttled(Map.of(MANUAL, true)).build(); + private static final ThrottleEvent MCL_TIMESERIES_THROTTLED_EVENT = + ThrottleEvent.builder().throttled(Map.of(MCL_TIMESERIES_LAG, true)).build(); + private static final ThrottleEvent MCL_VERSIONED_THROTTLED_EVENT = + ThrottleEvent.builder().throttled(Map.of(MCL_VERSIONED_LAG, true)).build(); + private static final ThrottleEvent ALL_MCL_THROTTLED_EVENT = + ThrottleEvent.builder() + .throttled(Map.of(MCL_TIMESERIES_LAG, true, MCL_VERSIONED_LAG, true)) + .build(); + private static final ThrottleEvent ALL_THROTTLED_EVENT = + ThrottleEvent.builder() + .throttled(Map.of(MANUAL, true, MCL_TIMESERIES_LAG, true, MCL_VERSIONED_LAG, true)) + .build(); + public static final Set ALL_EVENTS = + Set.of( + MANUAL_THROTTLED_EVENT, + 
MCL_TIMESERIES_THROTTLED_EVENT, + MCL_VERSIONED_THROTTLED_EVENT, + ALL_MCL_THROTTLED_EVENT, + ALL_THROTTLED_EVENT); + + private OperationContext opContext; + private RequestContext mockRequestContext; + + @BeforeMethod + public void init() { + mockRequestContext = mock(RequestContext.class); + opContext = TestOperationContexts.userContextNoSearchAuthorization(mockRequestContext); + } + + @Test + public void testExemptions() { + List exemptions = + List.of( + "", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:129.0) Gecko/20100101 Firefox/129.0", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36 Edg/127.0.0.0", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Safari/605.1.15"); + + for (ThrottleEvent event : ALL_EVENTS) { + when(mockRequestContext.getUserAgent()).thenReturn(null); + try { + APIThrottle.evaluate(opContext, Set.of(event), false); + } catch (Exception ex) { + Assert.fail("Exception was thrown and NOT expected! " + event); + } + try { + APIThrottle.evaluate(opContext, Set.of(event), true); + } catch (Exception ex) { + Assert.fail("Exception was thrown and NOT expected! " + event); + } + + // Browser tests + for (String ua : exemptions) { + try { + when(mockRequestContext.getUserAgent()).thenReturn(ua); + APIThrottle.evaluate(opContext, Set.of(event), true); + } catch (Exception ex) { + Assert.fail("Exception was thrown and NOT expected! " + event); + } + try { + when(mockRequestContext.getUserAgent()).thenReturn(ua); + APIThrottle.evaluate(opContext, Set.of(event), false); + } catch (Exception ex) { + Assert.fail("Exception was thrown and NOT expected! 
" + event); + } + } + } + } + + @Test + public void testThrottleException() { + List applicable = + List.of( + "python-requests/2.28.2", + "Apache-HttpClient/4.5.5 (Java/1.8.0_162)", + "okhttp/4.9.3.7", + "Go-http-client/1.1"); + + for (ThrottleEvent event : ALL_EVENTS) { + for (String ua : applicable) { + // timeseries lag present + if (event.getActiveThrottles().contains(MCL_TIMESERIES_LAG) + && !event.getActiveThrottles().contains(MANUAL)) { + try { + when(mockRequestContext.getUserAgent()).thenReturn(ua); + APIThrottle.evaluate(opContext, Set.of(event), true); + Assert.fail(String.format("Exception WAS expected! %s %s", ua, event)); + } catch (Exception ignored) { + } + } + if (!event.getActiveThrottles().contains(MCL_TIMESERIES_LAG) + && !event.getActiveThrottles().contains(MANUAL)) { + try { + when(mockRequestContext.getUserAgent()).thenReturn(ua); + APIThrottle.evaluate(opContext, Set.of(event), true); + } catch (Exception ex) { + Assert.fail(String.format("Exception was thrown and NOT expected! %s %s", ua, event)); + } + } + + // versioned lag present + if (event.getActiveThrottles().contains(MCL_VERSIONED_LAG) + && !event.getActiveThrottles().contains(MANUAL)) { + try { + when(mockRequestContext.getUserAgent()).thenReturn(ua); + APIThrottle.evaluate(opContext, Set.of(event), false); + Assert.fail(String.format("Exception WAS expected! %s %s", ua, event)); + } catch (Exception ignored) { + } + } + if (!event.getActiveThrottles().contains(MCL_VERSIONED_LAG) + && !event.getActiveThrottles().contains(MANUAL)) { + try { + when(mockRequestContext.getUserAgent()).thenReturn(ua); + APIThrottle.evaluate(opContext, Set.of(event), false); + } catch (Exception ex) { + Assert.fail(String.format("Exception was thrown and NOT expected! 
%s %s", ua, event)); + } + } + + // manual throttle active + if (event.getActiveThrottles().contains(MANUAL)) { + try { + when(mockRequestContext.getUserAgent()).thenReturn(ua); + APIThrottle.evaluate(opContext, Set.of(event), true); + Assert.fail(String.format("Exception WAS expected! %s %s", ua, event)); + } catch (Exception ignored) { + } + try { + when(mockRequestContext.getUserAgent()).thenReturn(ua); + APIThrottle.evaluate(opContext, Set.of(event), false); + Assert.fail(String.format("Exception WAS expected! %s %s", ua, event)); + } catch (Exception ignored) { + } + } + } + } + } +} diff --git a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeProposalsProcessor.java b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeProposalsProcessor.java index 60d9c7496dfcb5..2f3f35697e476c 100644 --- a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeProposalsProcessor.java +++ b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeProposalsProcessor.java @@ -4,8 +4,11 @@ import com.codahale.metrics.MetricRegistry; import com.codahale.metrics.Timer; import com.linkedin.entity.client.SystemEntityClient; +import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.gms.factory.entityclient.RestliEntityClientFactory; import com.linkedin.metadata.EventUtils; +import com.linkedin.metadata.dao.throttle.ThrottleControl; +import com.linkedin.metadata.dao.throttle.ThrottleSensor; import com.linkedin.metadata.kafka.config.MetadataChangeProposalProcessorCondition; import com.linkedin.metadata.utils.metrics.MetricUtils; import com.linkedin.mxe.FailedMetadataChangeProposal; @@ -13,7 +16,9 @@ import com.linkedin.mxe.Topics; import io.datahubproject.metadata.context.OperationContext; import java.io.IOException; +import java.util.Optional; import javax.annotation.Nonnull; +import javax.annotation.PostConstruct; import 
lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.apache.avro.generic.GenericRecord; @@ -22,11 +27,14 @@ import org.apache.kafka.clients.consumer.ConsumerRecord; import org.apache.kafka.clients.producer.Producer; import org.apache.kafka.clients.producer.ProducerRecord; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Conditional; import org.springframework.context.annotation.Import; import org.springframework.kafka.annotation.EnableKafka; import org.springframework.kafka.annotation.KafkaListener; +import org.springframework.kafka.config.KafkaListenerEndpointRegistry; +import org.springframework.kafka.listener.MessageListenerContainer; import org.springframework.stereotype.Component; @Slf4j @@ -36,11 +44,19 @@ @EnableKafka @RequiredArgsConstructor public class MetadataChangeProposalsProcessor { + private static final String CONSUMER_GROUP_ID_VALUE = + "${METADATA_CHANGE_PROPOSAL_KAFKA_CONSUMER_GROUP_ID:generic-mce-consumer-job-client}"; private final OperationContext systemOperationContext; private final SystemEntityClient entityClient; private final Producer kafkaProducer; + @Qualifier("kafkaThrottle") + private final ThrottleSensor kafkaThrottle; + + private final KafkaListenerEndpointRegistry registry; + private final ConfigurationProvider provider; + private final Histogram kafkaLagStats = MetricUtils.get().histogram(MetricRegistry.name(this.getClass(), "kafkaLag")); @@ -50,8 +66,47 @@ public class MetadataChangeProposalsProcessor { + "}") private String fmcpTopicName; + @Value(CONSUMER_GROUP_ID_VALUE) + private String mceConsumerGroupId; + + @PostConstruct + public void registerConsumerThrottle() { + if (kafkaThrottle != null + && provider + .getMetadataChangeProposal() + .getThrottle() + .getComponents() + .getMceConsumer() + .isEnabled()) { + log.info("MCE Consumer Throttle Enabled"); + kafkaThrottle.addCallback( + 
(throttleEvent) -> { + Optional container = + Optional.ofNullable(registry.getListenerContainer(mceConsumerGroupId)); + if (container.isEmpty()) { + log.warn( + "Expected container was missing: {} throttle is not possible.", + mceConsumerGroupId); + } else { + if (throttleEvent.isThrottled()) { + container.ifPresent(MessageListenerContainer::pause); + return ThrottleControl.builder() + // resume consumer after sleep + .callback( + (resumeEvent) -> container.ifPresent(MessageListenerContainer::resume)) + .build(); + } + } + + return ThrottleControl.NONE; + }); + } else { + log.info("MCE Consumer Throttle Disabled"); + } + } + @KafkaListener( - id = "${METADATA_CHANGE_PROPOSAL_KAFKA_CONSUMER_GROUP_ID:generic-mce-consumer-job-client}", + id = CONSUMER_GROUP_ID_VALUE, topics = "${METADATA_CHANGE_PROPOSAL_TOPIC_NAME:" + Topics.METADATA_CHANGE_PROPOSAL + "}", containerFactory = "kafkaEventConsumer") public void consume(final ConsumerRecord consumerRecord) { diff --git a/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java b/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java index 76f58fb4751085..cdcbb540eeda43 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java @@ -259,6 +259,12 @@ public static OperationContext userContextNoSearchAuthorization( .asSession(RequestContext.TEST, authorizer, sessionAuthorization); } + public static OperationContext userContextNoSearchAuthorization( + @Nonnull RequestContext requestContext) { + return systemContextNoSearchAuthorization(defaultEntityRegistry()) + .asSession(requestContext, Authorizer.EMPTY, TEST_USER_AUTH); + } + @Builder public static class EmptyAspectRetriever implements AspectRetriever { private final Supplier entityRegistrySupplier; 
diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/MetadataChangeProposalConfig.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/MetadataChangeProposalConfig.java index f988758beee363..4e8c18912c40ea 100644 --- a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/MetadataChangeProposalConfig.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/MetadataChangeProposalConfig.java @@ -14,10 +14,30 @@ public class MetadataChangeProposalConfig { @Accessors(chain = true) public static class ThrottlesConfig { Integer updateIntervalMs; + ComponentsThrottleConfig components; ThrottleConfig versioned; ThrottleConfig timeseries; } + @Data + @Accessors(chain = true) + public static class ComponentsThrottleConfig { + MceConsumerThrottleConfig mceConsumer; + ApiRequestsThrottleConfig apiRequests; + } + + @Data + @Accessors(chain = true) + public static class MceConsumerThrottleConfig { + boolean enabled; + } + + @Data + @Accessors(chain = true) + public static class ApiRequestsThrottleConfig { + boolean enabled; + } + @Data @Accessors(chain = true) public static class ThrottleConfig { diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml index 8abed3dcb44cc5..45a98b472b0aee 100644 --- a/metadata-service/configuration/src/main/resources/application.yaml +++ b/metadata-service/configuration/src/main/resources/application.yaml @@ -543,18 +543,27 @@ metadataChangeProposal: throttle: updateIntervalMs: ${MCP_THROTTLE_UPDATE_INTERVAL_MS:60000} - # Versioned MCL topic + # What component is throttled + components: + mceConsumer: + enabled: ${MCP_MCE_CONSUMER_THROTTLE_ENABLED:false} + apiRequests: + enabled: ${MCP_API_REQUESTS_THROTTLE_ENABLED:false} + + # How is it throttled + # Versioned MCL topic settings versioned: - # Whether to throttle MCP processing based on MCL 
backlog + # Whether to monitor MCL versioned backlog enabled: ${MCP_VERSIONED_THROTTLE_ENABLED:false} threshold: ${MCP_VERSIONED_THRESHOLD:4000} # throttle threshold maxAttempts: ${MCP_VERSIONED_MAX_ATTEMPTS:1000} initialIntervalMs: ${MCP_VERSIONED_INITIAL_INTERVAL_MS:100} multiplier: ${MCP_VERSIONED_MULTIPLIER:10} maxIntervalMs: ${MCP_VERSIONED_MAX_INTERVAL_MS:30000} - # Timeseries MCL topic + + # Timeseries MCL topic settings timeseries: - # Whether to throttle MCP processing based on MCL backlog + # Whether to monitor MCL timeseries backlog enabled: ${MCP_TIMESERIES_THROTTLE_ENABLED:false} threshold: ${MCP_TIMESERIES_THRESHOLD:4000} # throttle threshold maxAttempts: ${MCP_TIMESERIES_MAX_ATTEMPTS:1000} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/CacheConfig.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/CacheConfig.java index 185e1e3ae624c4..383716a80cc60a 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/CacheConfig.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/CacheConfig.java @@ -4,12 +4,18 @@ import com.hazelcast.config.Config; import com.hazelcast.config.EvictionConfig; import com.hazelcast.config.EvictionPolicy; +import com.hazelcast.config.InMemoryFormat; import com.hazelcast.config.MapConfig; import com.hazelcast.config.MaxSizePolicy; +import com.hazelcast.config.MergePolicyConfig; +import com.hazelcast.config.ReplicatedMapConfig; import com.hazelcast.core.Hazelcast; import com.hazelcast.core.HazelcastInstance; +import com.hazelcast.spi.merge.LatestUpdateMergePolicy; import com.hazelcast.spring.cache.HazelcastCacheManager; +import java.util.List; import java.util.concurrent.TimeUnit; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import 
org.springframework.cache.CacheManager; @@ -19,6 +25,7 @@ @Configuration public class CacheConfig { + public static final String THROTTLE_MAP = "distributedThrottle"; @Value("${cache.primary.ttlSeconds:600}") private int cacheTtlSeconds; @@ -45,23 +52,15 @@ private Caffeine caffeineCacheBuilder() { .recordStats(); } - @Bean + @Bean("hazelcastInstance") @ConditionalOnProperty(name = "searchService.cacheImplementation", havingValue = "hazelcast") - public CacheManager hazelcastCacheManager() { + public HazelcastInstance hazelcastInstance( + List hazelcastMapConfigs, + List hazelcastReplicatedMapConfigs) { Config config = new Config(); - // TODO: This setting is equivalent to expireAfterAccess, refreshes timer after a get, put, - // containsKey etc. - // is this behavior what we actually desire? Should we change it now? - MapConfig mapConfig = new MapConfig().setMaxIdleSeconds(cacheTtlSeconds); - EvictionConfig evictionConfig = - new EvictionConfig() - .setMaxSizePolicy(MaxSizePolicy.PER_NODE) - .setSize(cacheMaxSize) - .setEvictionPolicy(EvictionPolicy.LFU); - mapConfig.setEvictionConfig(evictionConfig); - mapConfig.setName("default"); - config.addMapConfig(mapConfig); + hazelcastMapConfigs.forEach(config::addMapConfig); + hazelcastReplicatedMapConfigs.forEach(config::addReplicatedMapConfig); // Force classloader to load from application code config.setClassLoader(this.getClass().getClassLoader()); @@ -74,8 +73,44 @@ public CacheManager hazelcastCacheManager() { .setEnabled(true) .setProperty("service-dns", hazelcastServiceName); - HazelcastInstance hazelcastInstance = Hazelcast.newHazelcastInstance(config); + return Hazelcast.newHazelcastInstance(config); + } + @Bean + @ConditionalOnProperty(name = "searchService.cacheImplementation", havingValue = "hazelcast") + public CacheManager hazelcastCacheManager( + @Qualifier("hazelcastInstance") final HazelcastInstance hazelcastInstance) { return new HazelcastCacheManager(hazelcastInstance); } + + @Bean + 
@ConditionalOnProperty(name = "searchService.cacheImplementation", havingValue = "hazelcast") + public MapConfig defaultMapConfig() { + // TODO: This setting is equivalent to expireAfterAccess, refreshes timer after a get, put, + // containsKey etc. + // is this behavior what we actually desire? Should we change it now? + MapConfig mapConfig = new MapConfig().setMaxIdleSeconds(cacheTtlSeconds); + + EvictionConfig evictionConfig = + new EvictionConfig() + .setMaxSizePolicy(MaxSizePolicy.PER_NODE) + .setSize(cacheMaxSize) + .setEvictionPolicy(EvictionPolicy.LFU); + mapConfig.setEvictionConfig(evictionConfig); + mapConfig.setName("default"); + return mapConfig; + } + + @Bean + @ConditionalOnProperty(name = "searchService.cacheImplementation", havingValue = "hazelcast") + public ReplicatedMapConfig distributedThrottleMapConfig() { + ReplicatedMapConfig mapConfig = new ReplicatedMapConfig(); + mapConfig + .setName(THROTTLE_MAP) + .setInMemoryFormat(InMemoryFormat.OBJECT) + .setMergePolicyConfig( + new MergePolicyConfig().setPolicy(LatestUpdateMergePolicy.class.getName())); + + return mapConfig; + } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java index 51eea1578596bb..aa29908e415074 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java @@ -3,17 +3,21 @@ import com.linkedin.datahub.graphql.featureflags.FeatureFlags; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.dao.producer.KafkaEventProducer; +import com.linkedin.metadata.dao.throttle.ThrottleSensor; import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.EntityServiceImpl; 
import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; +import java.util.List; import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.DependsOn; +@Slf4j @Configuration public class EntityServiceFactory { @@ -26,17 +30,35 @@ public class EntityServiceFactory { protected EntityService createInstance( @Qualifier("kafkaEventProducer") final KafkaEventProducer eventProducer, @Qualifier("entityAspectDao") final AspectDao aspectDao, - final ConfigurationProvider configurationProvider, - @Value("${featureFlags.showBrowseV2}") final boolean enableBrowsePathV2) { + @Qualifier("configurationProvider") ConfigurationProvider configurationProvider, + @Value("${featureFlags.showBrowseV2}") final boolean enableBrowsePathV2, + final List throttleSensors) { FeatureFlags featureFlags = configurationProvider.getFeatureFlags(); - return new EntityServiceImpl( - aspectDao, - eventProducer, - featureFlags.isAlwaysEmitChangeLog(), - featureFlags.getPreProcessHooks(), - _ebeanMaxTransactionRetry, - enableBrowsePathV2); + EntityServiceImpl entityService = + new EntityServiceImpl( + aspectDao, + eventProducer, + featureFlags.isAlwaysEmitChangeLog(), + featureFlags.getPreProcessHooks(), + _ebeanMaxTransactionRetry, + enableBrowsePathV2); + + if (throttleSensors != null + && !throttleSensors.isEmpty() + && configurationProvider + .getMetadataChangeProposal() + .getThrottle() + .getComponents() + .getApiRequests() + .isEnabled()) { + log.info("API Requests Throttle Enabled"); + throttleSensors.forEach(sensor -> sensor.addCallback(entityService::handleThrottleEvent)); + } else { + log.info("API Requests Throttle Disabled"); + } + + return entityService; } } diff --git 
a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/throttle/ManualThrottleSensor.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/throttle/ManualThrottleSensor.java new file mode 100644 index 00000000000000..72505beb5b40e1 --- /dev/null +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/throttle/ManualThrottleSensor.java @@ -0,0 +1,117 @@ +package com.linkedin.gms.factory.entity.throttle; + +import static com.linkedin.gms.factory.common.CacheConfig.THROTTLE_MAP; + +import com.hazelcast.core.EntryEvent; +import com.hazelcast.core.EntryListener; +import com.hazelcast.core.HazelcastInstance; +import com.hazelcast.map.MapEvent; +import com.hazelcast.replicatedmap.ReplicatedMap; +import com.linkedin.metadata.dao.throttle.ThrottleControl; +import com.linkedin.metadata.dao.throttle.ThrottleEvent; +import com.linkedin.metadata.dao.throttle.ThrottleSensor; +import com.linkedin.metadata.dao.throttle.ThrottleType; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Builder; +import org.springframework.stereotype.Component; + +/** + * Uses the distributed cache to propagate a manual throttle event when GMS is run in a distributed + * mode. 
+ */ +@Component +public class ManualThrottleSensor implements ThrottleSensor { + private static final ThrottleEvent ENABLE = + ThrottleEvent.builder().throttled(Map.of(ThrottleType.MANUAL, true)).build(); + private static final ThrottleEvent DISABLE = + ThrottleEvent.builder().throttled(Map.of(ThrottleType.MANUAL, false)).build(); + + /** A list of throttle event listeners to execute when throttling occurs and ceases */ + private final List> throttleCallbacks = + new ArrayList<>(); + + private final Set registeredThrottles = new HashSet<>(); + + @Nullable private final ReplicatedMap throttleState; + + public ManualThrottleSensor(@Nullable final HazelcastInstance hazelcastInstance) { + if (hazelcastInstance != null) { + throttleState = hazelcastInstance.getReplicatedMap(THROTTLE_MAP); + throttleState.addEntryListener( + ManualThrottleTypeListener.builder().manualThrottleSensor(this).build()); + } else { + throttleState = null; + } + } + + @Override + public ManualThrottleSensor addCallback(Function callback) { + throttleCallbacks.add(callback); + return this; + } + + public void setThrottle(boolean enabled) { + if (throttleState == null) { + // set local only + setLocalThrottle(enabled); + } else { + // set shared location for distribution + throttleState.put(ThrottleType.MANUAL.toString(), enabled ? 
"true" : "false"); + } + } + + private void setLocalThrottle(boolean enabled) { + synchronized (this) { + registeredThrottles.forEach(listener -> listener.execute(DISABLE)); + registeredThrottles.clear(); + + if (enabled) { + registeredThrottles.addAll( + throttleCallbacks.stream() + .map(listener -> listener.apply(ENABLE)) + .collect(Collectors.toSet())); + } + } + } + + @Builder + private record ManualThrottleTypeListener(@Nonnull ManualThrottleSensor manualThrottleSensor) + implements EntryListener { + @Override + public void entryAdded(EntryEvent event) { + if (ThrottleType.MANUAL.equals(ThrottleType.valueOf(event.getKey()))) { + manualThrottleSensor.setLocalThrottle(Boolean.parseBoolean(event.getValue())); + } + } + + @Override + public void entryUpdated(EntryEvent event) { + if (ThrottleType.MANUAL.equals(ThrottleType.valueOf(event.getKey()))) { + manualThrottleSensor.setLocalThrottle(Boolean.parseBoolean(event.getValue())); + } + } + + @Override + public void entryRemoved(EntryEvent event) {} + + @Override + public void entryEvicted(EntryEvent entryEvent) {} + + @Override + public void entryExpired(EntryEvent entryEvent) {} + + @Override + public void mapCleared(MapEvent mapEvent) {} + + @Override + public void mapEvicted(MapEvent mapEvent) {} + } +} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/throttle/KafkaProducerThrottleFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/throttle/KafkaThrottleFactory.java similarity index 59% rename from metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/throttle/KafkaProducerThrottleFactory.java rename to metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/throttle/KafkaThrottleFactory.java index 1eaff82fd517f0..e2cdca8a065c03 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/throttle/KafkaProducerThrottleFactory.java +++ 
b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/throttle/KafkaThrottleFactory.java @@ -1,15 +1,16 @@ package com.linkedin.gms.factory.kafka.throttle; -import com.datahub.metadata.dao.producer.KafkaProducerThrottle; +import com.datahub.metadata.dao.throttle.KafkaThrottleSensor; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.config.MetadataChangeProposalConfig; import com.linkedin.metadata.config.kafka.KafkaConfiguration; +import com.linkedin.metadata.dao.throttle.NoOpSensor; +import com.linkedin.metadata.dao.throttle.ThrottleSensor; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.mxe.Topics; import java.util.Arrays; import java.util.HashMap; import java.util.Map; -import java.util.Optional; import lombok.extern.slf4j.Slf4j; import org.apache.kafka.clients.admin.AdminClient; import org.apache.kafka.clients.admin.AdminClientConfig; @@ -19,19 +20,14 @@ import org.springframework.boot.autoconfigure.kafka.KafkaProperties; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -import org.springframework.kafka.config.KafkaListenerEndpointRegistry; -import org.springframework.kafka.listener.MessageListenerContainer; @Slf4j @Configuration -public class KafkaProducerThrottleFactory { +public class KafkaThrottleFactory { @Value("${METADATA_CHANGE_LOG_KAFKA_CONSUMER_GROUP_ID:generic-mae-consumer-job-client}") private String maeConsumerGroupId; - @Value("${METADATA_CHANGE_PROPOSAL_KAFKA_CONSUMER_GROUP_ID:generic-mce-consumer-job-client}") - private String mceConsumerGroupId; - @Value("${METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME:" + Topics.METADATA_CHANGE_LOG_VERSIONED + "}") private String versionedTopicName; @@ -39,41 +35,28 @@ public class KafkaProducerThrottleFactory { "${METADATA_CHANGE_LOG_TIMESERIES_TOPIC_NAME:" + Topics.METADATA_CHANGE_LOG_TIMESERIES + "}") private String timeseriesTopicName; - @Bean - public 
KafkaProducerThrottle kafkaProducerThrottle( + @Bean("kafkaThrottle") + public ThrottleSensor kafkaThrottle( @Qualifier("configurationProvider") ConfigurationProvider provider, final KafkaProperties kafkaProperties, - final EntityRegistry entityRegistry, - final KafkaListenerEndpointRegistry registry) { + final EntityRegistry entityRegistry) { KafkaConfiguration kafkaConfiguration = provider.getKafka(); MetadataChangeProposalConfig mcpConfig = provider.getMetadataChangeProposal(); - return KafkaProducerThrottle.builder() - .entityRegistry(entityRegistry) - .kafkaAdmin(kafkaAdmin(kafkaConfiguration, kafkaProperties)) - .config(mcpConfig.getThrottle()) - .mclConsumerGroupId(maeConsumerGroupId) - .timeseriesTopicName(timeseriesTopicName) - .versionedTopicName(versionedTopicName) - .pauseConsumer( - (pause) -> { - Optional container = - Optional.ofNullable(registry.getListenerContainer(mceConsumerGroupId)); - if (container.isEmpty()) { - log.warn( - "Expected container was missing: {} throttling is not possible.", - mceConsumerGroupId); - } else { - if (pause) { - container.ifPresent(MessageListenerContainer::pause); - } else { - container.ifPresent(MessageListenerContainer::resume); - } - } - }) - .build() - .start(); + if (mcpConfig.getThrottle().getUpdateIntervalMs() > 0) { + return KafkaThrottleSensor.builder() + .entityRegistry(entityRegistry) + .kafkaAdmin(kafkaAdmin(kafkaConfiguration, kafkaProperties)) + .config(mcpConfig.getThrottle()) + .mclConsumerGroupId(maeConsumerGroupId) + .timeseriesTopicName(timeseriesTopicName) + .versionedTopicName(versionedTopicName) + .build() + .start(); + } else { + return new NoOpSensor(); + } } private static AdminClient kafkaAdmin( diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/GlobalControllerExceptionHandler.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/GlobalControllerExceptionHandler.java index 0e9fcbe15b525b..dc4726900a1c31 100644 --- 
a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/GlobalControllerExceptionHandler.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/GlobalControllerExceptionHandler.java @@ -1,11 +1,13 @@ package io.datahubproject.openapi; +import com.linkedin.metadata.dao.throttle.APIThrottleException; import io.datahubproject.openapi.exception.InvalidUrnException; import java.util.Map; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.ConversionNotSupportedException; import org.springframework.core.Ordered; import org.springframework.core.convert.ConversionFailedException; +import org.springframework.http.HttpHeaders; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.ControllerAdvice; @@ -30,4 +32,17 @@ public ResponseEntity handleConflict(RuntimeException ex) { public static ResponseEntity> handleUrnException(InvalidUrnException e) { return new ResponseEntity<>(Map.of("error", e.getMessage()), HttpStatus.BAD_REQUEST); } + + @ExceptionHandler(APIThrottleException.class) + public static ResponseEntity> handleThrottleException( + APIThrottleException e) { + + HttpHeaders headers = new HttpHeaders(); + if (e.getDurationMs() >= 0) { + headers.add(HttpHeaders.RETRY_AFTER, String.valueOf(e.getDurationSeconds())); + } + + return new ResponseEntity<>( + Map.of("error", e.getMessage()), headers, HttpStatus.TOO_MANY_REQUESTS); + } } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/throttle/ThrottleController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/throttle/ThrottleController.java new file mode 100644 index 00000000000000..3c44d94428f428 --- /dev/null +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/throttle/ThrottleController.java @@ -0,0 +1,113 @@ +package 
io.datahubproject.openapi.operations.throttle; + +import com.datahub.authentication.Authentication; +import com.datahub.authentication.AuthenticationContext; +import com.datahub.authorization.AuthUtil; +import com.datahub.authorization.AuthorizerChain; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.gms.factory.entity.throttle.ManualThrottleSensor; +import com.linkedin.metadata.authorization.PoliciesConfig; +import com.linkedin.metadata.entity.EntityServiceImpl; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.context.RequestContext; +import io.swagger.v3.oas.annotations.Operation; +import io.swagger.v3.oas.annotations.tags.Tag; +import jakarta.servlet.http.HttpServletRequest; +import java.util.List; +import java.util.Map; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.http.HttpStatus; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RestController; + +@RestController +@RequestMapping("/operations/throttle") +@Slf4j +@Tag(name = "GMS Throttle Control", description = "An API for GMS throttle control.") +public class ThrottleController { + + private final OperationContext systemOperationContext; + private final AuthorizerChain authorizerChain; + private final EntityServiceImpl entityService; + private final ObjectMapper objectMapper; + private final ManualThrottleSensor manualThrottleSensor; + + public ThrottleController( + @Qualifier("systemOperationContext") OperationContext systemOperationContext, + EntityServiceImpl 
entityService, + AuthorizerChain authorizerChain, + ObjectMapper objectMapper, + ManualThrottleSensor manualThrottleSensor) { + this.systemOperationContext = systemOperationContext; + this.authorizerChain = authorizerChain; + this.entityService = entityService; + this.objectMapper = objectMapper; + this.manualThrottleSensor = manualThrottleSensor; + } + + @Tag(name = "API Requests") + @GetMapping(path = "/requests", produces = MediaType.APPLICATION_JSON_VALUE) + @Operation(summary = "Get API Requests Throttle") + public ResponseEntity> getManualAPIRequestsThrottle( + HttpServletRequest httpServletRequest) { + Authentication authentication = AuthenticationContext.getAuthentication(); + String actorUrnStr = authentication.getActor().toUrnStr(); + + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder() + .buildOpenapi( + actorUrnStr, httpServletRequest, "getManualAPIRequestsThrottle", List.of()), + authorizerChain, + authentication, + true); + + if (!AuthUtil.isAPIAuthorized(opContext, PoliciesConfig.MANAGE_SYSTEM_OPERATIONS_PRIVILEGE)) { + return ResponseEntity.status(HttpStatus.FORBIDDEN) + .body( + Map.of( + "error", + String.format(actorUrnStr + " is not authorized for system operations."))); + } + + return ResponseEntity.ok( + objectMapper.convertValue(entityService.getThrottleEvents(), new TypeReference<>() {})); + } + + @Tag(name = "API Requests") + @PostMapping(path = "/requests/manual", produces = MediaType.APPLICATION_JSON_VALUE) + @Operation(summary = "Set API Requests Manual Throttle") + public ResponseEntity> setAPIRequestManualThrottle( + HttpServletRequest httpServletRequest, @RequestParam(name = "enabled") boolean enabled) { + + Authentication authentication = AuthenticationContext.getAuthentication(); + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder() + .buildOpenapi( + authentication.getActor().toUrnStr(), + httpServletRequest, + 
"getManualAPIRequestsThrottle", + List.of()), + authorizerChain, + authentication, + true); + + if (!AuthUtil.isAPIAuthorized(opContext, PoliciesConfig.MANAGE_SYSTEM_OPERATIONS_PRIVILEGE)) { + return ResponseEntity.status(HttpStatus.FORBIDDEN).build(); + } + + manualThrottleSensor.setThrottle(enabled); + + return getManualAPIRequestsThrottle(httpServletRequest); + } +} diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java index 69bfe288da7a7a..42265e902cc6fd 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java @@ -29,7 +29,7 @@ import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.resources.operations.Utils; -import com.linkedin.metadata.restli.RestliUtil; +import com.linkedin.metadata.resources.restli.RestliUtils; import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.timeseries.TimeseriesAspectService; import com.linkedin.mxe.MetadataChangeProposal; @@ -133,7 +133,7 @@ public Task get( throws URISyntaxException { log.info("GET ASPECT urn: {} aspect: {} version: {}", urnStr, aspectName, version); final Urn urn = Urn.createFromString(urnStr); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { Authentication auth = AuthenticationContext.getAuthentication(); @@ -153,7 +153,7 @@ public Task get( _entityService.getVersionedAspect(opContext, urn, aspectName, version); if (aspect == null) { log.warn("Did not find urn: {} aspect: {} version: {}", urn, aspectName, version); - throw RestliUtil.nonExceptionResourceNotFound(); + throw RestliUtils.nonExceptionResourceNotFound(); } return new 
AnyRecord(aspect.data()); }, @@ -183,7 +183,7 @@ public Task getTimeseriesAspectValues( endTimeMillis, limit); final Urn urn = Urn.createFromString(urnStr); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { Authentication auth = AuthenticationContext.getAuthentication(); @@ -298,7 +298,7 @@ private Task ingestProposals( final AuditStamp auditStamp = new AuditStamp().setTime(_clock.millis()).setActor(Urn.createFromString(actorUrnStr)); - return RestliUtil.toTask(() -> { + return RestliUtils.toTask(() -> { log.debug("Proposals: {}", metadataChangeProposals); try { final AspectsBatch batch = AspectsBatchImpl.builder() @@ -332,7 +332,7 @@ private Task ingestProposals( public Task getCount( @ActionParam(PARAM_ASPECT) @Nonnull String aspectName, @ActionParam(PARAM_URN_LIKE) @Optional @Nullable String urnLike) { - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { Authentication authentication = AuthenticationContext.getAuthentication(); @@ -364,7 +364,7 @@ public Task restoreIndices( @ActionParam("limit") @Optional @Nullable Integer limit, @ActionParam("gePitEpochMs") @Optional @Nullable Long gePitEpochMs, @ActionParam("lePitEpochMs") @Optional @Nullable Long lePitEpochMs) { - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { Authentication authentication = AuthenticationContext.getAuthentication(); diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java index 69c789ceb2a3cd..ebbfc6bb6c2983 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java @@ -18,7 +18,7 @@ import com.linkedin.entity.EnvelopedAspect; import 
com.linkedin.metadata.aspect.VersionedAspect; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.restli.RestliUtil; +import com.linkedin.metadata.resources.restli.RestliUtils; import com.linkedin.metadata.run.AspectRowSummary; import com.linkedin.metadata.run.AspectRowSummaryArray; import com.linkedin.metadata.run.IngestionRunSummary; @@ -108,7 +108,7 @@ public Task rollback( "Both Safe & hardDelete flags were defined, honouring safe flag as hardDelete is deprecated"); } try { - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { try { @@ -136,7 +136,7 @@ public Task list( @ActionParam("includeSoft") @Optional @Nullable Boolean includeSoft) { log.info("LIST RUNS offset: {} size: {}", pageOffset, pageSize); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { List summaries = systemMetadataService.listRuns( @@ -160,7 +160,7 @@ public Task describe( @ActionParam("includeAspect") @Optional @Nullable Boolean includeAspect) { log.info("DESCRIBE RUN runId: {}, start: {}, count: {}", runId, start, count); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { Authentication auth = AuthenticationContext.getAuthentication(); diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java index a8f127e52ee791..16901853245604 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java @@ -21,6 +21,7 @@ import com.datahub.authorization.AuthUtil; import com.datahub.authorization.EntitySpec; +import com.linkedin.metadata.resources.restli.RestliUtils; import com.linkedin.metadata.utils.SystemMetadataUtils; import io.datahubproject.metadata.context.RequestContext; import 
io.datahubproject.metadata.services.RestrictedService; @@ -52,7 +53,7 @@ import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; -import com.linkedin.metadata.restli.RestliUtil; +import com.linkedin.metadata.resources.restli.RestliUtils; import com.linkedin.metadata.run.AspectRowSummary; import com.linkedin.metadata.run.AspectRowSummaryArray; import com.linkedin.metadata.run.DeleteEntityResponse; @@ -205,7 +206,7 @@ public Task get( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to get entity " + urn); } - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { final Set projectedAspects = aspectNames == null @@ -213,7 +214,7 @@ public Task get( : new HashSet<>(Arrays.asList(aspectNames)); final Entity entity = entityService.getEntity(opContext, urn, projectedAspects, true); if (entity == null) { - throw RestliUtil.resourceNotFoundException(String.format("Did not find %s", urnStr)); + throw RestliUtils.resourceNotFoundException(String.format("Did not find %s", urnStr)); } return new AnyRecord(entity.data()); }, @@ -246,7 +247,7 @@ public Task> batchGet( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to get entities: " + urnStrs); } - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { final Set projectedAspects = aspectNames == null @@ -296,7 +297,7 @@ public Task ingest( // variables referenced in lambdas are required to be final final SystemMetadata finalSystemMetadata = systemMetadata; - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { entityService.ingestEntity(opContext, entity, auditStamp, finalSystemMetadata); return null; @@ -345,7 +346,7 @@ public Task batchIngest( } if (entities.length != systemMetadataList.length) { - throw RestliUtil.invalidArgumentsException("entities and systemMetadata length must match"); + throw RestliUtils.invalidArgumentsException("entities and systemMetadata length must match"); } final 
List finalSystemMetadataList = @@ -353,7 +354,7 @@ public Task batchIngest( .map(SystemMetadataUtils::generateSystemMetadataIfEmpty) .collect(Collectors.toList()); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { entityService.ingestEntities(opContext, Arrays.asList(entities), auditStamp, finalSystemMetadataList); @@ -394,7 +395,7 @@ public Task search( log.info("GET SEARCH RESULTS for {} with query {}", entityName, input); // TODO - change it to use _searchService once we are confident on it's latency - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { final SearchResult result; // This API is not used by the frontend for search bars so we default to structured @@ -445,7 +446,7 @@ public Task searchAcrossEntities( List sortCriterionList = getSortCriteria(sortCriteria, sortCriterion); log.info("GET SEARCH RESULTS ACROSS ENTITIES for {} with query {}", entityList, input); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { SearchResult result = searchService.searchAcrossEntities(opContext, entityList, input, filter, sortCriterionList, start, count); if (!isAPIAuthorizedResult( @@ -507,7 +508,7 @@ public Task scrollAcrossEntities( input, scrollId); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { ScrollResult result = searchService.scrollAcrossEntities( opContext, @@ -574,7 +575,7 @@ public Task searchAcrossLineage( direction, entityList, input); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> validateLineageSearchResult(opContext, lineageSearchService.searchAcrossLineage( opContext, urn, @@ -637,7 +638,7 @@ public Task scrollAcrossLineage( List sortCriterionList = getSortCriteria(sortCriteria, sortCriterion); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> validateLineageScrollResult(opContext, lineageSearchService.scrollAcrossLineage( @@ -683,7 +684,7 @@ public Task list( List sortCriterionList = getSortCriteria(sortCriteria, sortCriterion); log.info("GET LIST RESULTS for {} 
with filter {}", entityName, filter); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { SearchResult result = entitySearchService.filter(opContext, entityName, filter, sortCriterionList, start, count); if (!AuthUtil.isAPIAuthorizedResult( @@ -722,7 +723,7 @@ public Task autocomplete( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to search."); } - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { AutoCompleteResult result = entitySearchService.autoComplete(opContext, entityName, query, field, filter, limit); if (!isAPIAuthorizedResult( @@ -760,7 +761,7 @@ public Task browse( } log.info("GET BROWSE RESULTS for {} at path {}", entityName, path); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { BrowseResult result = entitySearchService.browse(opContext, entityName, path, filter, start, limit); if (!isAPIAuthorizedResult( @@ -796,7 +797,7 @@ public Task getBrowsePaths( } log.info("GET BROWSE PATHS for {}", urn); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> new StringArray(entitySearchService.getBrowsePaths(opContext, urnToEntityName(urn), urn)), MetricRegistry.name(this.getClass(), "getBrowsePaths")); } @@ -836,7 +837,7 @@ public Task deleteEntities( ComparableVersion finalRegistryVersion = registryVersion; String finalRegistryName1 = registryName; ComparableVersion finalRegistryVersion1 = registryVersion; - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { RollbackResponse response = new RollbackResponse(); List aspectRowsToDelete = @@ -918,7 +919,7 @@ public Task deleteEntity( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to delete entity: " + urnStr); } - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { // Find the timeseries aspects to delete. If aspectName is null, delete all. 
List timeseriesAspectNames = @@ -1038,7 +1039,7 @@ public Task deleteReferencesTo( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to delete entity " + urnStr); } - return RestliUtil.toTask( + return RestliUtils.toTask( () -> deleteEntityService.deleteReferencesTo(opContext, urn, dryRun), MetricRegistry.name(this.getClass(), "deleteReferences")); } @@ -1064,7 +1065,7 @@ public Task setWriteable( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to enable and disable write mode."); } log.info("setting entity resource to be writable"); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { entityService.setWritable(value); return null; @@ -1088,7 +1089,7 @@ public Task getTotalEntityCount(@ActionParam(PARAM_ENTITY) @Nonnull String HttpStatus.S_403_FORBIDDEN, "User is unauthorized to get entity counts."); } - return RestliUtil.toTask(() -> entitySearchService.docCount(opContext, entityName)); + return RestliUtils.toTask(() -> entitySearchService.docCount(opContext, entityName)); } @Action(name = "batchGetTotalEntityCount") @@ -1108,7 +1109,7 @@ public Task batchGetTotalEntityCount( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to get entity counts."); } - return RestliUtil.toTask( + return RestliUtils.toTask( () -> new LongMap(searchService.docCountPerEntity(opContext, Arrays.asList(entityNames)))); } @@ -1134,7 +1135,7 @@ public Task listUrns( } log.info("LIST URNS for {} with start {} and count {}", entityName, start, count); - return RestliUtil.toTask(() -> { + return RestliUtils.toTask(() -> { ListUrnsResult result = entityService.listUrns(opContext, entityName, start, count); if (!isAPIAuthorizedEntityUrns( opContext, @@ -1175,7 +1176,7 @@ public Task applyRetention( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to apply retention."); } - return RestliUtil.toTask( + return RestliUtils.toTask( () -> entityService.batchApplyRetention(opContext, start, count, attemptWithVersion, aspectName, urn), ACTION_APPLY_RETENTION); } @@ -1205,7 +1206,7 @@ 
public Task filter( List sortCriterionList = getSortCriteria(sortCriteria, sortCriterion); log.info("FILTER RESULTS for {} with filter {}", entityName, filter); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { SearchResult result = entitySearchService.filter(opContext.withSearchFlags(flags -> flags.setFulltext(true)), entityName, filter, sortCriterionList, start, count); @@ -1242,7 +1243,7 @@ public Task exists(@ActionParam(PARAM_URN) @Nonnull String urnStr, @Act log.info("EXISTS for {}", urnStr); final boolean includeRemoved = includeSoftDelete == null || includeSoftDelete; - return RestliUtil.toTask( + return RestliUtils.toTask( () -> entityService.exists(opContext, urn, includeRemoved), MetricRegistry.name(this.getClass(), "exists")); } } diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java index 1afe062ce5c5f4..20209ddf44d643 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java @@ -16,7 +16,7 @@ import com.linkedin.entity.EntityResponse; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.restli.RestliUtil; +import com.linkedin.metadata.resources.restli.RestliUtils; import com.linkedin.parseq.Task; import com.linkedin.restli.common.HttpStatus; import com.linkedin.restli.server.RestLiServiceException; @@ -82,7 +82,7 @@ public Task get( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to get entity " + urn); } - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { final String entityName = urnToEntityName(urn); final Set projectedAspects = @@ -131,7 +131,7 @@ public Task> batchGet( 
return Task.value(Collections.emptyMap()); } final String entityName = urnToEntityName(urns.iterator().next()); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { final Set projectedAspects = aspectNames == null diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityVersionedV2Resource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityVersionedV2Resource.java index d253ef69680339..73b2d1a6c5cb87 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityVersionedV2Resource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityVersionedV2Resource.java @@ -18,7 +18,7 @@ import com.linkedin.entity.EntityResponse; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.restli.RestliUtil; +import com.linkedin.metadata.resources.restli.RestliUtils; import com.linkedin.parseq.Task; import com.linkedin.restli.common.HttpStatus; import com.linkedin.restli.server.RestLiServiceException; @@ -96,7 +96,7 @@ public Task> batchGetVersioned( if (versionedUrnStrs.size() <= 0) { return Task.value(Collections.emptyMap()); } - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { final Set projectedAspects = aspectNames == null diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/lineage/Relationships.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/lineage/Relationships.java index ba8baacdd9c920..738f33db63a8b7 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/lineage/Relationships.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/lineage/Relationships.java @@ -28,7 +28,7 @@ import 
com.linkedin.metadata.graph.LineageDirection; import com.linkedin.metadata.graph.RelatedEntitiesResult; import com.linkedin.metadata.query.filter.RelationshipDirection; -import com.linkedin.metadata.restli.RestliUtil; +import com.linkedin.metadata.resources.restli.RestliUtils; import com.linkedin.metadata.search.utils.QueryUtils; import com.linkedin.parseq.Task; import com.linkedin.restli.common.HttpStatus; @@ -137,7 +137,7 @@ public Task get( } RelationshipDirection direction = RelationshipDirection.valueOf(rawDirection); final List relationshipTypes = Arrays.asList(relationshipTypesParam); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { final RelatedEntitiesResult relatedEntitiesResult = getRelatedEntities(rawUrn, relationshipTypes, direction, start, count); @@ -214,7 +214,7 @@ public Task getLineage( throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to get entity lineage: " + urnStr); } - return RestliUtil.toTask( + return RestliUtils.toTask( () -> _graphService.getLineage( urn, diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/OperationsResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/OperationsResource.java index 5f4bb46ff626d1..ea329ce0809fba 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/OperationsResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/OperationsResource.java @@ -17,7 +17,7 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.Filter; -import com.linkedin.metadata.restli.RestliUtil; +import com.linkedin.metadata.resources.restli.RestliUtils; import com.linkedin.metadata.search.utils.QueryUtils; import com.linkedin.metadata.systemmetadata.SystemMetadataService; 
import com.linkedin.metadata.timeseries.BatchWriteOperationsOptions; @@ -106,7 +106,7 @@ public Task restoreIndices( @ActionParam("limit") @Optional @Nullable Integer limit, @ActionParam("gePitEpochMs") @Optional @Nullable Long gePitEpochMs, @ActionParam("lePitEpochMs") @Optional @Nullable Long lePitEpochMs) { - return RestliUtil.toTask( + return RestliUtils.toTask( () -> Utils.restoreIndices(systemOperationContext, getContext(), aspectName, urn, urnLike, start, batchSize, limit, gePitEpochMs, lePitEpochMs, _authorizer, _entityService), MetricRegistry.name(this.getClass(), "restoreIndices")); @@ -131,7 +131,7 @@ public Task getTaskStatus( @ActionParam(PARAM_NODE_ID) @Optional String nodeId, @ActionParam(PARAM_TASK_ID) @Optional("0") long taskId, @ActionParam(PARAM_TASK) @Optional String task) { - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { final Authentication auth = AuthenticationContext.getAuthentication(); @@ -194,7 +194,7 @@ public Task getTaskStatus( @Nonnull @WithSpan public Task getIndexSizes() { - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { final Authentication auth = AuthenticationContext.getAuthentication(); @@ -319,7 +319,7 @@ public Task truncateTimeseriesAspect( @ActionParam(PARAM_TIMEOUT_SECONDS) @Optional @Nullable Long timeoutSeconds, @ActionParam(PARAM_FORCE_DELETE_BY_QUERY) @Optional @Nullable Boolean forceDeleteByQuery, @ActionParam(PARAM_FORCE_REINDEX) @Optional @Nullable Boolean forceReindex) { - return RestliUtil.toTask( + return RestliUtils.toTask( () -> executeTruncateTimeseriesAspect( entityType, diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/platform/PlatformResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/platform/PlatformResource.java index 986783e6359f2f..46fab05133651a 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/platform/PlatformResource.java +++ 
b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/platform/PlatformResource.java @@ -10,7 +10,7 @@ import com.linkedin.metadata.authorization.Disjunctive; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.event.EventProducer; -import com.linkedin.metadata.restli.RestliUtil; +import com.linkedin.metadata.resources.restli.RestliUtils; import com.linkedin.mxe.PlatformEvent; import com.linkedin.parseq.Task; import com.linkedin.restli.common.HttpStatus; @@ -70,7 +70,7 @@ public Task producePlatformEvent( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to produce platform events."); } log.info(String.format("Emitting platform event. name: %s, key: %s", eventName, key)); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { _eventProducer.producePlatformEvent(eventName, key, event); return null; diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java index 0ca8eb49308b32..185874fac1382d 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java @@ -1,5 +1,10 @@ package com.linkedin.metadata.resources.restli; +import com.codahale.metrics.MetricRegistry; +import com.codahale.metrics.Timer; +import com.linkedin.metadata.dao.throttle.APIThrottleException; +import com.linkedin.metadata.restli.NonExceptionHttpErrorResponse; +import com.linkedin.metadata.utils.metrics.MetricUtils; import com.linkedin.parseq.Task; import com.linkedin.restli.common.HttpStatus; import com.linkedin.restli.server.RestLiServiceException; @@ -27,20 +32,41 @@ public static Task toTask(@Nonnull Supplier supplier) { return Task.value(supplier.get()); } catch (Throwable throwable) 
{ + final RestLiServiceException finalException; + // Convert IllegalArgumentException to BAD REQUEST if (throwable instanceof IllegalArgumentException || throwable.getCause() instanceof IllegalArgumentException) { - throwable = badRequestException(throwable.getMessage()); - } - - if (throwable instanceof RestLiServiceException) { - throw (RestLiServiceException) throwable; + finalException = badRequestException(throwable.getMessage()); + } else if (throwable instanceof APIThrottleException) { + finalException = apiThrottled(throwable.getMessage()); + } else if (throwable instanceof RestLiServiceException) { + finalException = (RestLiServiceException) throwable; + } else { + finalException = new RestLiServiceException(HttpStatus.S_500_INTERNAL_SERVER_ERROR, throwable); } - throw new RestLiServiceException(HttpStatus.S_500_INTERNAL_SERVER_ERROR, throwable); + throw finalException; } } + @Nonnull + public static Task toTask(@Nonnull Supplier supplier, String metricName) { + Timer.Context context = MetricUtils.timer(metricName).time(); + // Stop timer on success and failure + return toTask(supplier) + .transform( + orig -> { + context.stop(); + if (orig.isFailed()) { + MetricUtils.counter(MetricRegistry.name(metricName, "failed")).inc(); + } else { + MetricUtils.counter(MetricRegistry.name(metricName, "success")).inc(); + } + return orig; + }); + } + /** * Similar to {@link #toTask(Supplier)} but the supplier is expected to return an {@link Optional} * instead. 
A {@link RestLiServiceException} with 404 HTTP status code will be thrown if the @@ -59,6 +85,11 @@ public static RestLiServiceException resourceNotFoundException() { return resourceNotFoundException(null); } + @Nonnull + public static RestLiServiceException nonExceptionResourceNotFound() { + return new NonExceptionHttpErrorResponse(HttpStatus.S_404_NOT_FOUND); + } + @Nonnull public static RestLiServiceException resourceNotFoundException(@Nullable String message) { return new RestLiServiceException(HttpStatus.S_404_NOT_FOUND, message); @@ -73,4 +104,9 @@ public static RestLiServiceException badRequestException(@Nullable String messag public static RestLiServiceException invalidArgumentsException(@Nullable String message) { return new RestLiServiceException(HttpStatus.S_412_PRECONDITION_FAILED, message); } + + @Nonnull + public static RestLiServiceException apiThrottled(@Nullable String message) { + return new RestLiServiceException(HttpStatus.S_429_TOO_MANY_REQUESTS, message); + } } diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java index 8cf1b07d971d93..a0c3d460951605 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java @@ -24,7 +24,7 @@ import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.registry.EntityRegistry; -import com.linkedin.metadata.restli.RestliUtil; +import com.linkedin.metadata.resources.restli.RestliUtils; import com.linkedin.metadata.timeseries.TimeseriesAspectService; import com.linkedin.metadata.timeseries.elastic.UsageServiceUtil; import com.linkedin.metadata.timeseries.transformer.TimeseriesAspectTransformer; @@ 
-100,7 +100,7 @@ public class UsageStats extends SimpleResourceTemplate { @WithSpan public Task batchIngest(@ActionParam(PARAM_BUCKETS) @Nonnull UsageAggregation[] buckets) { log.info("Ingesting {} usage stats aggregations", buckets.length); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { final Authentication auth = AuthenticationContext.getAuthentication(); @@ -140,7 +140,7 @@ public Task query( log.info( "Querying usage stats for resource: {}, duration: {}, start time: {}, end time: {}, max buckets: {}", resource, duration, startTime, endTime, maxBuckets); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { Urn resourceUrn = UrnUtils.getUrn(resource); @@ -185,7 +185,7 @@ public Task queryRange( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to query usage."); } - return RestliUtil.toTask( + return RestliUtils.toTask( () -> UsageServiceUtil.queryRange(opContext, _timeseriesAspectService, resource, duration, range), MetricRegistry.name(this.getClass(), "queryRange")); } diff --git a/metadata-service/war/src/main/resources/boot/policies.json b/metadata-service/war/src/main/resources/boot/policies.json index e62d0a33e7cd05..e0f26b908c4991 100644 --- a/metadata-service/war/src/main/resources/boot/policies.json +++ b/metadata-service/war/src/main/resources/boot/policies.json @@ -37,7 +37,8 @@ "MANAGE_BUSINESS_ATTRIBUTE", "MANAGE_STRUCTURED_PROPERTIES", "MANAGE_DOCUMENTATION_FORMS", - "MANAGE_FEATURES" + "MANAGE_FEATURES", + "MANAGE_SYSTEM_OPERATIONS" ], "displayName": "Root User - All Platform Privileges", "description": "Grants all platform privileges to root user.", diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java index a282c6be673d0e..7a5a34d0f36301 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java +++ 
b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java @@ -167,6 +167,12 @@ public class PoliciesConfig { Privilege.of( "MANAGE_FEATURES", "Manage Features", "Umbrella privilege to manage all features."); + public static final Privilege MANAGE_SYSTEM_OPERATIONS_PRIVILEGE = + Privilege.of( + "MANAGE_SYSTEM_OPERATIONS", + "Manage System Operations", + "Allow access to system operations APIs and controls."); + public static final List PLATFORM_PRIVILEGES = ImmutableList.of( MANAGE_POLICIES_PRIVILEGE, @@ -194,7 +200,8 @@ public class PoliciesConfig { MANAGE_CONNECTIONS_PRIVILEGE, MANAGE_STRUCTURED_PROPERTIES_PRIVILEGE, MANAGE_DOCUMENTATION_FORMS_PRIVILEGE, - MANAGE_FEATURES_PRIVILEGE); + MANAGE_FEATURES_PRIVILEGE, + MANAGE_SYSTEM_OPERATIONS_PRIVILEGE); // Resource Privileges // diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/restli/RestliUtil.java b/metadata-utils/src/main/java/com/linkedin/metadata/restli/RestliUtil.java deleted file mode 100644 index c9b1d5a8a82de5..00000000000000 --- a/metadata-utils/src/main/java/com/linkedin/metadata/restli/RestliUtil.java +++ /dev/null @@ -1,101 +0,0 @@ -package com.linkedin.metadata.restli; - -import com.codahale.metrics.MetricRegistry; -import com.codahale.metrics.Timer; -import com.linkedin.metadata.utils.metrics.MetricUtils; -import com.linkedin.parseq.Task; -import com.linkedin.restli.common.HttpStatus; -import com.linkedin.restli.server.RestLiServiceException; -import java.util.Optional; -import java.util.function.Supplier; -import javax.annotation.Nonnull; -import javax.annotation.Nullable; - -public class RestliUtil { - - private RestliUtil() { - // Utils class - } - - /** - * Executes the provided supplier and convert the results to a {@link Task}. Exceptions thrown - * during the execution will be properly wrapped in {@link RestLiServiceException}. 
- * - * @param supplier The supplier to execute - * @return A parseq {@link Task} - */ - @Nonnull - public static Task toTask(@Nonnull Supplier supplier) { - try { - return Task.value(supplier.get()); - } catch (Throwable throwable) { - - // Convert IllegalArgumentException to BAD REQUEST - if (throwable instanceof IllegalArgumentException - || throwable.getCause() instanceof IllegalArgumentException) { - throwable = badRequestException(throwable.getMessage()); - } - - if (throwable instanceof RestLiServiceException) { - throw (RestLiServiceException) throwable; - } - - throw new RestLiServiceException(HttpStatus.S_500_INTERNAL_SERVER_ERROR, throwable); - } - } - - @Nonnull - public static Task toTask(@Nonnull Supplier supplier, String metricName) { - Timer.Context context = MetricUtils.timer(metricName).time(); - // Stop timer on success and failure - return toTask(supplier) - .transform( - orig -> { - context.stop(); - if (orig.isFailed()) { - MetricUtils.counter(MetricRegistry.name(metricName, "failed")).inc(); - } else { - MetricUtils.counter(MetricRegistry.name(metricName, "success")).inc(); - } - return orig; - }); - } - - /** - * Similar to {@link #toTask(Supplier)} but the supplier is expected to return an {@link Optional} - * instead. A {@link RestLiServiceException} with 404 HTTP status code will be thrown if the - * optional is emtpy. 
- * - * @param supplier The supplier to execute - * @return A parseq {@link Task} - */ - @Nonnull - public static Task toTaskFromOptional(@Nonnull Supplier> supplier) { - return toTask(() -> supplier.get().orElseThrow(RestliUtil::resourceNotFoundException)); - } - - @Nonnull - public static RestLiServiceException resourceNotFoundException() { - return resourceNotFoundException(null); - } - - @Nonnull - public static RestLiServiceException nonExceptionResourceNotFound() { - return new NonExceptionHttpErrorResponse(HttpStatus.S_404_NOT_FOUND); - } - - @Nonnull - public static RestLiServiceException resourceNotFoundException(@Nullable String message) { - return new RestLiServiceException(HttpStatus.S_404_NOT_FOUND, message); - } - - @Nonnull - public static RestLiServiceException badRequestException(@Nullable String message) { - return new RestLiServiceException(HttpStatus.S_400_BAD_REQUEST, message); - } - - @Nonnull - public static RestLiServiceException invalidArgumentsException(@Nullable String message) { - return new RestLiServiceException(HttpStatus.S_412_PRECONDITION_FAILED, message); - } -}