From b9b2f79c30f628b1aae7e4422fcd4f7d9d949cb0 Mon Sep 17 00:00:00 2001 From: tabVersion Date: Tue, 22 Oct 2024 21:37:18 +0800 Subject: [PATCH 01/51] share kafka client on meta Signed-off-by: tabVersion --- .../src/source/kafka/enumerator/client.rs | 81 +++++++++++++------ src/meta/src/stream/source_manager.rs | 27 +++++++ 2 files changed, 83 insertions(+), 25 deletions(-) diff --git a/src/connector/src/source/kafka/enumerator/client.rs b/src/connector/src/source/kafka/enumerator/client.rs index a425de418ef4a..67b1dd3748864 100644 --- a/src/connector/src/source/kafka/enumerator/client.rs +++ b/src/connector/src/source/kafka/enumerator/client.rs @@ -13,6 +13,7 @@ // limitations under the License. use std::collections::HashMap; +use std::sync::{Arc, LazyLock}; use std::time::Duration; use anyhow::{anyhow, Context}; @@ -32,6 +33,16 @@ use crate::source::kafka::{ }; use crate::source::SourceEnumeratorContextRef; +pub static SHARED_KAFKA_CLIENT: LazyLock>> = + LazyLock::new(|| tokio::sync::Mutex::new(HashMap::new())); + +#[derive(Clone)] +pub struct SharedKafkaItem { + pub client: Arc>, + pub ref_count: i32, + pub props: KafkaProperties, +} + #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum KafkaEnumeratorOffset { Earliest, @@ -44,7 +55,7 @@ pub struct KafkaSplitEnumerator { context: SourceEnumeratorContextRef, broker_address: String, topic: String, - client: BaseConsumer, + client: Arc>, start_offset: KafkaEnumeratorOffset, // maybe used in the future for batch processing @@ -94,36 +105,56 @@ impl SplitEnumerator for KafkaSplitEnumerator { scan_start_offset = KafkaEnumeratorOffset::Timestamp(time_offset) } - // don't need kafka metrics from enumerator - let ctx_common = KafkaContextCommon::new( - broker_rewrite_map, - None, - None, - properties.aws_auth_props, - common_props.is_aws_msk_iam(), - ) - .await?; - let client_ctx = RwConsumerContext::new(ctx_common); - let client: BaseConsumer = - config.create_with_context(client_ctx).await?; - - // Note that before any SASL/OAUTHBEARER broker connection can succeed the application must call - // rd_kafka_oauthbearer_set_token() once – either directly or, more typically, by invoking either - // rd_kafka_poll(), rd_kafka_consumer_poll(), rd_kafka_queue_poll(), etc, in order to cause retrieval - // of an initial token to occur. - // https://docs.confluent.io/platform/current/clients/librdkafka/html/rdkafka_8h.html#a988395722598f63396d7a1bedb22adaf - if common_props.is_aws_msk_iam() { - #[cfg(not(madsim))] - client.poll(Duration::from_secs(10)); // note: this is a blocking call - #[cfg(madsim)] - client.poll(Duration::from_secs(10)).await; + let kafka_client: Arc>; + if let Some(item) = SHARED_KAFKA_CLIENT + .lock() + .await + .get_mut(broker_address.as_str()) + { + kafka_client = item.client.clone(); + item.ref_count += 1; + } else { + // don't need kafka metrics from enumerator + let ctx_common = KafkaContextCommon::new( + broker_rewrite_map, + None, + None, + properties.aws_auth_props.clone(), + common_props.is_aws_msk_iam(), + ) + .await?; + let client_ctx = RwConsumerContext::new(ctx_common); + let client: BaseConsumer = + config.create_with_context(client_ctx).await?; + + // Note that before any SASL/OAUTHBEARER broker connection can succeed the application must call + // rd_kafka_oauthbearer_set_token() once – either directly or, more typically, by invoking either + // rd_kafka_poll(), rd_kafka_consumer_poll(), rd_kafka_queue_poll(), etc, in order to cause retrieval + // of an initial token to occur. 
+            // https://docs.confluent.io/platform/current/clients/librdkafka/html/rdkafka_8h.html#a988395722598f63396d7a1bedb22adaf
+            if common_props.is_aws_msk_iam() {
+                #[cfg(not(madsim))]
+                client.poll(Duration::from_secs(10)); // note: this is a blocking call
+                #[cfg(madsim)]
+                client.poll(Duration::from_secs(10)).await;
+            }
+
+            kafka_client = Arc::new(client);
+            SHARED_KAFKA_CLIENT.lock().await.insert(
+                broker_address.clone(),
+                SharedKafkaItem {
+                    client: kafka_client.clone(),
+                    ref_count: 1,
+                    props: properties.clone(),
+                },
+            );
         }
 
         Ok(Self {
             context,
             broker_address,
             topic,
-            client,
+            client: kafka_client,
             start_offset: scan_start_offset,
             stop_offset: KafkaEnumeratorOffset::None,
             sync_call_timeout: properties.common.sync_call_timeout,
diff --git a/src/meta/src/stream/source_manager.rs b/src/meta/src/stream/source_manager.rs
index c5bcc0c179ba3..6f4b810997990 100644
--- a/src/meta/src/stream/source_manager.rs
+++ b/src/meta/src/stream/source_manager.rs
@@ -24,6 +24,7 @@ use anyhow::Context;
 use risingwave_common::catalog::TableId;
 use risingwave_common::metrics::LabelGuardedIntGauge;
 use risingwave_connector::error::ConnectorResult;
+use risingwave_connector::source::kafka::SHARED_KAFKA_CLIENT;
 use risingwave_connector::source::{
     ConnectorProperties, SourceEnumeratorContext, SourceEnumeratorInfo, SourceProperties,
     SplitEnumerator, SplitId, SplitImpl, SplitMetaData,
@@ -110,6 +111,7 @@ pub async fn create_source_worker_handle(
     let current_splits_ref = splits.clone();
     let connector_properties = extract_prop_from_new_source(source)?;
 
+    let share_client_entry = get_kafka_bootstrap_addr(&connector_properties);
     let enable_scale_in = connector_properties.enable_split_scale_in();
     let (sync_call_tx, sync_call_rx) = tokio::sync::mpsc::unbounded_channel();
     let handle = dispatch_source_prop!(connector_properties, prop, {
@@ -144,6 +146,7 @@ pub async fn create_source_worker_handle(
         sync_call_tx,
         splits,
         enable_scale_in,
+        share_client_entry,
     })
 }
 
@@ -264,6 +267,7 @@ pub struct ConnectorSourceWorkerHandle {
     sync_call_tx: UnboundedSender<oneshot::Sender<MetaResult<()>>>,
     splits: SharedSplitMapRef,
     enable_scale_in: bool,
+    pub share_client_entry: Option<String>,
 }
 
 impl ConnectorSourceWorkerHandle {
@@ -1033,6 +1037,15 @@ impl SourceManager {
         for source_id in source_ids {
             if let Some(handle) = core.managed_sources.remove(&source_id) {
                 handle.handle.abort();
+                if let Some(entry) = handle.share_client_entry {
+                    let mut share_client_guard = SHARED_KAFKA_CLIENT.lock().await;
+                    if let Some(item) = share_client_guard.get_mut(&entry) {
+                        item.ref_count -= 1;
+                        if item.ref_count == 0 {
+                            share_client_guard.remove(&entry);
+                        }
+                    }
+                }
             }
         }
     }
@@ -1050,6 +1063,9 @@ impl SourceManager {
         let source_id = source.id;
 
         let connector_properties = extract_prop_from_existing_source(&source)?;
+
+        let share_client_entry = get_kafka_bootstrap_addr(&connector_properties);
+
         let enable_scale_in = connector_properties.enable_split_scale_in();
         let (sync_call_tx, sync_call_rx) = tokio::sync::mpsc::unbounded_channel();
         let handle = tokio::spawn(async move {
@@ -1089,6 +1105,7 @@ impl SourceManager {
                 sync_call_tx,
                 splits,
                 enable_scale_in,
+                share_client_entry,
             },
         );
         Ok(())
@@ -1180,6 +1197,16 @@ pub fn build_actor_split_impls(
         .collect()
 }
 
+fn get_kafka_bootstrap_addr(connector_properties: &ConnectorProperties) -> Option<String> {
+    {
+        // for kafka source: extract the bootstrap servers from the source properties as shared source entry (on meta)
+        if let ConnectorProperties::Kafka(kafka_props) = connector_properties {
+            return Some(kafka_props.common.brokers.clone());
+        }
+        None
+    }
+} + #[cfg(test)] mod tests { use std::collections::{BTreeMap, HashMap, HashSet}; From ab4667220baa7424c2b8e6211d3a5369ff29d9c5 Mon Sep 17 00:00:00 2001 From: tabVersion Date: Tue, 22 Oct 2024 22:30:11 +0800 Subject: [PATCH 02/51] check kafka connection identical Signed-off-by: tabVersion --- src/connector/src/connector_common/common.rs | 45 ++++++++++++------- src/connector/src/connector_common/mod.rs | 7 +-- src/connector/src/sink/kafka.rs | 8 ++-- .../src/source/kafka/enumerator/client.rs | 10 +++-- .../src/source/kafka/source/reader.rs | 9 ++-- src/meta/src/stream/source_manager.rs | 2 +- 6 files changed, 51 insertions(+), 30 deletions(-) diff --git a/src/connector/src/connector_common/common.rs b/src/connector/src/connector_common/common.rs index 1be41b4e09caa..4d29ad9c527bd 100644 --- a/src/connector/src/connector_common/common.rs +++ b/src/connector/src/connector_common/common.rs @@ -60,8 +60,10 @@ use aws_types::region::Region; use aws_types::SdkConfig; use risingwave_common::util::env_var::env_var_is_true; +use crate::source::kafka::KafkaProperties; + /// A flatten config map for aws auth. -#[derive(Deserialize, Debug, Clone, WithOptions)] +#[derive(Deserialize, Debug, Clone, WithOptions, PartialEq)] pub struct AwsAuthProps { #[serde(rename = "aws.region", alias = "region")] pub region: Option, @@ -160,22 +162,18 @@ impl AwsAuthProps { } } +pub fn check_kafka_connection_identical(lhs: &KafkaProperties, rhs: &KafkaProperties) -> bool { + lhs.aws_auth_props == rhs.aws_auth_props + && lhs.privatelink_common == rhs.privatelink_common + && lhs.common.connection == rhs.common.connection +} + #[serde_as] -#[derive(Debug, Clone, Deserialize, WithOptions)] -pub struct KafkaCommon { +#[derive(Debug, Clone, Deserialize, WithOptions, PartialEq)] +pub struct KafkaConnection { #[serde(rename = "properties.bootstrap.server", alias = "kafka.brokers")] pub brokers: String, - #[serde(rename = "topic", alias = "kafka.topic")] - pub topic: String, - - #[serde( - rename = "properties.sync.call.timeout", - deserialize_with = "deserialize_duration_from_string", - default = "default_kafka_sync_call_timeout" - )] - pub sync_call_timeout: Duration, - /// Security protocol used for RisingWave to communicate with Kafka brokers. Could be /// PLAINTEXT, SSL, SASL_PLAINTEXT or SASL_SSL. #[serde(rename = "properties.security.protocol")] @@ -252,6 +250,23 @@ pub struct KafkaCommon { #[serde_as] #[derive(Debug, Clone, Deserialize, WithOptions)] +pub struct KafkaCommon { + #[serde(rename = "topic", alias = "kafka.topic")] + pub topic: String, + + #[serde( + rename = "properties.sync.call.timeout", + deserialize_with = "deserialize_duration_from_string", + default = "default_kafka_sync_call_timeout" + )] + pub sync_call_timeout: Duration, + + #[serde(flatten)] + pub connection: KafkaConnection, +} + +#[serde_as] +#[derive(Debug, Clone, Deserialize, WithOptions, PartialEq)] pub struct KafkaPrivateLinkCommon { /// This is generated from `private_link_targets` and `private_link_endpoint` in frontend, instead of given by users. #[serde(rename = "broker.rewrite.endpoints")] @@ -269,7 +284,7 @@ pub struct RdKafkaPropertiesCommon { /// Maximum Kafka protocol request message size. 
Due to differing framing overhead between /// protocol versions the producer is unable to reliably enforce a strict max message limit at /// produce time and may exceed the maximum size by one message in protocol ProduceRequests, - /// the broker will enforce the the topic's max.message.bytes limit + /// the broker will enforce the topic's max.message.bytes limit #[serde(rename = "properties.message.max.bytes")] #[serde_as(as = "Option")] pub message_max_bytes: Option, @@ -316,7 +331,7 @@ impl RdKafkaPropertiesCommon { } } -impl KafkaCommon { +impl KafkaConnection { pub(crate) fn set_security_properties(&self, config: &mut ClientConfig) { // AWS_MSK_IAM if self.is_aws_msk_iam() { diff --git a/src/connector/src/connector_common/mod.rs b/src/connector/src/connector_common/mod.rs index 30d699198ccd4..024ff5cb55583 100644 --- a/src/connector/src/connector_common/mod.rs +++ b/src/connector/src/connector_common/mod.rs @@ -19,9 +19,10 @@ pub use mqtt_common::{MqttCommon, QualityOfService as MqttQualityOfService}; mod common; pub use common::{ - AwsAuthProps, AwsPrivateLinkItem, KafkaCommon, KafkaPrivateLinkCommon, KinesisCommon, - MongodbCommon, NatsCommon, PulsarCommon, PulsarOauthCommon, RdKafkaPropertiesCommon, - PRIVATE_LINK_BROKER_REWRITE_MAP_KEY, PRIVATE_LINK_TARGETS_KEY, + check_kafka_connection_identical, AwsAuthProps, AwsPrivateLinkItem, KafkaCommon, + KafkaPrivateLinkCommon, KinesisCommon, MongodbCommon, NatsCommon, PulsarCommon, + PulsarOauthCommon, RdKafkaPropertiesCommon, PRIVATE_LINK_BROKER_REWRITE_MAP_KEY, + PRIVATE_LINK_TARGETS_KEY, }; mod iceberg; diff --git a/src/connector/src/sink/kafka.rs b/src/connector/src/sink/kafka.rs index 68f20fcf7d581..25745d31c0182 100644 --- a/src/connector/src/sink/kafka.rs +++ b/src/connector/src/sink/kafka.rs @@ -368,7 +368,7 @@ impl Sink for KafkaSink { if !check.check_reachability().await { return Err(SinkError::Config(anyhow!( "cannot connect to kafka broker ({})", - self.config.common.brokers + self.config.common.connection.brokers ))); } Ok(()) @@ -413,11 +413,11 @@ impl KafkaSinkWriter { let mut c = ClientConfig::new(); // KafkaConfig configuration - config.common.set_security_properties(&mut c); + config.common.connection.set_security_properties(&mut c); config.set_client(&mut c); // ClientConfig configuration - c.set("bootstrap.servers", &config.common.brokers); + c.set("bootstrap.servers", &config.common.connection.brokers); // Create the producer context, will be used to create the producer let broker_rewrite_map = config.privatelink_common.broker_rewrite_map.clone(); @@ -426,7 +426,7 @@ impl KafkaSinkWriter { None, None, config.aws_auth_props.clone(), - config.common.is_aws_msk_iam(), + config.common.connection.is_aws_msk_iam(), ) .await?; let producer_ctx = RwProducerContext::new(ctx_common); diff --git a/src/connector/src/source/kafka/enumerator/client.rs b/src/connector/src/source/kafka/enumerator/client.rs index 67b1dd3748864..1b1ea962bc134 100644 --- a/src/connector/src/source/kafka/enumerator/client.rs +++ b/src/connector/src/source/kafka/enumerator/client.rs @@ -25,6 +25,7 @@ use rdkafka::{Offset, TopicPartitionList}; use risingwave_common::bail; use risingwave_common::metrics::LabelGuardedMetric; +use crate::connector_common::check_kafka_connection_identical; use crate::error::ConnectorResult; use crate::source::base::SplitEnumerator; use crate::source::kafka::split::KafkaSplit; @@ -79,12 +80,12 @@ impl SplitEnumerator for KafkaSplitEnumerator { let mut config = rdkafka::ClientConfig::new(); let common_props = &properties.common; 
- let broker_address = common_props.brokers.clone(); + let broker_address = common_props.connection.brokers.clone(); let broker_rewrite_map = properties.privatelink_common.broker_rewrite_map.clone(); let topic = common_props.topic.clone(); config.set("bootstrap.servers", &broker_address); config.set("isolation.level", KAFKA_ISOLATION_LEVEL); - common_props.set_security_properties(&mut config); + common_props.connection.set_security_properties(&mut config); properties.set_client(&mut config); let mut scan_start_offset = match properties .scan_startup_mode @@ -110,6 +111,7 @@ impl SplitEnumerator for KafkaSplitEnumerator { .lock() .await .get_mut(broker_address.as_str()) + && check_kafka_connection_identical(&item.props, &properties) { kafka_client = item.client.clone(); item.ref_count += 1; @@ -120,7 +122,7 @@ impl SplitEnumerator for KafkaSplitEnumerator { None, None, properties.aws_auth_props.clone(), - common_props.is_aws_msk_iam(), + common_props.connection.is_aws_msk_iam(), ) .await?; let client_ctx = RwConsumerContext::new(ctx_common); @@ -132,7 +134,7 @@ impl SplitEnumerator for KafkaSplitEnumerator { // rd_kafka_poll(), rd_kafka_consumer_poll(), rd_kafka_queue_poll(), etc, in order to cause retrieval // of an initial token to occur. // https://docs.confluent.io/platform/current/clients/librdkafka/html/rdkafka_8h.html#a988395722598f63396d7a1bedb22adaf - if common_props.is_aws_msk_iam() { + if common_props.connection.is_aws_msk_iam() { #[cfg(not(madsim))] client.poll(Duration::from_secs(10)); // note: this is a blocking call #[cfg(madsim)] diff --git a/src/connector/src/source/kafka/source/reader.rs b/src/connector/src/source/kafka/source/reader.rs index d58f1b70dd9fc..fd930344ca8bc 100644 --- a/src/connector/src/source/kafka/source/reader.rs +++ b/src/connector/src/source/kafka/source/reader.rs @@ -64,7 +64,7 @@ impl SplitReader for KafkaSplitReader { ) -> Result { let mut config = ClientConfig::new(); - let bootstrap_servers = &properties.common.brokers; + let bootstrap_servers = &properties.common.connection.brokers; let broker_rewrite_map = properties.privatelink_common.broker_rewrite_map.clone(); // disable partition eof @@ -73,7 +73,10 @@ impl SplitReader for KafkaSplitReader { config.set("isolation.level", KAFKA_ISOLATION_LEVEL); config.set("bootstrap.servers", bootstrap_servers); - properties.common.set_security_properties(&mut config); + properties + .common + .connection + .set_security_properties(&mut config); properties.set_client(&mut config); let group_id_prefix = properties @@ -95,7 +98,7 @@ impl SplitReader for KafkaSplitReader { // explicitly Some(source_ctx.metrics.rdkafka_native_metric.clone()), properties.aws_auth_props, - properties.common.is_aws_msk_iam(), + properties.common.connection.is_aws_msk_iam(), ) .await?; diff --git a/src/meta/src/stream/source_manager.rs b/src/meta/src/stream/source_manager.rs index 6f4b810997990..84dc1cda4c5ea 100644 --- a/src/meta/src/stream/source_manager.rs +++ b/src/meta/src/stream/source_manager.rs @@ -1201,7 +1201,7 @@ fn get_kafka_bootstrap_addr(connector_properties: &ConnectorProperties) -> Optio { // for kafka source: extract the bootstrap servers from the source properties as shared source entry (on meta) if let ConnectorProperties::Kafka(kafka_props) = connector_properties { - return Some(kafka_props.common.brokers.clone()); + return Some(kafka_props.common.connection.brokers.clone()); } None } From 3373a5828c2ebae9c3f1c15a376ee734b69c4983 Mon Sep 17 00:00:00 2001 From: tabVersion Date: Tue, 22 Oct 2024 22:30:44 +0800 
Subject: [PATCH 03/51] fix Signed-off-by: tabVersion --- src/connector/src/sink/kafka.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/connector/src/sink/kafka.rs b/src/connector/src/sink/kafka.rs index 25745d31c0182..ee8fc4197e488 100644 --- a/src/connector/src/sink/kafka.rs +++ b/src/connector/src/sink/kafka.rs @@ -685,7 +685,7 @@ mod test { "broker.rewrite.endpoints".to_string() => "{\"broker1\": \"10.0.0.1:8001\"}".to_string(), }; let config = KafkaConfig::from_btreemap(properties).unwrap(); - assert_eq!(config.common.brokers, "localhost:9092"); + assert_eq!(config.common.connection.brokers, "localhost:9092"); assert_eq!(config.common.topic, "test"); assert_eq!(config.max_retry_num, 20); assert_eq!(config.retry_interval, Duration::from_millis(500)); From c5203d6c22d5cca2a60c18c300326d9c5b73b1e6 Mon Sep 17 00:00:00 2001 From: tabVersion Date: Tue, 22 Oct 2024 23:27:44 +0800 Subject: [PATCH 04/51] fix with props Signed-off-by: tabVersion --- src/connector/with_options_sink.yaml | 12 ++++++------ src/connector/with_options_source.yaml | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/connector/with_options_sink.yaml b/src/connector/with_options_sink.yaml index eb03288dfbcca..dd2dd7098f99f 100644 --- a/src/connector/with_options_sink.yaml +++ b/src/connector/with_options_sink.yaml @@ -434,11 +434,6 @@ IcebergConfig: default: Default::default KafkaConfig: fields: - - name: properties.bootstrap.server - field_type: String - required: true - alias: - - kafka.brokers - name: topic field_type: String required: true @@ -448,6 +443,11 @@ KafkaConfig: field_type: Duration required: false default: 'Duration :: from_secs (5)' + - name: properties.bootstrap.server + field_type: String + required: true + alias: + - kafka.brokers - name: properties.security.protocol field_type: String comments: |- @@ -542,7 +542,7 @@ KafkaConfig: Maximum Kafka protocol request message size. Due to differing framing overhead between protocol versions the producer is unable to reliably enforce a strict max message limit at produce time and may exceed the maximum size by one message in protocol ProduceRequests, - the broker will enforce the the topic's max.message.bytes limit + the broker will enforce the topic's max.message.bytes limit required: false - name: properties.receive.message.max.bytes field_type: usize diff --git a/src/connector/with_options_source.yaml b/src/connector/with_options_source.yaml index 20d3fc1714233..b1b3731a1d388 100644 --- a/src/connector/with_options_source.yaml +++ b/src/connector/with_options_source.yaml @@ -178,11 +178,6 @@ KafkaProperties: combine both key and value fields of the Kafka message. TODO: Currently, `Option` can not be parsed here. required: false - - name: properties.bootstrap.server - field_type: String - required: true - alias: - - kafka.brokers - name: topic field_type: String required: true @@ -192,6 +187,11 @@ KafkaProperties: field_type: Duration required: false default: 'Duration :: from_secs (5)' + - name: properties.bootstrap.server + field_type: String + required: true + alias: + - kafka.brokers - name: properties.security.protocol field_type: String comments: |- @@ -271,7 +271,7 @@ KafkaProperties: Maximum Kafka protocol request message size. 
Due to differing framing overhead between protocol versions the producer is unable to reliably enforce a strict max message limit at produce time and may exceed the maximum size by one message in protocol ProduceRequests, - the broker will enforce the the topic's max.message.bytes limit + the broker will enforce the topic's max.message.bytes limit required: false - name: properties.receive.message.max.bytes field_type: usize From 3b6a6a24c6291038a85cf6cf31bf4901fa30583a Mon Sep 17 00:00:00 2001 From: tabVersion Date: Wed, 23 Oct 2024 00:17:52 +0800 Subject: [PATCH 05/51] fix Signed-off-by: tabVersion --- src/connector/src/connector_common/common.rs | 5 +- src/connector/src/connector_common/mod.rs | 4 +- src/connector/src/sink/kafka.rs | 14 +- .../src/source/kafka/enumerator/client.rs | 8 +- src/connector/src/source/kafka/mod.rs | 5 +- .../src/source/kafka/source/reader.rs | 9 +- src/connector/with_options_sink.yaml | 159 +++++++++--------- src/meta/src/stream/source_manager.rs | 2 +- 8 files changed, 106 insertions(+), 100 deletions(-) diff --git a/src/connector/src/connector_common/common.rs b/src/connector/src/connector_common/common.rs index 4d29ad9c527bd..3c3f1d8493fb5 100644 --- a/src/connector/src/connector_common/common.rs +++ b/src/connector/src/connector_common/common.rs @@ -165,7 +165,7 @@ impl AwsAuthProps { pub fn check_kafka_connection_identical(lhs: &KafkaProperties, rhs: &KafkaProperties) -> bool { lhs.aws_auth_props == rhs.aws_auth_props && lhs.privatelink_common == rhs.privatelink_common - && lhs.common.connection == rhs.common.connection + && lhs.connection == rhs.connection } #[serde_as] @@ -260,9 +260,6 @@ pub struct KafkaCommon { default = "default_kafka_sync_call_timeout" )] pub sync_call_timeout: Duration, - - #[serde(flatten)] - pub connection: KafkaConnection, } #[serde_as] diff --git a/src/connector/src/connector_common/mod.rs b/src/connector/src/connector_common/mod.rs index 024ff5cb55583..fcce2aa80a379 100644 --- a/src/connector/src/connector_common/mod.rs +++ b/src/connector/src/connector_common/mod.rs @@ -20,8 +20,8 @@ pub use mqtt_common::{MqttCommon, QualityOfService as MqttQualityOfService}; mod common; pub use common::{ check_kafka_connection_identical, AwsAuthProps, AwsPrivateLinkItem, KafkaCommon, - KafkaPrivateLinkCommon, KinesisCommon, MongodbCommon, NatsCommon, PulsarCommon, - PulsarOauthCommon, RdKafkaPropertiesCommon, PRIVATE_LINK_BROKER_REWRITE_MAP_KEY, + KafkaConnection, KafkaPrivateLinkCommon, KinesisCommon, MongodbCommon, NatsCommon, + PulsarCommon, PulsarOauthCommon, RdKafkaPropertiesCommon, PRIVATE_LINK_BROKER_REWRITE_MAP_KEY, PRIVATE_LINK_TARGETS_KEY, }; diff --git a/src/connector/src/sink/kafka.rs b/src/connector/src/sink/kafka.rs index ee8fc4197e488..67d2d0854b0c3 100644 --- a/src/connector/src/sink/kafka.rs +++ b/src/connector/src/sink/kafka.rs @@ -214,6 +214,9 @@ pub struct KafkaConfig { #[serde(flatten)] pub common: KafkaCommon, + #[serde(flatten)] + pub connection: crate::connector_common::KafkaConnection, + #[serde( rename = "properties.retry.max", default = "_default_max_retries", @@ -269,6 +272,7 @@ impl From for KafkaProperties { time_offset: None, upsert: None, common: val.common, + connection: val.connection, rdkafka_properties_common: val.rdkafka_properties_common, rdkafka_properties_consumer: Default::default(), privatelink_common: val.privatelink_common, @@ -368,7 +372,7 @@ impl Sink for KafkaSink { if !check.check_reachability().await { return Err(SinkError::Config(anyhow!( "cannot connect to kafka broker ({})", - 
self.config.common.connection.brokers + self.config.connection.brokers ))); } Ok(()) @@ -413,11 +417,11 @@ impl KafkaSinkWriter { let mut c = ClientConfig::new(); // KafkaConfig configuration - config.common.connection.set_security_properties(&mut c); + config.connection.set_security_properties(&mut c); config.set_client(&mut c); // ClientConfig configuration - c.set("bootstrap.servers", &config.common.connection.brokers); + c.set("bootstrap.servers", &config.connection.brokers); // Create the producer context, will be used to create the producer let broker_rewrite_map = config.privatelink_common.broker_rewrite_map.clone(); @@ -426,7 +430,7 @@ impl KafkaSinkWriter { None, None, config.aws_auth_props.clone(), - config.common.connection.is_aws_msk_iam(), + config.connection.is_aws_msk_iam(), ) .await?; let producer_ctx = RwProducerContext::new(ctx_common); @@ -685,7 +689,7 @@ mod test { "broker.rewrite.endpoints".to_string() => "{\"broker1\": \"10.0.0.1:8001\"}".to_string(), }; let config = KafkaConfig::from_btreemap(properties).unwrap(); - assert_eq!(config.common.connection.brokers, "localhost:9092"); + assert_eq!(config.connection.brokers, "localhost:9092"); assert_eq!(config.common.topic, "test"); assert_eq!(config.max_retry_num, 20); assert_eq!(config.retry_interval, Duration::from_millis(500)); diff --git a/src/connector/src/source/kafka/enumerator/client.rs b/src/connector/src/source/kafka/enumerator/client.rs index 1b1ea962bc134..cdb5fbf9b88f1 100644 --- a/src/connector/src/source/kafka/enumerator/client.rs +++ b/src/connector/src/source/kafka/enumerator/client.rs @@ -80,12 +80,12 @@ impl SplitEnumerator for KafkaSplitEnumerator { let mut config = rdkafka::ClientConfig::new(); let common_props = &properties.common; - let broker_address = common_props.connection.brokers.clone(); + let broker_address = properties.connection.brokers.clone(); let broker_rewrite_map = properties.privatelink_common.broker_rewrite_map.clone(); let topic = common_props.topic.clone(); config.set("bootstrap.servers", &broker_address); config.set("isolation.level", KAFKA_ISOLATION_LEVEL); - common_props.connection.set_security_properties(&mut config); + properties.connection.set_security_properties(&mut config); properties.set_client(&mut config); let mut scan_start_offset = match properties .scan_startup_mode @@ -122,7 +122,7 @@ impl SplitEnumerator for KafkaSplitEnumerator { None, None, properties.aws_auth_props.clone(), - common_props.connection.is_aws_msk_iam(), + properties.connection.is_aws_msk_iam(), ) .await?; let client_ctx = RwConsumerContext::new(ctx_common); @@ -134,7 +134,7 @@ impl SplitEnumerator for KafkaSplitEnumerator { // rd_kafka_poll(), rd_kafka_consumer_poll(), rd_kafka_queue_poll(), etc, in order to cause retrieval // of an initial token to occur. 
// https://docs.confluent.io/platform/current/clients/librdkafka/html/rdkafka_8h.html#a988395722598f63396d7a1bedb22adaf - if common_props.connection.is_aws_msk_iam() { + if properties.connection.is_aws_msk_iam() { #[cfg(not(madsim))] client.poll(Duration::from_secs(10)); // note: this is a blocking call #[cfg(madsim)] diff --git a/src/connector/src/source/kafka/mod.rs b/src/connector/src/source/kafka/mod.rs index 125bf73a3529f..030c190eb4942 100644 --- a/src/connector/src/source/kafka/mod.rs +++ b/src/connector/src/source/kafka/mod.rs @@ -17,7 +17,7 @@ use std::collections::HashMap; use serde::Deserialize; use serde_with::{serde_as, DisplayFromStr}; -use crate::connector_common::{AwsAuthProps, KafkaPrivateLinkCommon}; +use crate::connector_common::{AwsAuthProps, KafkaConnection, KafkaPrivateLinkCommon}; mod client_context; pub mod enumerator; @@ -143,6 +143,9 @@ pub struct KafkaProperties { #[serde(flatten)] pub common: KafkaCommon, + #[serde(flatten)] + pub connection: KafkaConnection, + #[serde(flatten)] pub rdkafka_properties_common: RdKafkaPropertiesCommon, diff --git a/src/connector/src/source/kafka/source/reader.rs b/src/connector/src/source/kafka/source/reader.rs index fd930344ca8bc..b9523eca98b57 100644 --- a/src/connector/src/source/kafka/source/reader.rs +++ b/src/connector/src/source/kafka/source/reader.rs @@ -64,7 +64,7 @@ impl SplitReader for KafkaSplitReader { ) -> Result { let mut config = ClientConfig::new(); - let bootstrap_servers = &properties.common.connection.brokers; + let bootstrap_servers = &properties.connection.brokers; let broker_rewrite_map = properties.privatelink_common.broker_rewrite_map.clone(); // disable partition eof @@ -73,10 +73,7 @@ impl SplitReader for KafkaSplitReader { config.set("isolation.level", KAFKA_ISOLATION_LEVEL); config.set("bootstrap.servers", bootstrap_servers); - properties - .common - .connection - .set_security_properties(&mut config); + properties.connection.set_security_properties(&mut config); properties.set_client(&mut config); let group_id_prefix = properties @@ -98,7 +95,7 @@ impl SplitReader for KafkaSplitReader { // explicitly Some(source_ctx.metrics.rdkafka_native_metric.clone()), properties.aws_auth_props, - properties.common.connection.is_aws_msk_iam(), + properties.connection.is_aws_msk_iam(), ) .await?; diff --git a/src/connector/with_options_sink.yaml b/src/connector/with_options_sink.yaml index dd2dd7098f99f..9b63353bc41fa 100644 --- a/src/connector/with_options_sink.yaml +++ b/src/connector/with_options_sink.yaml @@ -443,84 +443,9 @@ KafkaConfig: field_type: Duration required: false default: 'Duration :: from_secs (5)' - - name: properties.bootstrap.server - field_type: String + - name: connection + field_type: crate::connector_common::KafkaConnection required: true - alias: - - kafka.brokers - - name: properties.security.protocol - field_type: String - comments: |- - Security protocol used for RisingWave to communicate with Kafka brokers. Could be - PLAINTEXT, SSL, SASL_PLAINTEXT or SASL_SSL. - required: false - - name: properties.ssl.endpoint.identification.algorithm - field_type: String - required: false - - name: properties.ssl.ca.location - field_type: String - comments: Path to CA certificate file for verifying the broker's key. - required: false - - name: properties.ssl.ca.pem - field_type: String - comments: CA certificate string (PEM format) for verifying the broker's key. - required: false - - name: properties.ssl.certificate.location - field_type: String - comments: Path to client's certificate file (PEM). 
- required: false - - name: properties.ssl.certificate.pem - field_type: String - comments: Client's public key string (PEM format) used for authentication. - required: false - - name: properties.ssl.key.location - field_type: String - comments: Path to client's private key file (PEM). - required: false - - name: properties.ssl.key.pem - field_type: String - comments: Client's private key string (PEM format) used for authentication. - required: false - - name: properties.ssl.key.password - field_type: String - comments: Passphrase of client's private key. - required: false - - name: properties.sasl.mechanism - field_type: String - comments: SASL mechanism if SASL is enabled. Currently support PLAIN, SCRAM, GSSAPI, and AWS_MSK_IAM. - required: false - - name: properties.sasl.username - field_type: String - comments: SASL username for SASL/PLAIN and SASL/SCRAM. - required: false - - name: properties.sasl.password - field_type: String - comments: SASL password for SASL/PLAIN and SASL/SCRAM. - required: false - - name: properties.sasl.kerberos.service.name - field_type: String - comments: Kafka server's Kerberos principal name under SASL/GSSAPI, not including /hostname@REALM. - required: false - - name: properties.sasl.kerberos.keytab - field_type: String - comments: Path to client's Kerberos keytab file under SASL/GSSAPI. - required: false - - name: properties.sasl.kerberos.principal - field_type: String - comments: Client's Kerberos principal name under SASL/GSSAPI. - required: false - - name: properties.sasl.kerberos.kinit.cmd - field_type: String - comments: Shell command to refresh or acquire the client's Kerberos ticket under SASL/GSSAPI. - required: false - - name: properties.sasl.kerberos.min.time.before.relogin - field_type: String - comments: Minimum time in milliseconds between key refresh attempts under SASL/GSSAPI. - required: false - - name: properties.sasl.oauthbearer.config - field_type: String - comments: Configurations for SASL/OAUTHBEARER. - required: false - name: properties.retry.max field_type: u32 required: false @@ -682,6 +607,86 @@ KafkaConfig: required: false alias: - profile +KafkaConnection: + fields: + - name: properties.bootstrap.server + field_type: String + required: true + alias: + - kafka.brokers + - name: properties.security.protocol + field_type: String + comments: |- + Security protocol used for RisingWave to communicate with Kafka brokers. Could be + PLAINTEXT, SSL, SASL_PLAINTEXT or SASL_SSL. + required: false + - name: properties.ssl.endpoint.identification.algorithm + field_type: String + required: false + - name: properties.ssl.ca.location + field_type: String + comments: Path to CA certificate file for verifying the broker's key. + required: false + - name: properties.ssl.ca.pem + field_type: String + comments: CA certificate string (PEM format) for verifying the broker's key. + required: false + - name: properties.ssl.certificate.location + field_type: String + comments: Path to client's certificate file (PEM). + required: false + - name: properties.ssl.certificate.pem + field_type: String + comments: Client's public key string (PEM format) used for authentication. + required: false + - name: properties.ssl.key.location + field_type: String + comments: Path to client's private key file (PEM). + required: false + - name: properties.ssl.key.pem + field_type: String + comments: Client's private key string (PEM format) used for authentication. 
+ required: false + - name: properties.ssl.key.password + field_type: String + comments: Passphrase of client's private key. + required: false + - name: properties.sasl.mechanism + field_type: String + comments: SASL mechanism if SASL is enabled. Currently support PLAIN, SCRAM, GSSAPI, and AWS_MSK_IAM. + required: false + - name: properties.sasl.username + field_type: String + comments: SASL username for SASL/PLAIN and SASL/SCRAM. + required: false + - name: properties.sasl.password + field_type: String + comments: SASL password for SASL/PLAIN and SASL/SCRAM. + required: false + - name: properties.sasl.kerberos.service.name + field_type: String + comments: Kafka server's Kerberos principal name under SASL/GSSAPI, not including /hostname@REALM. + required: false + - name: properties.sasl.kerberos.keytab + field_type: String + comments: Path to client's Kerberos keytab file under SASL/GSSAPI. + required: false + - name: properties.sasl.kerberos.principal + field_type: String + comments: Client's Kerberos principal name under SASL/GSSAPI. + required: false + - name: properties.sasl.kerberos.kinit.cmd + field_type: String + comments: Shell command to refresh or acquire the client's Kerberos ticket under SASL/GSSAPI. + required: false + - name: properties.sasl.kerberos.min.time.before.relogin + field_type: String + comments: Minimum time in milliseconds between key refresh attempts under SASL/GSSAPI. + required: false + - name: properties.sasl.oauthbearer.config + field_type: String + comments: Configurations for SASL/OAUTHBEARER. + required: false KinesisSinkConfig: fields: - name: stream diff --git a/src/meta/src/stream/source_manager.rs b/src/meta/src/stream/source_manager.rs index 84dc1cda4c5ea..deaf34da2a545 100644 --- a/src/meta/src/stream/source_manager.rs +++ b/src/meta/src/stream/source_manager.rs @@ -1201,7 +1201,7 @@ fn get_kafka_bootstrap_addr(connector_properties: &ConnectorProperties) -> Optio { // for kafka source: extract the bootstrap servers from the source properties as shared source entry (on meta) if let ConnectorProperties::Kafka(kafka_props) = connector_properties { - return Some(kafka_props.common.connection.brokers.clone()); + return Some(kafka_props.connection.brokers.clone()); } None } From 3b3f7256fa46a18b1fe4007dc7cd5e58ea4f0205 Mon Sep 17 00:00:00 2001 From: tabVersion Date: Wed, 23 Oct 2024 01:06:14 +0800 Subject: [PATCH 06/51] use connection hash as hashmap entry Signed-off-by: tabVersion --- src/connector/src/connector_common/common.rs | 9 ++++++++- .../src/source/kafka/enumerator/client.rs | 19 ++++++------------- src/meta/src/stream/source_manager.rs | 12 ++++++------ 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/connector/src/connector_common/common.rs b/src/connector/src/connector_common/common.rs index 3c3f1d8493fb5..6015461344ebc 100644 --- a/src/connector/src/connector_common/common.rs +++ b/src/connector/src/connector_common/common.rs @@ -13,6 +13,7 @@ // limitations under the License. 
use std::collections::BTreeMap; +use std::hash::{Hash, Hasher}; use std::io::Write; use std::time::Duration; @@ -169,7 +170,7 @@ pub fn check_kafka_connection_identical(lhs: &KafkaProperties, rhs: &KafkaProper } #[serde_as] -#[derive(Debug, Clone, Deserialize, WithOptions, PartialEq)] +#[derive(Debug, Clone, Deserialize, WithOptions, PartialEq, Hash)] pub struct KafkaConnection { #[serde(rename = "properties.bootstrap.server", alias = "kafka.brokers")] pub brokers: String, @@ -329,6 +330,12 @@ impl RdKafkaPropertiesCommon { } impl KafkaConnection { + pub fn get_hash(&self) -> u64 { + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + self.hash(&mut hasher); + hasher.finish() + } + pub(crate) fn set_security_properties(&self, config: &mut ClientConfig) { // AWS_MSK_IAM if self.is_aws_msk_iam() { diff --git a/src/connector/src/source/kafka/enumerator/client.rs b/src/connector/src/source/kafka/enumerator/client.rs index cdb5fbf9b88f1..9ae66afefec66 100644 --- a/src/connector/src/source/kafka/enumerator/client.rs +++ b/src/connector/src/source/kafka/enumerator/client.rs @@ -25,7 +25,6 @@ use rdkafka::{Offset, TopicPartitionList}; use risingwave_common::bail; use risingwave_common::metrics::LabelGuardedMetric; -use crate::connector_common::check_kafka_connection_identical; use crate::error::ConnectorResult; use crate::source::base::SplitEnumerator; use crate::source::kafka::split::KafkaSplit; @@ -34,14 +33,12 @@ use crate::source::kafka::{ }; use crate::source::SourceEnumeratorContextRef; -pub static SHARED_KAFKA_CLIENT: LazyLock>> = +pub static SHARED_KAFKA_CLIENT: LazyLock>> = LazyLock::new(|| tokio::sync::Mutex::new(HashMap::new())); -#[derive(Clone)] pub struct SharedKafkaItem { pub client: Arc>, pub ref_count: i32, - pub props: KafkaProperties, } #[derive(Debug, Copy, Clone, Eq, PartialEq)] @@ -81,6 +78,8 @@ impl SplitEnumerator for KafkaSplitEnumerator { let common_props = &properties.common; let broker_address = properties.connection.brokers.clone(); + + let connection_hash = properties.connection.get_hash(); let broker_rewrite_map = properties.privatelink_common.broker_rewrite_map.clone(); let topic = common_props.topic.clone(); config.set("bootstrap.servers", &broker_address); @@ -107,12 +106,7 @@ impl SplitEnumerator for KafkaSplitEnumerator { } let kafka_client: Arc>; - if let Some(item) = SHARED_KAFKA_CLIENT - .lock() - .await - .get_mut(broker_address.as_str()) - && check_kafka_connection_identical(&item.props, &properties) - { + if let Some(item) = SHARED_KAFKA_CLIENT.lock().await.get_mut(&connection_hash) { kafka_client = item.client.clone(); item.ref_count += 1; } else { @@ -121,7 +115,7 @@ impl SplitEnumerator for KafkaSplitEnumerator { broker_rewrite_map, None, None, - properties.aws_auth_props.clone(), + properties.aws_auth_props, properties.connection.is_aws_msk_iam(), ) .await?; @@ -143,11 +137,10 @@ impl SplitEnumerator for KafkaSplitEnumerator { kafka_client = Arc::new(client); SHARED_KAFKA_CLIENT.lock().await.insert( - broker_address.clone(), + connection_hash, SharedKafkaItem { client: kafka_client.clone(), ref_count: 1, - props: properties.clone(), }, ); } diff --git a/src/meta/src/stream/source_manager.rs b/src/meta/src/stream/source_manager.rs index deaf34da2a545..57733c15f6011 100644 --- a/src/meta/src/stream/source_manager.rs +++ b/src/meta/src/stream/source_manager.rs @@ -111,7 +111,7 @@ pub async fn create_source_worker_handle( let current_splits_ref = splits.clone(); let connector_properties = extract_prop_from_new_source(source)?; - let 
share_client_entry = get_kafka_bootstrap_addr(&connector_properties); + let share_client_entry = get_kafka_connection_hash(&connector_properties); let enable_scale_in = connector_properties.enable_split_scale_in(); let (sync_call_tx, sync_call_rx) = tokio::sync::mpsc::unbounded_channel(); let handle = dispatch_source_prop!(connector_properties, prop, { @@ -267,7 +267,7 @@ pub struct ConnectorSourceWorkerHandle { sync_call_tx: UnboundedSender>>, splits: SharedSplitMapRef, enable_scale_in: bool, - pub share_client_entry: Option, + pub share_client_entry: Option, } impl ConnectorSourceWorkerHandle { @@ -1064,7 +1064,7 @@ impl SourceManager { let connector_properties = extract_prop_from_existing_source(&source)?; - let share_client_entry = get_kafka_bootstrap_addr(&connector_properties); + let share_client_entry = get_kafka_connection_hash(&connector_properties); let enable_scale_in = connector_properties.enable_split_scale_in(); let (sync_call_tx, sync_call_rx) = tokio::sync::mpsc::unbounded_channel(); @@ -1197,11 +1197,11 @@ pub fn build_actor_split_impls( .collect() } -fn get_kafka_bootstrap_addr(connector_properties: &ConnectorProperties) -> Option { +fn get_kafka_connection_hash(connector_properties: &ConnectorProperties) -> Option { { - // for kafka source: extract the bootstrap servers from the source properties as shared source entry (on meta) + // for kafka source: get the hash of connection props as shared source entry (on meta) if let ConnectorProperties::Kafka(kafka_props) = connector_properties { - return Some(kafka_props.connection.brokers.clone()); + return Some(kafka_props.connection.get_hash()); } None } From 968ed087d76761f24c75ccf0e1073daf381b501b Mon Sep 17 00:00:00 2001 From: tabVersion Date: Wed, 23 Oct 2024 01:11:25 +0800 Subject: [PATCH 07/51] fix Signed-off-by: tabVersion --- src/connector/src/connector_common/common.rs | 6 ------ src/connector/src/connector_common/mod.rs | 7 +++---- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/src/connector/src/connector_common/common.rs b/src/connector/src/connector_common/common.rs index 6015461344ebc..b21f63a4b16b4 100644 --- a/src/connector/src/connector_common/common.rs +++ b/src/connector/src/connector_common/common.rs @@ -163,12 +163,6 @@ impl AwsAuthProps { } } -pub fn check_kafka_connection_identical(lhs: &KafkaProperties, rhs: &KafkaProperties) -> bool { - lhs.aws_auth_props == rhs.aws_auth_props - && lhs.privatelink_common == rhs.privatelink_common - && lhs.connection == rhs.connection -} - #[serde_as] #[derive(Debug, Clone, Deserialize, WithOptions, PartialEq, Hash)] pub struct KafkaConnection { diff --git a/src/connector/src/connector_common/mod.rs b/src/connector/src/connector_common/mod.rs index fcce2aa80a379..57b614fdf548a 100644 --- a/src/connector/src/connector_common/mod.rs +++ b/src/connector/src/connector_common/mod.rs @@ -19,10 +19,9 @@ pub use mqtt_common::{MqttCommon, QualityOfService as MqttQualityOfService}; mod common; pub use common::{ - check_kafka_connection_identical, AwsAuthProps, AwsPrivateLinkItem, KafkaCommon, - KafkaConnection, KafkaPrivateLinkCommon, KinesisCommon, MongodbCommon, NatsCommon, - PulsarCommon, PulsarOauthCommon, RdKafkaPropertiesCommon, PRIVATE_LINK_BROKER_REWRITE_MAP_KEY, - PRIVATE_LINK_TARGETS_KEY, + AwsAuthProps, AwsPrivateLinkItem, KafkaCommon, KafkaConnection, KafkaPrivateLinkCommon, + KinesisCommon, MongodbCommon, NatsCommon, PulsarCommon, PulsarOauthCommon, + RdKafkaPropertiesCommon, PRIVATE_LINK_BROKER_REWRITE_MAP_KEY, PRIVATE_LINK_TARGETS_KEY, }; mod 
iceberg; From a41f3fceea4f8a415216da92f5b05922d60b1681 Mon Sep 17 00:00:00 2001 From: tabversion Date: Wed, 23 Oct 2024 13:45:35 +0800 Subject: [PATCH 08/51] fix Signed-off-by: tabversion --- src/connector/src/connector_common/common.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/connector/src/connector_common/common.rs b/src/connector/src/connector_common/common.rs index b21f63a4b16b4..7c3c273e99ac6 100644 --- a/src/connector/src/connector_common/common.rs +++ b/src/connector/src/connector_common/common.rs @@ -61,8 +61,6 @@ use aws_types::region::Region; use aws_types::SdkConfig; use risingwave_common::util::env_var::env_var_is_true; -use crate::source::kafka::KafkaProperties; - /// A flatten config map for aws auth. #[derive(Deserialize, Debug, Clone, WithOptions, PartialEq)] pub struct AwsAuthProps { From 6534ebf57eec5530b86719b4a37d97f93e751251 Mon Sep 17 00:00:00 2001 From: tabVersion Date: Wed, 23 Oct 2024 15:58:44 +0800 Subject: [PATCH 09/51] rerun Signed-off-by: tabVersion From ad8b98929f59a2f0af4440328cbd48b399de996a Mon Sep 17 00:00:00 2001 From: tabVersion Date: Wed, 23 Oct 2024 18:03:04 +0800 Subject: [PATCH 10/51] fix Signed-off-by: tabVersion --- .../src/source/kafka/enumerator/client.rs | 22 ++++++++++++++++--- src/meta/src/stream/source_manager.rs | 2 +- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/connector/src/source/kafka/enumerator/client.rs b/src/connector/src/source/kafka/enumerator/client.rs index 9ae66afefec66..af202b76ee9b6 100644 --- a/src/connector/src/source/kafka/enumerator/client.rs +++ b/src/connector/src/source/kafka/enumerator/client.rs @@ -106,10 +106,20 @@ impl SplitEnumerator for KafkaSplitEnumerator { } let kafka_client: Arc>; - if let Some(item) = SHARED_KAFKA_CLIENT.lock().await.get_mut(&connection_hash) { + let mut shared_client_guard = SHARED_KAFKA_CLIENT.lock().await; + if let Some(item) = shared_client_guard.get_mut(&connection_hash) { + tracing::info!( + "reusing kafka client for connection hash {}, to broker {}", + connection_hash, + broker_address + ); kafka_client = item.client.clone(); item.ref_count += 1; + drop(shared_client_guard); } else { + // drop the guard and acquire a new one to avoid a 10s blocking call + drop(shared_client_guard); + // don't need kafka metrics from enumerator let ctx_common = KafkaContextCommon::new( broker_rewrite_map, @@ -136,7 +146,13 @@ impl SplitEnumerator for KafkaSplitEnumerator { } kafka_client = Arc::new(client); - SHARED_KAFKA_CLIENT.lock().await.insert( + tracing::debug!( + "created kafka client for connection hash {} to broker {}", + connection_hash, + broker_address + ); + let mut shared_client_guard = SHARED_KAFKA_CLIENT.lock().await; + shared_client_guard.insert( connection_hash, SharedKafkaItem { client: kafka_client.clone(), @@ -174,7 +190,7 @@ impl SplitEnumerator for KafkaSplitEnumerator { .fetch_stop_offset(topic_partitions.as_ref(), &watermarks) .await?; - let ret = topic_partitions + let ret: Vec<_> = topic_partitions .into_iter() .map(|partition| KafkaSplit { topic: self.topic.clone(), diff --git a/src/meta/src/stream/source_manager.rs b/src/meta/src/stream/source_manager.rs index 57733c15f6011..74098e7ebdd76 100644 --- a/src/meta/src/stream/source_manager.rs +++ b/src/meta/src/stream/source_manager.rs @@ -140,7 +140,6 @@ pub async fn create_source_worker_handle( tokio::spawn(async move { worker.run(sync_call_rx).await }) }); - Ok(ConnectorSourceWorkerHandle { handle, sync_call_tx, @@ -1042,6 +1041,7 @@ impl SourceManager { if let Some(item) = 
share_client_guard.get_mut(&entry) { item.ref_count -= 1; if item.ref_count == 0 { + tracing::info!("removing shared kafka client entry {}", entry); share_client_guard.remove(&entry); } } From ae1b70a3b591b2cd7196353676069358ac1ea95a Mon Sep 17 00:00:00 2001 From: tabVersion Date: Wed, 23 Oct 2024 20:06:49 +0800 Subject: [PATCH 11/51] fix Signed-off-by: tabVersion --- e2e_test/source_legacy/basic/ddl.slt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/e2e_test/source_legacy/basic/ddl.slt b/e2e_test/source_legacy/basic/ddl.slt index 8e63971fb5b82..4ce630f04da6d 100644 --- a/e2e_test/source_legacy/basic/ddl.slt +++ b/e2e_test/source_legacy/basic/ddl.slt @@ -30,7 +30,7 @@ Caused by these errors (recent errors listed first): 1: gRPC request to meta service failed: Internal error 2: failed to create source worker 3: failed to parse json - 4: missing field `properties.bootstrap.server` + 4: missing field `topic` statement error From 58b51280f94784154d1c31994791628748a0c42b Mon Sep 17 00:00:00 2001 From: tabversion Date: Fri, 25 Oct 2024 10:25:38 +0800 Subject: [PATCH 12/51] better with options --- src/connector/src/sink/kafka.rs | 4 +- src/connector/with_options_sink.yaml | 159 +++++++++++++-------------- 2 files changed, 79 insertions(+), 84 deletions(-) diff --git a/src/connector/src/sink/kafka.rs b/src/connector/src/sink/kafka.rs index 67d2d0854b0c3..9fc8da7ef7a48 100644 --- a/src/connector/src/sink/kafka.rs +++ b/src/connector/src/sink/kafka.rs @@ -35,7 +35,7 @@ use with_options::WithOptions; use super::catalog::{SinkFormat, SinkFormatDesc}; use super::{Sink, SinkError, SinkParam}; use crate::connector_common::{ - AwsAuthProps, KafkaCommon, KafkaPrivateLinkCommon, RdKafkaPropertiesCommon, + AwsAuthProps, KafkaCommon, KafkaConnection, KafkaPrivateLinkCommon, RdKafkaPropertiesCommon, }; use crate::sink::formatter::SinkFormatterImpl; use crate::sink::log_store::DeliveryFutureManagerAddFuture; @@ -215,7 +215,7 @@ pub struct KafkaConfig { pub common: KafkaCommon, #[serde(flatten)] - pub connection: crate::connector_common::KafkaConnection, + pub connection: KafkaConnection, #[serde( rename = "properties.retry.max", diff --git a/src/connector/with_options_sink.yaml b/src/connector/with_options_sink.yaml index 9b63353bc41fa..dd2dd7098f99f 100644 --- a/src/connector/with_options_sink.yaml +++ b/src/connector/with_options_sink.yaml @@ -443,9 +443,84 @@ KafkaConfig: field_type: Duration required: false default: 'Duration :: from_secs (5)' - - name: connection - field_type: crate::connector_common::KafkaConnection + - name: properties.bootstrap.server + field_type: String required: true + alias: + - kafka.brokers + - name: properties.security.protocol + field_type: String + comments: |- + Security protocol used for RisingWave to communicate with Kafka brokers. Could be + PLAINTEXT, SSL, SASL_PLAINTEXT or SASL_SSL. + required: false + - name: properties.ssl.endpoint.identification.algorithm + field_type: String + required: false + - name: properties.ssl.ca.location + field_type: String + comments: Path to CA certificate file for verifying the broker's key. + required: false + - name: properties.ssl.ca.pem + field_type: String + comments: CA certificate string (PEM format) for verifying the broker's key. + required: false + - name: properties.ssl.certificate.location + field_type: String + comments: Path to client's certificate file (PEM). 
+ required: false + - name: properties.ssl.certificate.pem + field_type: String + comments: Client's public key string (PEM format) used for authentication. + required: false + - name: properties.ssl.key.location + field_type: String + comments: Path to client's private key file (PEM). + required: false + - name: properties.ssl.key.pem + field_type: String + comments: Client's private key string (PEM format) used for authentication. + required: false + - name: properties.ssl.key.password + field_type: String + comments: Passphrase of client's private key. + required: false + - name: properties.sasl.mechanism + field_type: String + comments: SASL mechanism if SASL is enabled. Currently support PLAIN, SCRAM, GSSAPI, and AWS_MSK_IAM. + required: false + - name: properties.sasl.username + field_type: String + comments: SASL username for SASL/PLAIN and SASL/SCRAM. + required: false + - name: properties.sasl.password + field_type: String + comments: SASL password for SASL/PLAIN and SASL/SCRAM. + required: false + - name: properties.sasl.kerberos.service.name + field_type: String + comments: Kafka server's Kerberos principal name under SASL/GSSAPI, not including /hostname@REALM. + required: false + - name: properties.sasl.kerberos.keytab + field_type: String + comments: Path to client's Kerberos keytab file under SASL/GSSAPI. + required: false + - name: properties.sasl.kerberos.principal + field_type: String + comments: Client's Kerberos principal name under SASL/GSSAPI. + required: false + - name: properties.sasl.kerberos.kinit.cmd + field_type: String + comments: Shell command to refresh or acquire the client's Kerberos ticket under SASL/GSSAPI. + required: false + - name: properties.sasl.kerberos.min.time.before.relogin + field_type: String + comments: Minimum time in milliseconds between key refresh attempts under SASL/GSSAPI. + required: false + - name: properties.sasl.oauthbearer.config + field_type: String + comments: Configurations for SASL/OAUTHBEARER. + required: false - name: properties.retry.max field_type: u32 required: false @@ -607,86 +682,6 @@ KafkaConfig: required: false alias: - profile -KafkaConnection: - fields: - - name: properties.bootstrap.server - field_type: String - required: true - alias: - - kafka.brokers - - name: properties.security.protocol - field_type: String - comments: |- - Security protocol used for RisingWave to communicate with Kafka brokers. Could be - PLAINTEXT, SSL, SASL_PLAINTEXT or SASL_SSL. - required: false - - name: properties.ssl.endpoint.identification.algorithm - field_type: String - required: false - - name: properties.ssl.ca.location - field_type: String - comments: Path to CA certificate file for verifying the broker's key. - required: false - - name: properties.ssl.ca.pem - field_type: String - comments: CA certificate string (PEM format) for verifying the broker's key. - required: false - - name: properties.ssl.certificate.location - field_type: String - comments: Path to client's certificate file (PEM). - required: false - - name: properties.ssl.certificate.pem - field_type: String - comments: Client's public key string (PEM format) used for authentication. - required: false - - name: properties.ssl.key.location - field_type: String - comments: Path to client's private key file (PEM). - required: false - - name: properties.ssl.key.pem - field_type: String - comments: Client's private key string (PEM format) used for authentication. 
- required: false - - name: properties.ssl.key.password - field_type: String - comments: Passphrase of client's private key. - required: false - - name: properties.sasl.mechanism - field_type: String - comments: SASL mechanism if SASL is enabled. Currently support PLAIN, SCRAM, GSSAPI, and AWS_MSK_IAM. - required: false - - name: properties.sasl.username - field_type: String - comments: SASL username for SASL/PLAIN and SASL/SCRAM. - required: false - - name: properties.sasl.password - field_type: String - comments: SASL password for SASL/PLAIN and SASL/SCRAM. - required: false - - name: properties.sasl.kerberos.service.name - field_type: String - comments: Kafka server's Kerberos principal name under SASL/GSSAPI, not including /hostname@REALM. - required: false - - name: properties.sasl.kerberos.keytab - field_type: String - comments: Path to client's Kerberos keytab file under SASL/GSSAPI. - required: false - - name: properties.sasl.kerberos.principal - field_type: String - comments: Client's Kerberos principal name under SASL/GSSAPI. - required: false - - name: properties.sasl.kerberos.kinit.cmd - field_type: String - comments: Shell command to refresh or acquire the client's Kerberos ticket under SASL/GSSAPI. - required: false - - name: properties.sasl.kerberos.min.time.before.relogin - field_type: String - comments: Minimum time in milliseconds between key refresh attempts under SASL/GSSAPI. - required: false - - name: properties.sasl.oauthbearer.config - field_type: String - comments: Configurations for SASL/OAUTHBEARER. - required: false KinesisSinkConfig: fields: - name: stream From f115a0c1ac01c3ec4bb903969e8cfda28f2294ed Mon Sep 17 00:00:00 2001 From: tabversion Date: Fri, 25 Oct 2024 10:40:05 +0800 Subject: [PATCH 13/51] use kafka connection as hashkey --- src/connector/src/connector_common/common.rs | 2 +- .../src/source/kafka/enumerator/client.rs | 19 ++++++++----------- src/meta/src/stream/source_manager.rs | 16 ++++++++++------ 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/src/connector/src/connector_common/common.rs b/src/connector/src/connector_common/common.rs index 7c3c273e99ac6..34d151d2f87ef 100644 --- a/src/connector/src/connector_common/common.rs +++ b/src/connector/src/connector_common/common.rs @@ -162,7 +162,7 @@ impl AwsAuthProps { } #[serde_as] -#[derive(Debug, Clone, Deserialize, WithOptions, PartialEq, Hash)] +#[derive(Debug, Clone, Deserialize, WithOptions, PartialEq, Hash, Eq)] pub struct KafkaConnection { #[serde(rename = "properties.bootstrap.server", alias = "kafka.brokers")] pub brokers: String, diff --git a/src/connector/src/source/kafka/enumerator/client.rs b/src/connector/src/source/kafka/enumerator/client.rs index af202b76ee9b6..05ea7df51aa9a 100644 --- a/src/connector/src/source/kafka/enumerator/client.rs +++ b/src/connector/src/source/kafka/enumerator/client.rs @@ -29,12 +29,13 @@ use crate::error::ConnectorResult; use crate::source::base::SplitEnumerator; use crate::source::kafka::split::KafkaSplit; use crate::source::kafka::{ - KafkaContextCommon, KafkaProperties, RwConsumerContext, KAFKA_ISOLATION_LEVEL, + KafkaConnection, KafkaContextCommon, KafkaProperties, RwConsumerContext, KAFKA_ISOLATION_LEVEL, }; use crate::source::SourceEnumeratorContextRef; -pub static SHARED_KAFKA_CLIENT: LazyLock>> = - LazyLock::new(|| tokio::sync::Mutex::new(HashMap::new())); +pub static SHARED_KAFKA_CLIENT: LazyLock< + tokio::sync::Mutex>, +> = LazyLock::new(|| tokio::sync::Mutex::new(HashMap::new())); pub struct SharedKafkaItem { pub client: Arc>, 
@@ -78,8 +79,6 @@ impl SplitEnumerator for KafkaSplitEnumerator { let common_props = &properties.common; let broker_address = properties.connection.brokers.clone(); - - let connection_hash = properties.connection.get_hash(); let broker_rewrite_map = properties.privatelink_common.broker_rewrite_map.clone(); let topic = common_props.topic.clone(); config.set("bootstrap.servers", &broker_address); @@ -107,10 +106,9 @@ impl SplitEnumerator for KafkaSplitEnumerator { let kafka_client: Arc>; let mut shared_client_guard = SHARED_KAFKA_CLIENT.lock().await; - if let Some(item) = shared_client_guard.get_mut(&connection_hash) { + if let Some(item) = shared_client_guard.get_mut(&properties.connection) { tracing::info!( - "reusing kafka client for connection hash {}, to broker {}", - connection_hash, + "reusing kafka client for connection to broker {}", broker_address ); kafka_client = item.client.clone(); @@ -147,13 +145,12 @@ impl SplitEnumerator for KafkaSplitEnumerator { kafka_client = Arc::new(client); tracing::debug!( - "created kafka client for connection hash {} to broker {}", - connection_hash, + "created kafka client for connection to broker {}", broker_address ); let mut shared_client_guard = SHARED_KAFKA_CLIENT.lock().await; shared_client_guard.insert( - connection_hash, + properties.connection, SharedKafkaItem { client: kafka_client.clone(), ref_count: 1, diff --git a/src/meta/src/stream/source_manager.rs b/src/meta/src/stream/source_manager.rs index 74098e7ebdd76..49c5904b078cd 100644 --- a/src/meta/src/stream/source_manager.rs +++ b/src/meta/src/stream/source_manager.rs @@ -23,6 +23,7 @@ use std::time::Duration; use anyhow::Context; use risingwave_common::catalog::TableId; use risingwave_common::metrics::LabelGuardedIntGauge; +use risingwave_connector::connector_common::KafkaConnection; use risingwave_connector::error::ConnectorResult; use risingwave_connector::source::kafka::SHARED_KAFKA_CLIENT; use risingwave_connector::source::{ @@ -111,7 +112,7 @@ pub async fn create_source_worker_handle( let current_splits_ref = splits.clone(); let connector_properties = extract_prop_from_new_source(source)?; - let share_client_entry = get_kafka_connection_hash(&connector_properties); + let share_client_entry = get_kafka_connection(&connector_properties); let enable_scale_in = connector_properties.enable_split_scale_in(); let (sync_call_tx, sync_call_rx) = tokio::sync::mpsc::unbounded_channel(); let handle = dispatch_source_prop!(connector_properties, prop, { @@ -266,7 +267,7 @@ pub struct ConnectorSourceWorkerHandle { sync_call_tx: UnboundedSender>>, splits: SharedSplitMapRef, enable_scale_in: bool, - pub share_client_entry: Option, + pub share_client_entry: Option, } impl ConnectorSourceWorkerHandle { @@ -1041,7 +1042,10 @@ impl SourceManager { if let Some(item) = share_client_guard.get_mut(&entry) { item.ref_count -= 1; if item.ref_count == 0 { - tracing::info!("removing shared kafka client entry {}", entry); + tracing::info!( + "removing shared kafka client entry to broker {:?}", + entry.brokers + ); share_client_guard.remove(&entry); } } @@ -1064,7 +1068,7 @@ impl SourceManager { let connector_properties = extract_prop_from_existing_source(&source)?; - let share_client_entry = get_kafka_connection_hash(&connector_properties); + let share_client_entry = get_kafka_connection(&connector_properties); let enable_scale_in = connector_properties.enable_split_scale_in(); let (sync_call_tx, sync_call_rx) = tokio::sync::mpsc::unbounded_channel(); @@ -1197,11 +1201,11 @@ pub fn build_actor_split_impls( 
.collect() } -fn get_kafka_connection_hash(connector_properties: &ConnectorProperties) -> Option { +fn get_kafka_connection(connector_properties: &ConnectorProperties) -> Option { { // for kafka source: get the hash of connection props as shared source entry (on meta) if let ConnectorProperties::Kafka(kafka_props) = connector_properties { - return Some(kafka_props.connection.get_hash()); + return Some(kafka_props.connection.clone()); } None } From d128644b81174d46bcacb10e5684a0070ef8f720 Mon Sep 17 00:00:00 2001 From: tabversion Date: Fri, 25 Oct 2024 17:38:12 +0800 Subject: [PATCH 14/51] use moka --- Cargo.lock | 116 +++------------- src/connector/Cargo.toml | 2 +- .../src/source/kafka/enumerator/client.rs | 129 ++++++++++-------- src/meta/Cargo.toml | 1 + src/meta/src/stream/source_manager.rs | 52 +++++-- 5 files changed, 136 insertions(+), 164 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4e315686fde05..dbde7ed89aa37 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1040,7 +1040,7 @@ dependencies = [ "async-channel 2.2.1", "async-executor", "async-io", - "async-lock 3.4.0", + "async-lock", "blocking", "futures-lite", "once_cell", @@ -1053,7 +1053,7 @@ version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0d6baa8f0178795da0e71bc42c9e5d13261aac7ee549853162e66a241ba17964" dependencies = [ - "async-lock 3.4.0", + "async-lock", "cfg-if", "concurrent-queue", "futures-io", @@ -1066,15 +1066,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "async-lock" -version = "2.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "287272293e9d8c41773cec55e365490fe034813a2f172f502d6ddcf75b2f582b" -dependencies = [ - "event-listener 2.5.3", -] - [[package]] name = "async-lock" version = "3.4.0" @@ -1141,7 +1132,7 @@ dependencies = [ "async-channel 1.9.0", "async-global-executor", "async-io", - "async-lock 3.4.0", + "async-lock", "crossbeam-utils", "futures-channel", "futures-core", @@ -2322,12 +2313,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "bytecount" -version = "0.6.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce" - [[package]] name = "bytemuck" version = "1.14.0" @@ -2389,15 +2374,6 @@ dependencies = [ "pkg-config", ] -[[package]] -name = "camino" -version = "1.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b96ec4966b5813e2c0507c1f86115c8c5abaadc3980879c3424042a02fd1ad3" -dependencies = [ - "serde", -] - [[package]] name = "cap-fs-ext" version = "3.0.0" @@ -2469,28 +2445,6 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1582e1c9e755dd6ad6b224dcffb135d199399a4568d454bd89fe515ca8425695" -[[package]] -name = "cargo-platform" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24b1f0365a6c6bb4020cd05806fd0d33c44d38046b8bd7f0e40814b9763cabfc" -dependencies = [ - "serde", -] - -[[package]] -name = "cargo_metadata" -version = "0.14.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4acbb09d9ee8e23699b9634375c72795d095bf268439da88562cf9b501f181fa" -dependencies = [ - "camino", - "cargo-platform", - "semver 1.0.18", - "serde", - "serde_json", -] - [[package]] name = "cast" version = "0.3.0" @@ -4731,15 +4685,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "error-chain" -version = "0.12.4" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d2f06b9cac1506ece98fe3231e3cc9c4410ec3d5b1f24ae1c8946f0742cdefc" -dependencies = [ - "version_check", -] - [[package]] name = "escape8259" version = "0.5.2" @@ -7610,21 +7555,21 @@ dependencies = [ [[package]] name = "moka" -version = "0.12.0" +version = "0.12.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8dc65d4615c08c8a13d91fd404b5a2a4485ba35b4091e3315cf8798d280c2f29" +checksum = "32cf62eb4dd975d2dde76432fb1075c49e3ee2331cf36f1f8fd4b66550d32b6f" dependencies = [ - "async-lock 2.8.0", + "async-lock", "async-trait", "crossbeam-channel", "crossbeam-epoch", "crossbeam-utils", + "event-listener 5.3.1", "futures-util", "once_cell", "parking_lot 0.12.1", "quanta", "rustc_version 0.4.0", - "skeptic", "smallvec", "tagptr", "thiserror", @@ -9688,17 +9633,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "pulldown-cmark" -version = "0.9.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57206b407293d2bcd3af849ce869d52068623f19e1b5ff8e8778e3309439682b" -dependencies = [ - "bitflags 2.6.0", - "memchr", - "unicase", -] - [[package]] name = "pulsar" version = "6.3.0" @@ -9808,12 +9742,12 @@ checksum = "658fa1faf7a4cc5f057c9ee5ef560f717ad9d8dc66d975267f709624d6e1ab88" [[package]] name = "quanta" -version = "0.11.0" -source = "git+https://github.com/madsim-rs/quanta.git?rev=948bdc3#948bdc3d4cd3fcfe3d52d03dd83deee96d97d770" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5167a477619228a0b284fac2674e3c388cba90631d7b7de620e6f1fcd08da5" dependencies = [ "crossbeam-utils", "libc", - "mach2", "once_cell", "raw-cpuid", "wasi", @@ -9912,11 +9846,11 @@ dependencies = [ [[package]] name = "raw-cpuid" -version = "10.7.0" +version = "11.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332" +checksum = "1ab240315c661615f2ee9f0f2cd32d5a7343a84d5ebcccb99d46e6637565e7b0" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.6.0", ] [[package]] @@ -11564,6 +11498,7 @@ dependencies = [ "maplit", "memcomparable", "mime_guess", + "moka", "notify", "num-integer", "num-traits", @@ -13024,9 +12959,6 @@ name = "semver" version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918" -dependencies = [ - "serde", -] [[package]] name = "semver-parser" @@ -13488,21 +13420,6 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9fed904c7fb2856d868b92464fc8fa597fce366edea1a9cbfaa8cb5fe080bd6d" -[[package]] -name = "skeptic" -version = "0.13.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16d23b015676c90a0f01c197bfdc786c20342c73a0afdda9025adb0bc42940a8" -dependencies = [ - "bytecount", - "cargo_metadata", - "error-chain", - "glob", - "pulldown-cmark", - "tempfile", - "walkdir", -] - [[package]] name = "slab" version = "0.4.9" @@ -16612,3 +16529,8 @@ dependencies = [ "libc", "pkg-config", ] + +[[patch.unused]] +name = "quanta" +version = "0.11.0" +source = "git+https://github.com/madsim-rs/quanta.git?rev=948bdc3#948bdc3d4cd3fcfe3d52d03dd83deee96d97d770" diff --git a/src/connector/Cargo.toml b/src/connector/Cargo.toml index 79764508e22c4..7313783651c14 100644 --- a/src/connector/Cargo.toml +++ b/src/connector/Cargo.toml @@ -71,7 +71,7 @@ jni = { version = "0.21.1", features = 
["invocation"] } jsonbb = { workspace = true } jsonwebtoken = "9.2.0" maplit = "1.0.2" -moka = { version = "0.12.0", features = ["future"] } +moka = { version = "0.12.8", features = ["future"] } mongodb = { version = "2.8.2", features = ["tokio-runtime"] } mysql_async = { version = "0.34", default-features = false, features = [ "default", diff --git a/src/connector/src/source/kafka/enumerator/client.rs b/src/connector/src/source/kafka/enumerator/client.rs index 05ea7df51aa9a..66714f2d04367 100644 --- a/src/connector/src/source/kafka/enumerator/client.rs +++ b/src/connector/src/source/kafka/enumerator/client.rs @@ -18,12 +18,14 @@ use std::time::Duration; use anyhow::{anyhow, Context}; use async_trait::async_trait; +use moka::future::Cache as MokaCache; use prometheus::core::{AtomicI64, GenericGauge}; use rdkafka::consumer::{BaseConsumer, Consumer}; use rdkafka::error::KafkaResult; use rdkafka::{Offset, TopicPartitionList}; use risingwave_common::bail; use risingwave_common::metrics::LabelGuardedMetric; +use thiserror_ext::AsReport; use crate::error::ConnectorResult; use crate::source::base::SplitEnumerator; @@ -34,9 +36,10 @@ use crate::source::kafka::{ use crate::source::SourceEnumeratorContextRef; pub static SHARED_KAFKA_CLIENT: LazyLock< - tokio::sync::Mutex>, -> = LazyLock::new(|| tokio::sync::Mutex::new(HashMap::new())); + MokaCache>>, +> = LazyLock::new(|| moka::future::Cache::builder().build()); +#[derive(Clone)] pub struct SharedKafkaItem { pub client: Arc>, pub ref_count: i32, @@ -104,59 +107,77 @@ impl SplitEnumerator for KafkaSplitEnumerator { scan_start_offset = KafkaEnumeratorOffset::Timestamp(time_offset) } - let kafka_client: Arc>; - let mut shared_client_guard = SHARED_KAFKA_CLIENT.lock().await; - if let Some(item) = shared_client_guard.get_mut(&properties.connection) { - tracing::info!( - "reusing kafka client for connection to broker {}", - broker_address - ); - kafka_client = item.client.clone(); - item.ref_count += 1; - drop(shared_client_guard); - } else { - // drop the guard and acquire a new one to avoid a 10s blocking call - drop(shared_client_guard); - - // don't need kafka metrics from enumerator - let ctx_common = KafkaContextCommon::new( - broker_rewrite_map, - None, - None, - properties.aws_auth_props, - properties.connection.is_aws_msk_iam(), - ) - .await?; - let client_ctx = RwConsumerContext::new(ctx_common); - let client: BaseConsumer = - config.create_with_context(client_ctx).await?; - - // Note that before any SASL/OAUTHBEARER broker connection can succeed the application must call - // rd_kafka_oauthbearer_set_token() once – either directly or, more typically, by invoking either - // rd_kafka_poll(), rd_kafka_consumer_poll(), rd_kafka_queue_poll(), etc, in order to cause retrieval - // of an initial token to occur. 
- // https://docs.confluent.io/platform/current/clients/librdkafka/html/rdkafka_8h.html#a988395722598f63396d7a1bedb22adaf - if properties.connection.is_aws_msk_iam() { - #[cfg(not(madsim))] - client.poll(Duration::from_secs(10)); // note: this is a blocking call - #[cfg(madsim)] - client.poll(Duration::from_secs(10)).await; - } + let client_arc = SHARED_KAFKA_CLIENT + .entry_by_ref(&properties.connection) + .and_upsert_with(|maybe_item| async { + tracing::info!( + "entry {:?} maybe item: {:?}", + &properties.connection, + maybe_item.is_some() + ); + if let Some(item) = maybe_item { + let arc_item = item.into_value(); + match &arc_item.as_ref() { + Ok(item) => { + tracing::info!( + "reusing source {} shared kafka client to {} (ref count: {})", + &context.info.source_id, + &broker_address, + item.ref_count + 1 + ); + Arc::new(Ok(SharedKafkaItem { + client: item.client.clone(), + ref_count: item.ref_count + 1, + })) + } + Err(_) => arc_item.clone(), + } + } else { + let build_client: ConnectorResult = async { + let ctx_common = KafkaContextCommon::new( + broker_rewrite_map, + None, + None, + properties.aws_auth_props, + properties.connection.is_aws_msk_iam(), + ) + .await?; + let client_ctx = RwConsumerContext::new(ctx_common); + let client: BaseConsumer = + config.create_with_context(client_ctx).await?; + + // Note that before any SASL/OAUTHBEARER broker connection can succeed the application must call + // rd_kafka_oauthbearer_set_token() once – either directly or, more typically, by invoking either + // rd_kafka_poll(), rd_kafka_consumer_poll(), rd_kafka_queue_poll(), etc, in order to cause retrieval + // of an initial token to occur. + // https://docs.confluent.io/platform/current/clients/librdkafka/html/rdkafka_8h.html#a988395722598f63396d7a1bedb22adaf + if properties.connection.is_aws_msk_iam() { + #[cfg(not(madsim))] + client.poll(Duration::from_secs(10)); // note: this is a blocking call + #[cfg(madsim)] + client.poll(Duration::from_secs(10)).await; + } + + Ok(SharedKafkaItem { + client: Arc::new(client), + ref_count: 1, + }) + } + .await; + + Arc::new(build_client) + } + }) + .await + .into_value(); - kafka_client = Arc::new(client); - tracing::debug!( - "created kafka client for connection to broker {}", - broker_address - ); - let mut shared_client_guard = SHARED_KAFKA_CLIENT.lock().await; - shared_client_guard.insert( - properties.connection, - SharedKafkaItem { - client: kafka_client.clone(), - ref_count: 1, - }, - ); - } + let kafka_client: Arc> = match client_arc.as_ref() { + Ok(client) => client.client.clone(), + Err(e) => { + SHARED_KAFKA_CLIENT.remove(&properties.connection).await; + bail!("{}", e.as_report()); + } + }; Ok(Self { context, diff --git a/src/meta/Cargo.toml b/src/meta/Cargo.toml index d3174b7ddfb19..afc5357c7ebd4 100644 --- a/src/meta/Cargo.toml +++ b/src/meta/Cargo.toml @@ -42,6 +42,7 @@ jsonbb = { workspace = true } maplit = "1.0.2" memcomparable = { version = "0.2" } mime_guess = "2" +moka = { version = "0.12.8", features = ["future"] } notify = { version = "6", default-features = false, features = ["macos_fsevent"] } num-integer = "0.1" num-traits = "0.2" diff --git a/src/meta/src/stream/source_manager.rs b/src/meta/src/stream/source_manager.rs index 49c5904b078cd..48b260e837d93 100644 --- a/src/meta/src/stream/source_manager.rs +++ b/src/meta/src/stream/source_manager.rs @@ -21,11 +21,12 @@ use std::sync::Arc; use std::time::Duration; use anyhow::Context; +use moka::ops::compute::Op; use risingwave_common::catalog::TableId; use 
risingwave_common::metrics::LabelGuardedIntGauge; use risingwave_connector::connector_common::KafkaConnection; use risingwave_connector::error::ConnectorResult; -use risingwave_connector::source::kafka::SHARED_KAFKA_CLIENT; +use risingwave_connector::source::kafka::{SharedKafkaItem, SHARED_KAFKA_CLIENT}; use risingwave_connector::source::{ ConnectorProperties, SourceEnumeratorContext, SourceEnumeratorInfo, SourceProperties, SplitEnumerator, SplitId, SplitImpl, SplitMetaData, @@ -1037,18 +1038,45 @@ impl SourceManager { for source_id in source_ids { if let Some(handle) = core.managed_sources.remove(&source_id) { handle.handle.abort(); + // remove the shared kafka client entry if it exists if let Some(entry) = handle.share_client_entry { - let mut share_client_guard = SHARED_KAFKA_CLIENT.lock().await; - if let Some(item) = share_client_guard.get_mut(&entry) { - item.ref_count -= 1; - if item.ref_count == 0 { - tracing::info!( - "removing shared kafka client entry to broker {:?}", - entry.brokers - ); - share_client_guard.remove(&entry); - } - } + SHARED_KAFKA_CLIENT + .entry_by_ref(&entry) + .and_compute_with(|maybe_item| async { + if let Some(item) = maybe_item { + match item.into_value().as_ref() { + Ok(item_val) => { + let share_item = SharedKafkaItem { + client: item_val.client.clone(), + ref_count: item_val.ref_count - 1, + }; + tracing::info!( + "drop source {} shared kafka client to {} (ref count: {})", + &source_id, + &entry.brokers, + share_item.ref_count + ); + if share_item.ref_count == 0 { + tracing::info!( + "source shared kafka client to {} is freed, ref count drop to 0.", + &entry.brokers + ); + Op::Remove + } else { + Op::Put(Arc::new(Ok(share_item))) + } + }, + Err(_) => Op::Nop, + } + } else { + tracing::warn!( + "source worker's shared kafka client to {} is not found.", + &entry.brokers + ); + Op::Nop + } + }) + .await; } } } From ae9df4187e5d8a233b3a77d90b5e43cb86c3cb5b Mon Sep 17 00:00:00 2001 From: tabversion Date: Fri, 25 Oct 2024 18:03:37 +0800 Subject: [PATCH 15/51] fix lint --- src/connector/src/source/kafka/enumerator/client.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/connector/src/source/kafka/enumerator/client.rs b/src/connector/src/source/kafka/enumerator/client.rs index 66714f2d04367..9dc6c0d798ab0 100644 --- a/src/connector/src/source/kafka/enumerator/client.rs +++ b/src/connector/src/source/kafka/enumerator/client.rs @@ -175,7 +175,7 @@ impl SplitEnumerator for KafkaSplitEnumerator { Ok(client) => client.client.clone(), Err(e) => { SHARED_KAFKA_CLIENT.remove(&properties.connection).await; - bail!("{}", e.as_report()); + bail!("{}", anyhow!(e.as_report())); } }; From 45295bcf3e76418e9c6423434fa00b2a617d01b9 Mon Sep 17 00:00:00 2001 From: tabversion Date: Fri, 25 Oct 2024 18:22:36 +0800 Subject: [PATCH 16/51] fix --- src/connector/src/source/kafka/enumerator/client.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/connector/src/source/kafka/enumerator/client.rs b/src/connector/src/source/kafka/enumerator/client.rs index 9dc6c0d798ab0..91f893599ba53 100644 --- a/src/connector/src/source/kafka/enumerator/client.rs +++ b/src/connector/src/source/kafka/enumerator/client.rs @@ -27,7 +27,7 @@ use risingwave_common::bail; use risingwave_common::metrics::LabelGuardedMetric; use thiserror_ext::AsReport; -use crate::error::ConnectorResult; +use crate::error::{ConnectorError, ConnectorResult}; use crate::source::base::SplitEnumerator; use crate::source::kafka::split::KafkaSplit; use crate::source::kafka::{ @@ -175,7 
+175,8 @@ impl SplitEnumerator for KafkaSplitEnumerator { Ok(client) => client.client.clone(), Err(e) => { SHARED_KAFKA_CLIENT.remove(&properties.connection).await; - bail!("{}", anyhow!(e.as_report())); + #[allow(rw::format_error)] + bail!("{}", e.as_report()); } }; From a9e34c7a701716a144e7f70beb23d4b9bf29c121 Mon Sep 17 00:00:00 2001 From: tabversion Date: Fri, 25 Oct 2024 18:33:28 +0800 Subject: [PATCH 17/51] fix --- src/connector/src/source/kafka/enumerator/client.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/connector/src/source/kafka/enumerator/client.rs b/src/connector/src/source/kafka/enumerator/client.rs index 91f893599ba53..ceef939965a46 100644 --- a/src/connector/src/source/kafka/enumerator/client.rs +++ b/src/connector/src/source/kafka/enumerator/client.rs @@ -27,7 +27,7 @@ use risingwave_common::bail; use risingwave_common::metrics::LabelGuardedMetric; use thiserror_ext::AsReport; -use crate::error::{ConnectorError, ConnectorResult}; +use crate::error::ConnectorResult; use crate::source::base::SplitEnumerator; use crate::source::kafka::split::KafkaSplit; use crate::source::kafka::{ @@ -176,7 +176,9 @@ impl SplitEnumerator for KafkaSplitEnumerator { Err(e) => { SHARED_KAFKA_CLIENT.remove(&properties.connection).await; #[allow(rw::format_error)] - bail!("{}", e.as_report()); + { + bail!("{}", e.as_report()); + } } }; From 725e23c851e2dc3836b53d6c6e30c3e86ef48f25 Mon Sep 17 00:00:00 2001 From: tabversion Date: Mon, 28 Oct 2024 15:02:28 +0800 Subject: [PATCH 18/51] remove get hash func --- src/connector/src/connector_common/common.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/connector/src/connector_common/common.rs b/src/connector/src/connector_common/common.rs index 34d151d2f87ef..2917707117765 100644 --- a/src/connector/src/connector_common/common.rs +++ b/src/connector/src/connector_common/common.rs @@ -322,12 +322,6 @@ impl RdKafkaPropertiesCommon { } impl KafkaConnection { - pub fn get_hash(&self) -> u64 { - let mut hasher = std::collections::hash_map::DefaultHasher::new(); - self.hash(&mut hasher); - hasher.finish() - } - pub(crate) fn set_security_properties(&self, config: &mut ClientConfig) { // AWS_MSK_IAM if self.is_aws_msk_iam() { From 832f66fa2374a46f50f0bfd7f305f47e71a840af Mon Sep 17 00:00:00 2001 From: tabversion Date: Mon, 28 Oct 2024 16:55:05 +0800 Subject: [PATCH 19/51] migrate to Weak --- src/connector/src/connector_common/common.rs | 2 +- src/connector/src/error.rs | 3 + .../src/source/kafka/enumerator/client.rs | 149 ++++++++---------- src/meta/src/stream/source_manager.rs | 59 ------- 4 files changed, 69 insertions(+), 144 deletions(-) diff --git a/src/connector/src/connector_common/common.rs b/src/connector/src/connector_common/common.rs index 2917707117765..3eaffa93d02a4 100644 --- a/src/connector/src/connector_common/common.rs +++ b/src/connector/src/connector_common/common.rs @@ -13,7 +13,7 @@ // limitations under the License. use std::collections::BTreeMap; -use std::hash::{Hash, Hasher}; +use std::hash::Hash; use std::io::Write; use std::time::Duration; diff --git a/src/connector/src/error.rs b/src/connector/src/error.rs index cfee9674d1ce4..c95d7fc97f4e3 100644 --- a/src/connector/src/error.rs +++ b/src/connector/src/error.rs @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
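// ---- editorial sketch, not part of this patch --------------------------
// A standalone sketch of the ownership model this "migrate to Weak" commit
// moves to, using plain std types in place of the real
// `Weak<BaseConsumer<RwConsumerContext>>` stored in the moka cache. The
// registry holds only `Weak`, so it never keeps a client alive by itself;
// the manual ref_count bookkeeping (and the meta-side cleanup code deleted
// later in this patch) becomes unnecessary.
use std::sync::{Arc, Weak};

struct Registry {
    slot: Option<Weak<String>>, // stand-in for the shared Kafka client
}

impl Registry {
    fn get_or_build(&mut self) -> Arc<String> {
        if let Some(alive) = self.slot.as_ref().and_then(Weak::upgrade) {
            return alive; // reuse: some enumerator still holds an Arc
        }
        let fresh = Arc::new("kafka client".to_string());
        self.slot = Some(Arc::downgrade(&fresh));
        fresh // callers keep it alive; dropping the last Arc frees it
    }
}

fn main() {
    let mut reg = Registry { slot: None };
    let c1 = reg.get_or_build();
    let c2 = reg.get_or_build();
    assert!(Arc::ptr_eq(&c1, &c2)); // shared while any handle is alive
    drop(c1);
    drop(c2);
    // last Arc gone -> Weak no longer upgrades -> next call rebuilds
    assert!(reg.slot.as_ref().unwrap().upgrade().is_none());
}
// -------------------------------------------------------------------------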
+use std::sync::Arc; + use risingwave_common::array::ArrayError; use risingwave_common::error::def_anyhow_newtype; use risingwave_pb::PbFieldNotFound; @@ -29,6 +31,7 @@ def_anyhow_newtype! { // Common errors std::io::Error => transparent, + Arc => transparent, // Fine-grained connector errors AccessError => transparent, diff --git a/src/connector/src/source/kafka/enumerator/client.rs b/src/connector/src/source/kafka/enumerator/client.rs index ceef939965a46..f9b77a40ec1c5 100644 --- a/src/connector/src/source/kafka/enumerator/client.rs +++ b/src/connector/src/source/kafka/enumerator/client.rs @@ -12,22 +12,22 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::HashMap; -use std::sync::{Arc, LazyLock}; +use std::collections::{BTreeMap, HashMap}; +use std::sync::{Arc, LazyLock, Weak}; use std::time::Duration; use anyhow::{anyhow, Context}; use async_trait::async_trait; use moka::future::Cache as MokaCache; +use moka::ops::compute::{CompResult, Op}; use prometheus::core::{AtomicI64, GenericGauge}; use rdkafka::consumer::{BaseConsumer, Consumer}; use rdkafka::error::KafkaResult; -use rdkafka::{Offset, TopicPartitionList}; +use rdkafka::{ClientConfig, Offset, TopicPartitionList}; use risingwave_common::bail; use risingwave_common::metrics::LabelGuardedMetric; -use thiserror_ext::AsReport; -use crate::error::ConnectorResult; +use crate::error::{ConnectorError, ConnectorResult}; use crate::source::base::SplitEnumerator; use crate::source::kafka::split::KafkaSplit; use crate::source::kafka::{ @@ -35,15 +35,10 @@ use crate::source::kafka::{ }; use crate::source::SourceEnumeratorContextRef; -pub static SHARED_KAFKA_CLIENT: LazyLock< - MokaCache>>, -> = LazyLock::new(|| moka::future::Cache::builder().build()); +type KafkaClientType = BaseConsumer; -#[derive(Clone)] -pub struct SharedKafkaItem { - pub client: Arc>, - pub ref_count: i32, -} +pub static SHARED_KAFKA_CLIENT: LazyLock>> = + LazyLock::new(|| moka::future::Cache::builder().build()); #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum KafkaEnumeratorOffset { @@ -57,7 +52,7 @@ pub struct KafkaSplitEnumerator { context: SourceEnumeratorContextRef, broker_address: String, topic: String, - client: Arc>, + client: Arc, start_offset: KafkaEnumeratorOffset, // maybe used in the future for batch processing @@ -107,86 +102,72 @@ impl SplitEnumerator for KafkaSplitEnumerator { scan_start_offset = KafkaEnumeratorOffset::Timestamp(time_offset) } - let client_arc = SHARED_KAFKA_CLIENT + async fn build_kafka_client( + config: &ClientConfig, + properties: &KafkaProperties, + rewrite_map: Option>, + ) -> ConnectorResult { + let ctx_common = KafkaContextCommon::new( + rewrite_map, + None, + None, + properties.aws_auth_props.clone(), + properties.connection.is_aws_msk_iam(), + ) + .await?; + let client_ctx = RwConsumerContext::new(ctx_common); + let client: BaseConsumer = + config.create_with_context(client_ctx).await?; + + // Note that before any SASL/OAUTHBEARER broker connection can succeed the application must call + // rd_kafka_oauthbearer_set_token() once – either directly or, more typically, by invoking either + // rd_kafka_poll(), rd_kafka_consumer_poll(), rd_kafka_queue_poll(), etc, in order to cause retrieval + // of an initial token to occur. 
+ // https://docs.confluent.io/platform/current/clients/librdkafka/html/rdkafka_8h.html#a988395722598f63396d7a1bedb22adaf + if properties.connection.is_aws_msk_iam() { + #[cfg(not(madsim))] + client.poll(Duration::from_secs(10)); // note: this is a blocking call + #[cfg(madsim)] + client.poll(Duration::from_secs(10)).await; + } + Ok(client) + } + + let client_weak = match SHARED_KAFKA_CLIENT .entry_by_ref(&properties.connection) - .and_upsert_with(|maybe_item| async { - tracing::info!( - "entry {:?} maybe item: {:?}", - &properties.connection, - maybe_item.is_some() - ); - if let Some(item) = maybe_item { - let arc_item = item.into_value(); - match &arc_item.as_ref() { - Ok(item) => { - tracing::info!( - "reusing source {} shared kafka client to {} (ref count: {})", - &context.info.source_id, - &broker_address, - item.ref_count + 1 - ); - Arc::new(Ok(SharedKafkaItem { - client: item.client.clone(), - ref_count: item.ref_count + 1, - })) - } - Err(_) => arc_item.clone(), - } - } else { - let build_client: ConnectorResult = async { - let ctx_common = KafkaContextCommon::new( - broker_rewrite_map, - None, - None, - properties.aws_auth_props, - properties.connection.is_aws_msk_iam(), - ) - .await?; - let client_ctx = RwConsumerContext::new(ctx_common); - let client: BaseConsumer = - config.create_with_context(client_ctx).await?; - - // Note that before any SASL/OAUTHBEARER broker connection can succeed the application must call - // rd_kafka_oauthbearer_set_token() once – either directly or, more typically, by invoking either - // rd_kafka_poll(), rd_kafka_consumer_poll(), rd_kafka_queue_poll(), etc, in order to cause retrieval - // of an initial token to occur. - // https://docs.confluent.io/platform/current/clients/librdkafka/html/rdkafka_8h.html#a988395722598f63396d7a1bedb22adaf - if properties.connection.is_aws_msk_iam() { - #[cfg(not(madsim))] - client.poll(Duration::from_secs(10)); // note: this is a blocking call - #[cfg(madsim)] - client.poll(Duration::from_secs(10)).await; - } - - Ok(SharedKafkaItem { - client: Arc::new(client), - ref_count: 1, - }) + .and_try_compute_with::<_, _, ConnectorError>(|maybe_entry| async { + if let Some(entry) = maybe_entry { + let entry_value = entry.into_value(); + if entry_value.upgrade().is_some() { + // return if the client is already built + return Ok(Op::Nop); } - .await; - - Arc::new(build_client) } + let client_arc = Arc::new( + build_kafka_client(&config, &properties, broker_rewrite_map.clone()).await?, + ); + Ok(Op::Put(Arc::downgrade(&client_arc))) }) - .await - .into_value(); - - let kafka_client: Arc> = match client_arc.as_ref() { - Ok(client) => client.client.clone(), - Err(e) => { - SHARED_KAFKA_CLIENT.remove(&properties.connection).await; - #[allow(rw::format_error)] - { - bail!("{}", e.as_report()); - } - } + .await? 
+ { + CompResult::Unchanged(entry) + | CompResult::Inserted(entry) + | CompResult::ReplacedWith(entry) => entry.into_value(), + CompResult::Removed(_) | CompResult::StillNone(_) => unreachable!(), + }; + + let client_arc: Arc; + if let Some(client_arc_upgrade) = client_weak.upgrade() { + client_arc = client_arc_upgrade; + } else { + unreachable!() }; Ok(Self { context, broker_address, topic, - client: kafka_client, + client: client_arc, start_offset: scan_start_offset, stop_offset: KafkaEnumeratorOffset::None, sync_call_timeout: properties.common.sync_call_timeout, diff --git a/src/meta/src/stream/source_manager.rs b/src/meta/src/stream/source_manager.rs index 48b260e837d93..0a0bc0076593d 100644 --- a/src/meta/src/stream/source_manager.rs +++ b/src/meta/src/stream/source_manager.rs @@ -21,12 +21,9 @@ use std::sync::Arc; use std::time::Duration; use anyhow::Context; -use moka::ops::compute::Op; use risingwave_common::catalog::TableId; use risingwave_common::metrics::LabelGuardedIntGauge; -use risingwave_connector::connector_common::KafkaConnection; use risingwave_connector::error::ConnectorResult; -use risingwave_connector::source::kafka::{SharedKafkaItem, SHARED_KAFKA_CLIENT}; use risingwave_connector::source::{ ConnectorProperties, SourceEnumeratorContext, SourceEnumeratorInfo, SourceProperties, SplitEnumerator, SplitId, SplitImpl, SplitMetaData, @@ -113,7 +110,6 @@ pub async fn create_source_worker_handle( let current_splits_ref = splits.clone(); let connector_properties = extract_prop_from_new_source(source)?; - let share_client_entry = get_kafka_connection(&connector_properties); let enable_scale_in = connector_properties.enable_split_scale_in(); let (sync_call_tx, sync_call_rx) = tokio::sync::mpsc::unbounded_channel(); let handle = dispatch_source_prop!(connector_properties, prop, { @@ -147,7 +143,6 @@ pub async fn create_source_worker_handle( sync_call_tx, splits, enable_scale_in, - share_client_entry, }) } @@ -268,7 +263,6 @@ pub struct ConnectorSourceWorkerHandle { sync_call_tx: UnboundedSender>>, splits: SharedSplitMapRef, enable_scale_in: bool, - pub share_client_entry: Option, } impl ConnectorSourceWorkerHandle { @@ -1038,46 +1032,6 @@ impl SourceManager { for source_id in source_ids { if let Some(handle) = core.managed_sources.remove(&source_id) { handle.handle.abort(); - // remove the shared kafka client entry if it exists - if let Some(entry) = handle.share_client_entry { - SHARED_KAFKA_CLIENT - .entry_by_ref(&entry) - .and_compute_with(|maybe_item| async { - if let Some(item) = maybe_item { - match item.into_value().as_ref() { - Ok(item_val) => { - let share_item = SharedKafkaItem { - client: item_val.client.clone(), - ref_count: item_val.ref_count - 1, - }; - tracing::info!( - "drop source {} shared kafka client to {} (ref count: {})", - &source_id, - &entry.brokers, - share_item.ref_count - ); - if share_item.ref_count == 0 { - tracing::info!( - "source shared kafka client to {} is freed, ref count drop to 0.", - &entry.brokers - ); - Op::Remove - } else { - Op::Put(Arc::new(Ok(share_item))) - } - }, - Err(_) => Op::Nop, - } - } else { - tracing::warn!( - "source worker's shared kafka client to {} is not found.", - &entry.brokers - ); - Op::Nop - } - }) - .await; - } } } } @@ -1096,8 +1050,6 @@ impl SourceManager { let connector_properties = extract_prop_from_existing_source(&source)?; - let share_client_entry = get_kafka_connection(&connector_properties); - let enable_scale_in = connector_properties.enable_split_scale_in(); let (sync_call_tx, sync_call_rx) = 
tokio::sync::mpsc::unbounded_channel(); let handle = tokio::spawn(async move { @@ -1137,7 +1089,6 @@ impl SourceManager { sync_call_tx, splits, enable_scale_in, - share_client_entry, }, ); Ok(()) @@ -1229,16 +1180,6 @@ pub fn build_actor_split_impls( .collect() } -fn get_kafka_connection(connector_properties: &ConnectorProperties) -> Option { - { - // for kafka source: get the hash of connection props as shared source entry (on meta) - if let ConnectorProperties::Kafka(kafka_props) = connector_properties { - return Some(kafka_props.connection.clone()); - } - None - } -} - #[cfg(test)] mod tests { use std::collections::{BTreeMap, HashMap, HashSet}; From 73f0b7bc90a46af1c425c2c3796971c2dbf33cb0 Mon Sep 17 00:00:00 2001 From: tabversion Date: Mon, 28 Oct 2024 16:58:36 +0800 Subject: [PATCH 20/51] minor --- src/connector/src/source/kafka/enumerator/client.rs | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/connector/src/source/kafka/enumerator/client.rs b/src/connector/src/source/kafka/enumerator/client.rs index f9b77a40ec1c5..1dd9a4f581d8f 100644 --- a/src/connector/src/source/kafka/enumerator/client.rs +++ b/src/connector/src/source/kafka/enumerator/client.rs @@ -133,36 +133,31 @@ impl SplitEnumerator for KafkaSplitEnumerator { Ok(client) } - let client_weak = match SHARED_KAFKA_CLIENT + let client_arc = match SHARED_KAFKA_CLIENT .entry_by_ref(&properties.connection) .and_try_compute_with::<_, _, ConnectorError>(|maybe_entry| async { if let Some(entry) = maybe_entry { let entry_value = entry.into_value(); if entry_value.upgrade().is_some() { // return if the client is already built + tracing::info!("reuse existing kafka client for {}", broker_address); return Ok(Op::Nop); } } let client_arc = Arc::new( build_kafka_client(&config, &properties, broker_rewrite_map.clone()).await?, ); + tracing::info!("build new kafka client for {}", broker_address); Ok(Op::Put(Arc::downgrade(&client_arc))) }) .await? 
{ CompResult::Unchanged(entry) | CompResult::Inserted(entry) - | CompResult::ReplacedWith(entry) => entry.into_value(), + | CompResult::ReplacedWith(entry) => entry.into_value().upgrade().unwrap(), CompResult::Removed(_) | CompResult::StillNone(_) => unreachable!(), }; - let client_arc: Arc; - if let Some(client_arc_upgrade) = client_weak.upgrade() { - client_arc = client_arc_upgrade; - } else { - unreachable!() - }; - Ok(Self { context, broker_address, From ac1d63d28f7ac5441d2260324184d13acc0d049f Mon Sep 17 00:00:00 2001 From: tabversion Date: Mon, 28 Oct 2024 17:35:08 +0800 Subject: [PATCH 21/51] fix --- .../src/source/kafka/enumerator/client.rs | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/connector/src/source/kafka/enumerator/client.rs b/src/connector/src/source/kafka/enumerator/client.rs index 1dd9a4f581d8f..1d7525bc7a613 100644 --- a/src/connector/src/source/kafka/enumerator/client.rs +++ b/src/connector/src/source/kafka/enumerator/client.rs @@ -19,7 +19,7 @@ use std::time::Duration; use anyhow::{anyhow, Context}; use async_trait::async_trait; use moka::future::Cache as MokaCache; -use moka::ops::compute::{CompResult, Op}; +use moka::ops::compute::Op; use prometheus::core::{AtomicI64, GenericGauge}; use rdkafka::consumer::{BaseConsumer, Consumer}; use rdkafka::error::KafkaResult; @@ -133,36 +133,33 @@ impl SplitEnumerator for KafkaSplitEnumerator { Ok(client) } - let client_arc = match SHARED_KAFKA_CLIENT + let mut client_arc: Option> = None; + SHARED_KAFKA_CLIENT .entry_by_ref(&properties.connection) .and_try_compute_with::<_, _, ConnectorError>(|maybe_entry| async { if let Some(entry) = maybe_entry { let entry_value = entry.into_value(); - if entry_value.upgrade().is_some() { + if let Some(client) = entry_value.upgrade() { // return if the client is already built tracing::info!("reuse existing kafka client for {}", broker_address); + client_arc = Some(client); return Ok(Op::Nop); } } - let client_arc = Arc::new( + let new_client_arc = Arc::new( build_kafka_client(&config, &properties, broker_rewrite_map.clone()).await?, ); tracing::info!("build new kafka client for {}", broker_address); - Ok(Op::Put(Arc::downgrade(&client_arc))) + client_arc = Some(new_client_arc.clone()); + Ok(Op::Put(Arc::downgrade(&new_client_arc))) }) - .await? 
- { - CompResult::Unchanged(entry) - | CompResult::Inserted(entry) - | CompResult::ReplacedWith(entry) => entry.into_value().upgrade().unwrap(), - CompResult::Removed(_) | CompResult::StillNone(_) => unreachable!(), - }; + .await?; Ok(Self { context, broker_address, topic, - client: client_arc, + client: client_arc.unwrap(), start_offset: scan_start_offset, stop_offset: KafkaEnumeratorOffset::None, sync_call_timeout: properties.common.sync_call_timeout, From ec4909612d001ae14fcadc3c434760e8ee6c5032 Mon Sep 17 00:00:00 2001 From: tabVersion Date: Tue, 29 Oct 2024 15:38:50 +0800 Subject: [PATCH 22/51] test bump quanta to 0.12.3 --- Cargo.lock | 8 +------- Cargo.toml | 8 ++------ 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 08a3b30159399..5d09f70ac019e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9770,8 +9770,7 @@ checksum = "658fa1faf7a4cc5f057c9ee5ef560f717ad9d8dc66d975267f709624d6e1ab88" [[package]] name = "quanta" version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5167a477619228a0b284fac2674e3c388cba90631d7b7de620e6f1fcd08da5" +source = "git+https://github.com/tabVersion/quanta.git?rev=da62f4f04516fcfc72f686869026bad87b712f37#da62f4f04516fcfc72f686869026bad87b712f37" dependencies = [ "crossbeam-utils", "libc", @@ -16554,8 +16553,3 @@ dependencies = [ "libc", "pkg-config", ] - -[[patch.unused]] -name = "quanta" -version = "0.11.0" -source = "git+https://github.com/madsim-rs/quanta.git?rev=948bdc3#948bdc3d4cd3fcfe3d52d03dd83deee96d97d770" diff --git a/Cargo.toml b/Cargo.toml index da6b0bfa87385..1d22c656feb0c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -153,11 +153,7 @@ arrow-udf-flight = "0.4" clap = { version = "4", features = ["cargo", "derive", "env"] } # Use a forked version which removes the dependencies on dynamo db to reduce # compile time and binary size. -deltalake = { version = "0.20.1", features = [ - "s3", - "gcs", - "datafusion", -] } +deltalake = { version = "0.20.1", features = ["s3", "gcs", "datafusion"] } itertools = "0.13.0" jsonbb = "0.1.4" lru = { git = "https://github.com/risingwavelabs/lru-rs.git", rev = "2682b85" } @@ -343,7 +339,7 @@ opt-level = 2 [patch.crates-io] # Patch third-party crates for deterministic simulation. -quanta = { git = "https://github.com/madsim-rs/quanta.git", rev = "948bdc3" } +quanta = { git = "https://github.com/tabVersion/quanta.git", rev = "da62f4f04516fcfc72f686869026bad87b712f37" } getrandom = { git = "https://github.com/madsim-rs/getrandom.git", rev = "e79a7ae" } # Don't patch `tokio-stream`, but only use the madsim version for **direct** dependencies. # Imagine an unpatched dependency depends on the original `tokio` and the patched `tokio-stream`. 
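[Editorial note between patches 22 and 23: the Cargo.lock/Cargo.toml churn above replaces the madsim-pinned quanta 0.11 patch with a fork tracking quanta 0.12, which moka 0.12.x pulls in for its clock source. The compute-style entry API adopted by the earlier commits is why moka is worth this churn: it gives an atomic per-key read-modify-write without holding a global `tokio::sync::Mutex` around the whole map. A minimal sketch of that API, assuming moka 0.12 with the `future` feature and tokio with the `macros` and `rt-multi-thread` features; the key and values here are illustrative:

use moka::future::Cache;
use moka::ops::compute::Op;

#[tokio::main]
async fn main() {
    let cache: Cache<String, i32> = Cache::builder().build();

    // Run the read-modify-write twice; the first call inserts, the second bumps.
    for _ in 0..2 {
        cache
            .entry_by_ref("conn-1")
            .and_compute_with(|maybe_entry| async move {
                match maybe_entry {
                    // key present: bump the stored count atomically
                    Some(entry) => Op::Put(entry.into_value() + 1),
                    // key absent: first user of this connection
                    None => Op::Put(1),
                }
            })
            .await;
    }

    assert_eq!(cache.get("conn-1").await, Some(2));
}

Only the touched key is pinned while the closure runs, so enumerators building clients for different connections no longer serialize behind one map-wide mutex, and a slow build (such as the 10s MSK IAM poll) does not block unrelated lookups.]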
From 16d8c423c253a485f5f7205a57ed043d3c040881 Mon Sep 17 00:00:00 2001 From: tabVersion Date: Tue, 29 Oct 2024 15:58:42 +0800 Subject: [PATCH 23/51] update patch --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5d09f70ac019e..4030ac7873c64 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9770,7 +9770,7 @@ checksum = "658fa1faf7a4cc5f057c9ee5ef560f717ad9d8dc66d975267f709624d6e1ab88" [[package]] name = "quanta" version = "0.12.3" -source = "git+https://github.com/tabVersion/quanta.git?rev=da62f4f04516fcfc72f686869026bad87b712f37#da62f4f04516fcfc72f686869026bad87b712f37" +source = "git+https://github.com/tabVersion/quanta.git?rev=bb6c780894d06c0ec3f487d58c72920665b5cb0a#bb6c780894d06c0ec3f487d58c72920665b5cb0a" dependencies = [ "crossbeam-utils", "libc", diff --git a/Cargo.toml b/Cargo.toml index 1d22c656feb0c..14fcd7aa5e5ff 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -339,7 +339,7 @@ opt-level = 2 [patch.crates-io] # Patch third-party crates for deterministic simulation. -quanta = { git = "https://github.com/tabVersion/quanta.git", rev = "da62f4f04516fcfc72f686869026bad87b712f37" } +quanta = { git = "https://github.com/tabVersion/quanta.git", rev = "bb6c780894d06c0ec3f487d58c72920665b5cb0a" } getrandom = { git = "https://github.com/madsim-rs/getrandom.git", rev = "e79a7ae" } # Don't patch `tokio-stream`, but only use the madsim version for **direct** dependencies. # Imagine an unpatched dependency depends on the original `tokio` and the patched `tokio-stream`. From b3efda6f9fa0fe0be7101706a9eedc68974af397 Mon Sep 17 00:00:00 2001 From: tabversion Date: Wed, 30 Oct 2024 15:05:35 +0800 Subject: [PATCH 24/51] moka 0.12.3 --- src/meta/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/meta/Cargo.toml b/src/meta/Cargo.toml index b0c918407b367..b9c2835e76e1c 100644 --- a/src/meta/Cargo.toml +++ b/src/meta/Cargo.toml @@ -42,7 +42,7 @@ jsonbb = { workspace = true } maplit = "1.0.2" memcomparable = { version = "0.2" } mime_guess = "2" -moka = { version = "0.12.8", features = ["future"] } +moka = { version = "0.12.3", features = ["future"] } notify = { version = "7", default-features = false, features = [ "macos_fsevent", ] } From 6a729f5addfc8e5f5e863e352e9310d661329184 Mon Sep 17 00:00:00 2001 From: tabversion Date: Fri, 1 Nov 2024 16:25:49 +0800 Subject: [PATCH 25/51] update proto --- proto/catalog.proto | 10 ++++++++++ proto/ddl_service.proto | 2 +- src/connector/src/sink/catalog/mod.rs | 1 + 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/proto/catalog.proto b/proto/catalog.proto index 0c67a92f23cdd..5356f4fb064a3 100644 --- a/proto/catalog.proto +++ b/proto/catalog.proto @@ -92,6 +92,9 @@ message StreamSourceInfo { // Handle the source relies on any sceret. The key is the propertity name and the value is the secret id and type. // For format and encode options. map format_encode_secret_refs = 16; + + // ref connection for schema registry + optional uint32 connection_id = 17; } message Source { @@ -121,6 +124,8 @@ message Source { uint32 associated_table_id = 12; } string definition = 13; + + // ref connection for connector optional uint32 connection_id = 14; optional uint64 initialized_at_epoch = 15; @@ -154,6 +159,9 @@ message SinkFormatDesc { optional plan_common.EncodeType key_encode = 4; // Secret used for format encode options. map secret_refs = 5; + + // ref connection for schema registry + optional uint32 connection_id = 6; } // the catalog of the sink. 
There are two kind of schema here. The full schema is all columns @@ -176,6 +184,8 @@ message Sink { uint32 owner = 11; map properties = 12; string definition = 13; + + // ref connection for connector optional uint32 connection_id = 14; optional uint64 initialized_at_epoch = 15; optional uint64 created_at_epoch = 16; diff --git a/proto/ddl_service.proto b/proto/ddl_service.proto index de860593e8105..ef97f10ecf277 100644 --- a/proto/ddl_service.proto +++ b/proto/ddl_service.proto @@ -404,7 +404,7 @@ message CreateConnectionRequest { uint32 database_id = 2; uint32 schema_id = 3; oneof payload { - PrivateLink private_link = 4; + PrivateLink private_link = 4 [deprecated = true]; } uint32 owner_id = 5; } diff --git a/src/connector/src/sink/catalog/mod.rs b/src/connector/src/sink/catalog/mod.rs index 23f34eab97417..cc3855d4022c2 100644 --- a/src/connector/src/sink/catalog/mod.rs +++ b/src/connector/src/sink/catalog/mod.rs @@ -221,6 +221,7 @@ impl SinkFormatDesc { options, key_encode, secret_refs: self.secret_refs.clone(), + connection_id: None, } } } From a79d5da5408b6909cc3ea014154591878b07ee53 Mon Sep 17 00:00:00 2001 From: tabversion Date: Sat, 2 Nov 2024 13:18:36 +0800 Subject: [PATCH 26/51] stash --- proto/catalog.proto | 11 +++++++++++ src/connector/src/connector_common/mod.rs | 14 ++++++++++++++ src/frontend/src/catalog/connection_catalog.rs | 1 + src/frontend/src/handler/show.rs | 7 +++++++ 4 files changed, 33 insertions(+) diff --git a/proto/catalog.proto b/proto/catalog.proto index 26dab7ea76bfc..cdbb14f86fb9b 100644 --- a/proto/catalog.proto +++ b/proto/catalog.proto @@ -248,12 +248,23 @@ message Connection { string endpoint_dns_name = 5; } + enum ConnectionType { + CONNECTION_TYPE_UNSPECIFIED = 0; + CONNECTION_TYPE_KAFKA = 1; + } + + message ConnectionImpl { + ConnectionType connection_type = 1; + map properties = 2; + } + uint32 id = 1; uint32 schema_id = 2; uint32 database_id = 3; string name = 4; oneof info { PrivateLinkService private_link_service = 5 [deprecated = true]; + ConnectionImpl connection_impl = 7; } uint32 owner = 6; } diff --git a/src/connector/src/connector_common/mod.rs b/src/connector/src/connector_common/mod.rs index 57b614fdf548a..08cd0f195d74d 100644 --- a/src/connector/src/connector_common/mod.rs +++ b/src/connector/src/connector_common/mod.rs @@ -16,6 +16,7 @@ mod mqtt_common; pub use mqtt_common::{MqttCommon, QualityOfService as MqttQualityOfService}; +use serde::Deserialize; mod common; pub use common::{ @@ -26,3 +27,16 @@ pub use common::{ mod iceberg; pub use iceberg::IcebergCommon; + +// #[derive(Debug, Clone, Deserialize)] +// pub enum ConnectionImpl { +// Kafka(KafkaConnection), +// } + +// macro_rules! 
impl_connection_enum { +// ($impl:expr, $inner_name:ident, $prop_type_name:ident, $body:expr) => { +// impl ConnectionImpl { +// pub fn get_ +// } +// }; +// } diff --git a/src/frontend/src/catalog/connection_catalog.rs b/src/frontend/src/catalog/connection_catalog.rs index 03b2ff4203c53..c1013cd9779ca 100644 --- a/src/frontend/src/catalog/connection_catalog.rs +++ b/src/frontend/src/catalog/connection_catalog.rs @@ -30,6 +30,7 @@ impl ConnectionCatalog { pub fn connection_type(&self) -> &str { match &self.info { Info::PrivateLinkService(srv) => srv.get_provider().unwrap().as_str_name(), + Info::ConnectionImpl(connection_impl) => connection_impl.connection_type, } } diff --git a/src/frontend/src/handler/show.rs b/src/frontend/src/handler/show.rs index d8ec8b44d827e..80ea379f32e60 100644 --- a/src/frontend/src/handler/show.rs +++ b/src/frontend/src/handler/show.rs @@ -413,6 +413,9 @@ pub async fn handle_show_object( connection::Info::PrivateLinkService(_) => { PRIVATELINK_CONNECTION.to_string() }, + connection::Info::ConnectionImpl(connection_impl) => { + connection_impl.connection_type.to_string() + } }; let source_names = schema .get_source_ids_by_connection(c.id) @@ -438,6 +441,10 @@ pub async fn handle_show_object( serde_json::to_string(&sink_names).unwrap(), ) } + connection::Info::ConnectionImpl(connection_impl) => { + // todo: check secrets are not exposed + serde_json::to_string(&connection_impl.properties).unwrap() + } }; ShowConnectionRow { name, From a561ea3eda82bc0c33a5b472c2ebdb36e7cbc8a1 Mon Sep 17 00:00:00 2001 From: tabversion Date: Sat, 2 Nov 2024 23:36:58 +0800 Subject: [PATCH 27/51] stash --- proto/catalog.proto | 23 ++++++++-------- proto/ddl_service.proto | 3 ++- src/connector/src/connector_common/mod.rs | 1 - src/ctl/src/cmd_impl/meta/connection.rs | 7 +++++ .../src/catalog/connection_catalog.rs | 3 ++- .../rw_catalog/rw_connections.rs | 1 + src/frontend/src/handler/create_connection.rs | 26 +++++++++---------- src/frontend/src/handler/show.rs | 8 +++--- src/meta/service/src/ddl_service.rs | 20 ++++++++++++-- 9 files changed, 59 insertions(+), 33 deletions(-) diff --git a/proto/catalog.proto b/proto/catalog.proto index cdbb14f86fb9b..c2339469a5f3f 100644 --- a/proto/catalog.proto +++ b/proto/catalog.proto @@ -234,6 +234,17 @@ message Subscription { SubscriptionState subscription_state = 19; } +message ConnectionParams { + enum ConnectionType { + CONNECTION_TYPE_UNSPECIFIED = 0; + CONNECTION_TYPE_KAFKA = 1; + CONNECTION_TYPE_ICEBERG = 2; + } + + ConnectionType connection_type = 1; + map properties = 2; +} + message Connection { message PrivateLinkService { enum PrivateLinkProvider { @@ -248,23 +259,13 @@ message Connection { string endpoint_dns_name = 5; } - enum ConnectionType { - CONNECTION_TYPE_UNSPECIFIED = 0; - CONNECTION_TYPE_KAFKA = 1; - } - - message ConnectionImpl { - ConnectionType connection_type = 1; - map properties = 2; - } - uint32 id = 1; uint32 schema_id = 2; uint32 database_id = 3; string name = 4; oneof info { PrivateLinkService private_link_service = 5 [deprecated = true]; - ConnectionImpl connection_impl = 7; + ConnectionParams connection_params = 7; } uint32 owner = 6; } diff --git a/proto/ddl_service.proto b/proto/ddl_service.proto index ef97f10ecf277..d39e886bfd05e 100644 --- a/proto/ddl_service.proto +++ b/proto/ddl_service.proto @@ -405,8 +405,9 @@ message CreateConnectionRequest { uint32 schema_id = 3; oneof payload { PrivateLink private_link = 4 [deprecated = true]; + catalog.ConnectionParams connection_params = 7; } - uint32 owner_id = 5; + 
uint32 owner_id = 6; } message CreateConnectionResponse { diff --git a/src/connector/src/connector_common/mod.rs b/src/connector/src/connector_common/mod.rs index 08cd0f195d74d..8ddc1d1c55151 100644 --- a/src/connector/src/connector_common/mod.rs +++ b/src/connector/src/connector_common/mod.rs @@ -16,7 +16,6 @@ mod mqtt_common; pub use mqtt_common::{MqttCommon, QualityOfService as MqttQualityOfService}; -use serde::Deserialize; mod common; pub use common::{ diff --git a/src/ctl/src/cmd_impl/meta/connection.rs b/src/ctl/src/cmd_impl/meta/connection.rs index 5df81d301ea01..c7bd94b58de62 100644 --- a/src/ctl/src/cmd_impl/meta/connection.rs +++ b/src/ctl/src/cmd_impl/meta/connection.rs @@ -35,6 +35,13 @@ pub async fn list_connections(context: &CtlContext) -> anyhow::Result<()> { "PrivateLink: service_name: {}, endpoint_id: {}, dns_entries: {:?}", svc.service_name, svc.endpoint_id, svc.dns_entries, ), + Some(Info::ConnectionParams(params)) => { + format!( + "CONNECTION_PARAMS_{}: {}", + params.get_connection_type().unwrap().as_str_name(), + serde_json::to_string(¶ms.get_properties()).unwrap() + ) + } None => "None".to_string(), } ); diff --git a/src/frontend/src/catalog/connection_catalog.rs b/src/frontend/src/catalog/connection_catalog.rs index c1013cd9779ca..9e17e11a84f23 100644 --- a/src/frontend/src/catalog/connection_catalog.rs +++ b/src/frontend/src/catalog/connection_catalog.rs @@ -30,13 +30,14 @@ impl ConnectionCatalog { pub fn connection_type(&self) -> &str { match &self.info { Info::PrivateLinkService(srv) => srv.get_provider().unwrap().as_str_name(), - Info::ConnectionImpl(connection_impl) => connection_impl.connection_type, + Info::ConnectionParams(params) => params.get_connection_type().unwrap().as_str_name(), } } pub fn provider(&self) -> &str { match &self.info { Info::PrivateLinkService(_) => "PRIVATELINK", + Info::ConnectionParams(_) => todo!(), } } } diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_connections.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_connections.rs index 2af0b29b16f76..77e10ae31018e 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_connections.rs +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_connections.rs @@ -35,6 +35,7 @@ fn read_rw_connections(reader: &SysCatalogReaderImpl) -> Result, + with_properties: &mut BTreeMap, property: &str, ) -> Result { - with_properties - .get(property) - .map(|s| s.to_lowercase()) - .ok_or_else(|| { - RwError::from(ProtocolError(format!( - "Required property \"{property}\" is not provided" - ))) - }) + with_properties.remove(property).ok_or_else(|| { + RwError::from(ProtocolError(format!( + "Required property \"{property}\" is not provided" + ))) + }) } fn resolve_create_connection_payload( - with_properties: &BTreeMap, + with_properties: &mut BTreeMap, ) -> Result { let connection_type = get_connection_property_required(with_properties, CONNECTION_TYPE_PROP)?; match connection_type.as_str() { @@ -50,6 +48,8 @@ fn resolve_create_connection_payload( "CREATE CONNECTION to Private Link".to_string(), "RisingWave Cloud Portal (Please refer to the doc https://docs.risingwave.com/cloud/create-a-connection/)".to_string(), ))), + KAFKA_CONNECTOR => todo!(), + ICEBERG_CONNECTOR => todo!(), _ => Err(RwError::from(ProtocolError(format!( "Connection type \"{connection_type}\" is not supported" )))), @@ -78,9 +78,9 @@ pub async fn handle_create_connection( }; } let (database_id, schema_id) = session.get_database_and_schema_id_for_create(schema_name)?; - let with_properties = 
handler_args.with_options.clone().into_connector_props(); + let mut with_properties = handler_args.with_options.clone().into_connector_props(); - let create_connection_payload = resolve_create_connection_payload(&with_properties)?; + let create_connection_payload = resolve_create_connection_payload(&mut with_properties)?; let catalog_writer = session.catalog_writer()?; catalog_writer diff --git a/src/frontend/src/handler/show.rs b/src/frontend/src/handler/show.rs index 80ea379f32e60..12e8d5d348c16 100644 --- a/src/frontend/src/handler/show.rs +++ b/src/frontend/src/handler/show.rs @@ -413,8 +413,8 @@ pub async fn handle_show_object( connection::Info::PrivateLinkService(_) => { PRIVATELINK_CONNECTION.to_string() }, - connection::Info::ConnectionImpl(connection_impl) => { - connection_impl.connection_type.to_string() + connection::Info::ConnectionParams(params) => { + format!("CONNECTION_PARAMS_{}", params.connection_type) } }; let source_names = schema @@ -441,9 +441,9 @@ pub async fn handle_show_object( serde_json::to_string(&sink_names).unwrap(), ) } - connection::Info::ConnectionImpl(connection_impl) => { + connection::Info::ConnectionParams(params) => { // todo: check secrets are not exposed - serde_json::to_string(&connection_impl.properties).unwrap() + serde_json::to_string(¶ms.get_properties()).unwrap() } }; ShowConnectionRow { diff --git a/src/meta/service/src/ddl_service.rs b/src/meta/service/src/ddl_service.rs index 71b45b1887eff..7babf7c48ca78 100644 --- a/src/meta/service/src/ddl_service.rs +++ b/src/meta/service/src/ddl_service.rs @@ -25,8 +25,9 @@ use risingwave_connector::sink::catalog::SinkId; use risingwave_meta::manager::{EventLogManagerRef, MetadataManager}; use risingwave_meta::rpc::ddl_controller::fill_table_stream_graph_info; use risingwave_meta::rpc::metrics::MetaMetrics; +use risingwave_pb::catalog::connection::Info as ConnectionInfo; use risingwave_pb::catalog::table::OptionalAssociatedSourceId; -use risingwave_pb::catalog::{Comment, CreateType, Secret, Table}; +use risingwave_pb::catalog::{Comment, Connection, CreateType, Secret, Table}; use risingwave_pb::common::worker_node::State; use risingwave_pb::common::WorkerType; use risingwave_pb::ddl_service::ddl_service_server::DdlService; @@ -731,7 +732,22 @@ impl DdlService for DdlServiceImpl { create_connection_request::Payload::PrivateLink(_) => { panic!("Private Link Connection has been deprecated") } - }; + create_connection_request::Payload::ConnectionParams(params) => { + let pb_connection = Connection { + id: 0, + schema_id: req.schema_id, + database_id: req.database_id, + name: req.name, + info: Some(ConnectionInfo::ConnectionParams(params)), + owner: req.owner_id, + }; + let version = self + .ddl_controller + .run_command(DdlCommand::CreateConnection(pb_connection)) + .await?; + Ok(Response::new(CreateConnectionResponse { version })) + } + } } async fn list_connections( From 3a18c4c3b894c1cb9b70e64ab31f991c8fcdee58 Mon Sep 17 00:00:00 2001 From: tabversion Date: Sun, 3 Nov 2024 13:46:27 +0800 Subject: [PATCH 28/51] stash --- src/meta/model/migration/src/lib.rs | 2 ++ .../src/m20241103_043732_connection_params.rs | 35 +++++++++++++++++++ src/meta/model/src/lib.rs | 1 + 3 files changed, 38 insertions(+) create mode 100644 src/meta/model/migration/src/m20241103_043732_connection_params.rs diff --git a/src/meta/model/migration/src/lib.rs b/src/meta/model/migration/src/lib.rs index 0c8c244c0624e..8c5a41977f2b5 100644 --- a/src/meta/model/migration/src/lib.rs +++ b/src/meta/model/migration/src/lib.rs @@ -24,6 +24,7 
@@ mod m20240820_081248_add_time_travel_per_table_epoch; mod m20240911_083152_variable_vnode_count; mod m20241016_065621_hummock_gc_history; mod m20241025_062548_singleton_vnode_count; +mod m20241103_043732_connection_params; pub struct Migrator; @@ -85,6 +86,7 @@ impl MigratorTrait for Migrator { Box::new(m20240911_083152_variable_vnode_count::Migration), Box::new(m20241016_065621_hummock_gc_history::Migration), Box::new(m20241025_062548_singleton_vnode_count::Migration), + Box::new(m20241103_043732_connection_params::Migration), ] } } diff --git a/src/meta/model/migration/src/m20241103_043732_connection_params.rs b/src/meta/model/migration/src/m20241103_043732_connection_params.rs new file mode 100644 index 0000000000000..f2e03bafd9207 --- /dev/null +++ b/src/meta/model/migration/src/m20241103_043732_connection_params.rs @@ -0,0 +1,35 @@ +use sea_orm_migration::prelude::*; + +#[derive(DeriveMigrationName)] +pub struct Migration; + +#[async_trait::async_trait] +impl MigrationTrait for Migration { + async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { + manager + .alter_table( + Table::alter() + .table(Connection::Table) + .add_column(ColumnDef::new(Connection::Params).binary().not_null()) + .to_owned(), + ) + .await + } + + async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { + manager + .alter_table( + Table::alter() + .table(Connection::Table) + .drop_column(Connection::Params) + .to_owned(), + ) + .await + } +} + +#[derive(DeriveIden)] +enum Connection { + Table, + Params, +} diff --git a/src/meta/model/src/lib.rs b/src/meta/model/src/lib.rs index 0b99b48914b2a..0136c02ae5fac 100644 --- a/src/meta/model/src/lib.rs +++ b/src/meta/model/src/lib.rs @@ -397,6 +397,7 @@ derive_from_blob!( PrivateLinkService, risingwave_pb::catalog::connection::PbPrivateLinkService ); +derive_from_blob!(ConnectionInfo, risingwave_pb::catalog::connection::Info); derive_from_blob!(AuthInfo, risingwave_pb::user::PbAuthInfo); derive_from_blob!(ConnectorSplits, risingwave_pb::source::ConnectorSplits); From 60c09fdf4f4d08bbcfb3e685ea280c59bff6d6c8 Mon Sep 17 00:00:00 2001 From: tabversion Date: Mon, 4 Nov 2024 19:44:22 +0800 Subject: [PATCH 29/51] basic --- .../src/catalog/connection_catalog.rs | 2 +- .../rw_catalog/rw_connections.rs | 31 ++++++++++++++----- src/frontend/src/handler/create_connection.rs | 30 ++++++++++++------ src/frontend/src/handler/show.rs | 3 +- src/meta/model/src/connection.rs | 12 ++++--- src/meta/model/src/lib.rs | 2 +- src/meta/src/controller/mod.rs | 11 ++++--- src/meta/src/rpc/ddl_controller.rs | 1 + 8 files changed, 63 insertions(+), 29 deletions(-) diff --git a/src/frontend/src/catalog/connection_catalog.rs b/src/frontend/src/catalog/connection_catalog.rs index 9e17e11a84f23..a938328c46d8f 100644 --- a/src/frontend/src/catalog/connection_catalog.rs +++ b/src/frontend/src/catalog/connection_catalog.rs @@ -37,7 +37,7 @@ impl ConnectionCatalog { pub fn provider(&self) -> &str { match &self.info { Info::PrivateLinkService(_) => "PRIVATELINK", - Info::ConnectionParams(_) => todo!(), + Info::ConnectionParams(_) => panic!("ConnectionParams is not supported as provider."), } } } diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_connections.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_connections.rs index 77e10ae31018e..cb1243b65532c 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_connections.rs +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_connections.rs @@ -28,6 +28,7 @@ struct RwConnection { type_: 
String, provider: String, acl: String, + connection_params: String, } #[system_catalog(table, "rw_catalog.rw_connections")] @@ -38,14 +39,28 @@ fn read_rw_connections(reader: &SysCatalogReaderImpl) -> Result { + rw_connection.provider = conn.provider().into(); + } + risingwave_pb::catalog::connection::Info::ConnectionParams(params) => { + rw_connection.connection_params = + serde_json::to_string(¶ms.get_properties()).unwrap(); + } + }; + + rw_connection }) }) .collect()) diff --git a/src/frontend/src/handler/create_connection.rs b/src/frontend/src/handler/create_connection.rs index 7d3a9cbfe47a9..ff3e82edd3c69 100644 --- a/src/frontend/src/handler/create_connection.rs +++ b/src/frontend/src/handler/create_connection.rs @@ -17,6 +17,8 @@ use std::collections::BTreeMap; use pgwire::pg_response::{PgResponse, StatementType}; use risingwave_connector::source::iceberg::ICEBERG_CONNECTOR; use risingwave_connector::source::kafka::{KAFKA_CONNECTOR, PRIVATELINK_CONNECTION}; +use risingwave_pb::catalog::connection_params::ConnectionType; +use risingwave_pb::catalog::ConnectionParams; use risingwave_pb::ddl_service::create_connection_request; use risingwave_sqlparser::ast::CreateConnectionStatement; @@ -43,17 +45,27 @@ fn resolve_create_connection_payload( with_properties: &mut BTreeMap, ) -> Result { let connection_type = get_connection_property_required(with_properties, CONNECTION_TYPE_PROP)?; - match connection_type.as_str() { - PRIVATELINK_CONNECTION => Err(RwError::from(ErrorCode::Deprecated( + let connection_type = match connection_type.as_str() { + PRIVATELINK_CONNECTION => { + return Err(RwError::from(ErrorCode::Deprecated( "CREATE CONNECTION to Private Link".to_string(), "RisingWave Cloud Portal (Please refer to the doc https://docs.risingwave.com/cloud/create-a-connection/)".to_string(), - ))), - KAFKA_CONNECTOR => todo!(), - ICEBERG_CONNECTOR => todo!(), - _ => Err(RwError::from(ProtocolError(format!( - "Connection type \"{connection_type}\" is not supported" - )))), - } + ))); + } + KAFKA_CONNECTOR => ConnectionType::Kafka, + ICEBERG_CONNECTOR => ConnectionType::Iceberg, + _ => { + return Err(RwError::from(ProtocolError(format!( + "Connection type \"{connection_type}\" is not supported" + )))); + } + }; + Ok(create_connection_request::Payload::ConnectionParams( + ConnectionParams { + connection_type: connection_type as i32, + properties: with_properties.clone().into_iter().collect(), + }, + )) } pub async fn handle_create_connection( diff --git a/src/frontend/src/handler/show.rs b/src/frontend/src/handler/show.rs index 12e8d5d348c16..68ca877a5f303 100644 --- a/src/frontend/src/handler/show.rs +++ b/src/frontend/src/handler/show.rs @@ -414,7 +414,7 @@ pub async fn handle_show_object( PRIVATELINK_CONNECTION.to_string() }, connection::Info::ConnectionParams(params) => { - format!("CONNECTION_PARAMS_{}", params.connection_type) + format!("{}", params.get_connection_type().unwrap().as_str_name()) } }; let source_names = schema @@ -443,6 +443,7 @@ pub async fn handle_show_object( } connection::Info::ConnectionParams(params) => { // todo: check secrets are not exposed + // todo: show dep relations serde_json::to_string(¶ms.get_properties()).unwrap() } }; diff --git a/src/meta/model/src/connection.rs b/src/meta/model/src/connection.rs index dce0daa462fc5..eb93a1d82fd38 100644 --- a/src/meta/model/src/connection.rs +++ b/src/meta/model/src/connection.rs @@ -18,7 +18,7 @@ use sea_orm::entity::prelude::*; use sea_orm::ActiveValue::Set; use serde::{Deserialize, Serialize}; -use crate::{ConnectionId, 
PrivateLinkService}; +use crate::{ConnectionId, ConnectionParams, PrivateLinkService}; #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq, Serialize, Deserialize)] #[sea_orm(table_name = "connection")] @@ -27,8 +27,9 @@ pub struct Model { pub connection_id: ConnectionId, pub name: String, - // todo: Private link service has been deprecated, consider using a new field for the connection info + // Private link service has been deprecated pub info: PrivateLinkService, + pub params: ConnectionParams, } #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] @@ -69,14 +70,15 @@ impl ActiveModelBehavior for ActiveModel {} impl From for ActiveModel { fn from(conn: PbConnection) -> Self { - let Some(PbInfo::PrivateLinkService(private_link_srv)) = conn.info else { - unreachable!("private link not provided.") + let Some(PbInfo::ConnectionParams(connection_params)) = conn.info else { + unreachable!("private link has been deprecated.") }; Self { connection_id: Set(conn.id as _), name: Set(conn.name), - info: Set(PrivateLinkService::from(&private_link_srv)), + info: Set(PrivateLinkService::default()), + params: Set(ConnectionParams::from(&connection_params)), } } } diff --git a/src/meta/model/src/lib.rs b/src/meta/model/src/lib.rs index 0136c02ae5fac..81ecff9d830dc 100644 --- a/src/meta/model/src/lib.rs +++ b/src/meta/model/src/lib.rs @@ -397,7 +397,7 @@ derive_from_blob!( PrivateLinkService, risingwave_pb::catalog::connection::PbPrivateLinkService ); -derive_from_blob!(ConnectionInfo, risingwave_pb::catalog::connection::Info); +derive_from_blob!(ConnectionParams, risingwave_pb::catalog::ConnectionParams); derive_from_blob!(AuthInfo, risingwave_pb::user::PbAuthInfo); derive_from_blob!(ConnectorSplits, risingwave_pb::source::ConnectorSplits); diff --git a/src/meta/src/controller/mod.rs b/src/meta/src/controller/mod.rs index c7cf45daad9e7..3f89c6e42ef63 100644 --- a/src/meta/src/controller/mod.rs +++ b/src/meta/src/controller/mod.rs @@ -19,7 +19,7 @@ use risingwave_common::hash::VnodeCount; use risingwave_common::util::epoch::Epoch; use risingwave_meta_model::{ connection, database, function, index, object, schema, secret, sink, source, subscription, - table, view, + table, view, PrivateLinkService, }; use risingwave_pb::catalog::connection::PbInfo as PbConnectionInfo; use risingwave_pb::catalog::source::PbOptionalAssociatedTableId; @@ -327,15 +327,18 @@ impl From> for PbView { impl From> for PbConnection { fn from(value: ObjectModel) -> Self { + let info: PbConnectionInfo = if value.0.info == PrivateLinkService::default() { + PbConnectionInfo::ConnectionParams(value.0.params.to_protobuf()) + } else { + PbConnectionInfo::PrivateLinkService(value.0.info.to_protobuf()) + }; Self { id: value.1.oid as _, schema_id: value.1.schema_id.unwrap() as _, database_id: value.1.database_id.unwrap() as _, name: value.0.name, owner: value.1.owner_id as _, - info: Some(PbConnectionInfo::PrivateLinkService( - value.0.info.to_protobuf(), - )), + info: Some(info), } } } diff --git a/src/meta/src/rpc/ddl_controller.rs b/src/meta/src/rpc/ddl_controller.rs index d787be50f0ba3..8fb35963a8fce 100644 --- a/src/meta/src/rpc/ddl_controller.rs +++ b/src/meta/src/rpc/ddl_controller.rs @@ -481,6 +481,7 @@ impl DdlController { } async fn create_connection(&self, connection: Connection) -> MetaResult { + // todo: do validation here self.metadata_manager .catalog_controller .create_connection(connection) From e9f8d72e3cfa0dc6e6f961550284e5fd3a72a92c Mon Sep 17 00:00:00 2001 From: tabversion Date: Tue, 5 Nov 2024 23:08:04 +0800 
Subject: [PATCH 30/51] handle secret ref --- proto/catalog.proto | 1 + src/frontend/src/handler/create_connection.rs | 18 ++++++++++------ src/frontend/src/handler/show.rs | 2 +- src/meta/src/controller/catalog.rs | 21 +++++++++++++++++++ 4 files changed, 35 insertions(+), 7 deletions(-) diff --git a/proto/catalog.proto b/proto/catalog.proto index c2339469a5f3f..1ddf52b72f741 100644 --- a/proto/catalog.proto +++ b/proto/catalog.proto @@ -243,6 +243,7 @@ message ConnectionParams { ConnectionType connection_type = 1; map properties = 2; + map secret_refs = 3; } message Connection { diff --git a/src/frontend/src/handler/create_connection.rs b/src/frontend/src/handler/create_connection.rs index ff3e82edd3c69..f90d73f0a77e3 100644 --- a/src/frontend/src/handler/create_connection.rs +++ b/src/frontend/src/handler/create_connection.rs @@ -27,6 +27,9 @@ use crate::binder::Binder; use crate::error::ErrorCode::ProtocolError; use crate::error::{ErrorCode, Result, RwError}; use crate::handler::HandlerArgs; +use crate::session::SessionImpl; +use crate::utils::resolve_secret_ref_in_with_options; +use crate::WithOptions; pub(crate) const CONNECTION_TYPE_PROP: &str = "type"; @@ -42,9 +45,12 @@ fn get_connection_property_required( }) } fn resolve_create_connection_payload( - with_properties: &mut BTreeMap, + with_properties: WithOptions, + session: &SessionImpl, ) -> Result { - let connection_type = get_connection_property_required(with_properties, CONNECTION_TYPE_PROP)?; + let (mut props, secret_refs) = + resolve_secret_ref_in_with_options(with_properties, session)?.into_parts(); + let connection_type = get_connection_property_required(&mut props, CONNECTION_TYPE_PROP)?; let connection_type = match connection_type.as_str() { PRIVATELINK_CONNECTION => { return Err(RwError::from(ErrorCode::Deprecated( @@ -63,7 +69,8 @@ fn resolve_create_connection_payload( Ok(create_connection_request::Payload::ConnectionParams( ConnectionParams { connection_type: connection_type as i32, - properties: with_properties.clone().into_iter().collect(), + properties: props.into_iter().collect(), + secret_refs: secret_refs.into_iter().collect(), }, )) } @@ -90,9 +97,8 @@ pub async fn handle_create_connection( }; } let (database_id, schema_id) = session.get_database_and_schema_id_for_create(schema_name)?; - let mut with_properties = handler_args.with_options.clone().into_connector_props(); - - let create_connection_payload = resolve_create_connection_payload(&mut with_properties)?; + let with_properties = handler_args.with_options.clone().into_connector_props(); + let create_connection_payload = resolve_create_connection_payload(with_properties, &session)?; let catalog_writer = session.catalog_writer()?; catalog_writer diff --git a/src/frontend/src/handler/show.rs b/src/frontend/src/handler/show.rs index 68ca877a5f303..0e44867598e5e 100644 --- a/src/frontend/src/handler/show.rs +++ b/src/frontend/src/handler/show.rs @@ -414,7 +414,7 @@ pub async fn handle_show_object( PRIVATELINK_CONNECTION.to_string() }, connection::Info::ConnectionParams(params) => { - format!("{}", params.get_connection_type().unwrap().as_str_name()) + params.get_connection_type().unwrap().as_str_name().to_string() } }; let source_names = schema diff --git a/src/meta/src/controller/catalog.rs b/src/meta/src/controller/catalog.rs index bcff662f94f08..89a8e5faee5ef 100644 --- a/src/meta/src/controller/catalog.rs +++ b/src/meta/src/controller/catalog.rs @@ -36,6 +36,7 @@ use risingwave_meta_model::{ SourceId, StreamNode, StreamSourceInfo, StreamingParallelism, 
SubscriptionId, TableId, UserId, ViewId, }; +use risingwave_pb::catalog::connection::Info as ConnectionInfo; use risingwave_pb::catalog::subscription::SubscriptionState; use risingwave_pb::catalog::table::PbTableType; use risingwave_pb::catalog::{ @@ -1443,6 +1444,16 @@ impl CatalogController { ensure_object_id(ObjectType::Schema, pb_connection.schema_id as _, &txn).await?; check_connection_name_duplicate(&pb_connection, &txn).await?; + let mut dep_secrets = HashSet::new(); + if let Some(ConnectionInfo::ConnectionParams(params)) = &pb_connection.info { + dep_secrets.extend( + params + .secret_refs + .values() + .map(|secret_ref| secret_ref.secret_id), + ); + } + let conn_obj = Self::create_object( &txn, ObjectType::Connection, @@ -1455,6 +1466,16 @@ impl CatalogController { let connection: connection::ActiveModel = pb_connection.clone().into(); Connection::insert(connection).exec(&txn).await?; + for secret_id in dep_secrets { + ObjectDependency::insert(object_dependency::ActiveModel { + oid: Set(secret_id as _), + used_by: Set(conn_obj.oid), + ..Default::default() + }) + .exec(&txn) + .await?; + } + txn.commit().await?; let version = self From 673bccb543e6f02af8689f0a4eff23af03a00cb9 Mon Sep 17 00:00:00 2001 From: tabversion Date: Wed, 6 Nov 2024 15:18:22 +0800 Subject: [PATCH 31/51] stash --- proto/catalog.proto | 1 + proto/ddl_service.proto | 4 +- src/frontend/src/handler/create_connection.rs | 6 + src/frontend/src/handler/create_source.rs | 88 +++++++-- src/frontend/src/handler/create_table.rs | 6 +- src/frontend/src/handler/create_table_as.rs | 6 +- src/frontend/src/handler/create_view.rs | 7 +- src/frontend/src/handler/show.rs | 12 +- src/frontend/src/utils/with_options.rs | 186 +++++++++++++++--- src/sqlparser/src/ast/mod.rs | 4 +- src/sqlparser/src/ast/value.rs | 23 ++- src/sqlparser/src/parser.rs | 6 +- 12 files changed, 287 insertions(+), 62 deletions(-) diff --git a/proto/catalog.proto b/proto/catalog.proto index 9b08528ae123e..6d54903cc2592 100644 --- a/proto/catalog.proto +++ b/proto/catalog.proto @@ -241,6 +241,7 @@ message ConnectionParams { CONNECTION_TYPE_UNSPECIFIED = 0; CONNECTION_TYPE_KAFKA = 1; CONNECTION_TYPE_ICEBERG = 2; + CONNECTION_TYPE_SCHEMA_REGISTRY = 3; } ConnectionType connection_type = 1; diff --git a/proto/ddl_service.proto b/proto/ddl_service.proto index 414db77bcf5ca..0d4b7b806cda6 100644 --- a/proto/ddl_service.proto +++ b/proto/ddl_service.proto @@ -425,9 +425,9 @@ message CreateConnectionRequest { uint32 schema_id = 3; oneof payload { PrivateLink private_link = 4 [deprecated = true]; - catalog.ConnectionParams connection_params = 7; + catalog.ConnectionParams connection_params = 6; } - uint32 owner_id = 6; + uint32 owner_id = 5; } message CreateConnectionResponse { diff --git a/src/frontend/src/handler/create_connection.rs b/src/frontend/src/handler/create_connection.rs index f90d73f0a77e3..cbfe6979a9ef4 100644 --- a/src/frontend/src/handler/create_connection.rs +++ b/src/frontend/src/handler/create_connection.rs @@ -48,6 +48,12 @@ fn resolve_create_connection_payload( with_properties: WithOptions, session: &SessionImpl, ) -> Result { + if !with_properties.connection_ref().is_empty() { + return Err(RwError::from(ErrorCode::InvalidParameterValue( + "Connection reference is not allowed in options in CREATE CONNECTION".to_string(), + ))); + } + let (mut props, secret_refs) = resolve_secret_ref_in_with_options(with_properties, session)?.into_parts(); let connection_type = get_connection_property_required(&mut props, CONNECTION_TYPE_PROP)?; diff --git 
a/src/frontend/src/handler/create_source.rs b/src/frontend/src/handler/create_source.rs index 9c37799422a59..49ef26176e87c 100644 --- a/src/frontend/src/handler/create_source.rs +++ b/src/frontend/src/handler/create_source.rs @@ -12,14 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::{BTreeMap, HashMap}; +use std::collections::{BTreeMap, HashMap, HashSet}; use std::rc::Rc; use std::sync::LazyLock; use anyhow::{anyhow, Context}; use either::Either; use itertools::Itertools; -use maplit::{convert_args, hashmap}; +use maplit::{convert_args, hashmap, hashset}; use pgwire::pg_response::{PgResponse, StatementType}; use rand::Rng; use risingwave_common::array::arrow::{arrow_schema_iceberg, IcebergArrowConvert}; @@ -59,6 +59,7 @@ use risingwave_connector::source::{ POSIX_FS_CONNECTOR, PULSAR_CONNECTOR, S3_CONNECTOR, }; use risingwave_connector::WithPropertiesExt; +use risingwave_pb::catalog::connection_params::PbConnectionType; use risingwave_pb::catalog::{PbSchemaRegistryNameStrategy, StreamSourceInfo, WatermarkDesc}; use risingwave_pb::plan_common::additional_column::ColumnType as AdditionalColumnType; use risingwave_pb::plan_common::{EncodeType, FormatType}; @@ -86,7 +87,8 @@ use crate::optimizer::plan_node::generic::SourceNodeKind; use crate::optimizer::plan_node::{LogicalSource, ToStream, ToStreamContext}; use crate::session::SessionImpl; use crate::utils::{ - resolve_privatelink_in_with_option, resolve_secret_ref_in_with_options, OverwriteOptions, + resolve_connection_ref_and_secret_ref, resolve_privatelink_in_with_option, + resolve_secret_ref_in_with_options, OverwriteOptions, }; use crate::{bind_data_type, build_graph, OptimizerContext, WithOptions, WithOptionsSecResolved}; @@ -308,16 +310,36 @@ pub(crate) async fn bind_columns_from_source( const NAME_STRATEGY_KEY: &str = "schema.registry.name.strategy"; let options_with_secret = match with_properties { - Either::Left(options) => resolve_secret_ref_in_with_options(options.clone(), session)?, + Either::Left(options) => { + let (sec_resolve_props, connection_type) = + resolve_connection_ref_and_secret_ref(options.clone(), session)?; + if !ALLOWED_CONNECTION_CONNECTOR.contains(&connection_type) { + return Err(RwError::from(ProtocolError(format!( + "connection type {:?} is not allowed, allowed types: {:?}", + connection_type, ALLOWED_CONNECTION_CONNECTOR + )))); + } + + sec_resolve_props + } Either::Right(options_with_secret) => options_with_secret.clone(), }; let is_kafka: bool = options_with_secret.is_kafka_connector(); - let (format_encode_options, format_encode_secret_refs) = resolve_secret_ref_in_with_options( + + // todo: need to resolve connection ref for schema registry + let (sec_resolve_props, connection_type) = resolve_connection_ref_and_secret_ref( WithOptions::try_from(format_encode.row_options())?, session, - )? 
- .into_parts(); + )?; + if !ALLOWED_CONNECTION_SCHEMA_REGISTRY.contains(&connection_type) { + return Err(RwError::from(ProtocolError(format!( + "connection type {:?} is not allowed, allowed types: {:?}", + connection_type, ALLOWED_CONNECTION_SCHEMA_REGISTRY + )))); + } + + let (format_encode_options, format_encode_secret_refs) = sec_resolve_props.into_parts(); // Need real secret to access the schema registry let mut format_encode_options_to_consume = LocalSecretManager::global().fill_secrets( format_encode_options.clone(), @@ -542,11 +564,15 @@ fn bind_columns_from_source_for_cdc( session: &SessionImpl, format_encode: &FormatEncodeOptions, ) -> Result<(Option>, StreamSourceInfo)> { - let (format_encode_options, format_encode_secret_refs) = resolve_secret_ref_in_with_options( - WithOptions::try_from(format_encode.row_options())?, - session, - )? - .into_parts(); + let with_options = WithOptions::try_from(format_encode.row_options())?; + if !with_options.connection_ref().is_empty() { + return Err(RwError::from(NotSupported( + "CDC connector does not support connection ref yet".to_string(), + "Explicitly specify the connection in WITH clause".to_string(), + ))); + } + let (format_encode_options, format_encode_secret_refs) = + resolve_secret_ref_in_with_options(with_options, session)?.into_parts(); // Need real secret to access the schema registry let mut format_encode_options_to_consume = LocalSecretManager::global().fill_secrets( @@ -1049,6 +1075,20 @@ pub(super) fn bind_source_watermark( Ok(watermark_descs) } +static ALLOWED_CONNECTION_CONNECTOR: LazyLock> = LazyLock::new(|| { + hashset! { + PbConnectionType::Kafka, + PbConnectionType::Iceberg, + } +}); + +static ALLOWED_CONNECTION_SCHEMA_REGISTRY: LazyLock> = + LazyLock::new(|| { + hashset! { + PbConnectionType::SchemaRegistry, + } + }); + // TODO: Better design if we want to support ENCODE KEY where we will have 4 dimensional array static CONNECTORS_COMPATIBLE_FORMATS: LazyLock>>> = LazyLock::new(|| { @@ -1561,7 +1601,21 @@ pub async fn bind_create_source_or_table_with_connector( let mut with_properties = with_properties; resolve_privatelink_in_with_option(&mut with_properties)?; - let with_properties = resolve_secret_ref_in_with_options(with_properties, session)?; + let connector = with_properties.get_connector().unwrap(); + let (with_properties, connection_type) = + resolve_connection_ref_and_secret_ref(with_properties, session)?; + if !ALLOWED_CONNECTION_CONNECTOR.contains(&connection_type) { + return Err(RwError::from(ProtocolError(format!( + "connection type {:?} is not allowed, allowed types: {:?}", + connection_type, ALLOWED_CONNECTION_CONNECTOR + )))); + } + if !connector.eq(connection_type_to_connector(&connection_type)) { + return Err(RwError::from(ProtocolError(format!( + "connector {} and connection type {:?} are not compatible", + connector, connection_type + )))); + } let pk_names = bind_source_pk( &format_encode, @@ -1772,6 +1826,14 @@ fn row_encode_to_prost(row_encode: &Encode) -> EncodeType { } } +fn connection_type_to_connector(connection_type: &PbConnectionType) -> &str { + match connection_type { + PbConnectionType::Kafka => KAFKA_CONNECTOR, + PbConnectionType::Iceberg => ICEBERG_CONNECTOR, + _ => unreachable!(), + } +} + #[cfg(test)] pub mod tests { use std::collections::HashMap; diff --git a/src/frontend/src/handler/create_table.rs b/src/frontend/src/handler/create_table.rs index 1e5dc489c1a0c..e002fbfb121cd 100644 --- a/src/frontend/src/handler/create_table.rs +++ b/src/frontend/src/handler/create_table.rs @@ 
-551,9 +551,9 @@ pub(crate) fn gen_create_table_plan( c.column_desc.column_id = col_id_gen.generate(c.name()) } - let (_, secret_refs) = context.with_options().clone().into_parts(); - if !secret_refs.is_empty() { - return Err(crate::error::ErrorCode::InvalidParameterValue("Secret reference is not allowed in options when creating table without external source".to_string()).into()); + let (_, secret_refs, connection_refs) = context.with_options().clone().into_parts(); + if !secret_refs.is_empty() || !connection_refs.is_empty() { + return Err(crate::error::ErrorCode::InvalidParameterValue("Secret reference and Connection reference are not allowed in options when creating table without external source".to_string()).into()); } gen_create_table_plan_without_source( diff --git a/src/frontend/src/handler/create_table_as.rs b/src/frontend/src/handler/create_table_as.rs index 27c527969f9b2..f2ba78a125b93 100644 --- a/src/frontend/src/handler/create_table_as.rs +++ b/src/frontend/src/handler/create_table_as.rs @@ -89,10 +89,10 @@ pub async fn handle_create_as( let (graph, source, table) = { let context = OptimizerContext::from_handler_args(handler_args.clone()); - let (_, secret_refs) = context.with_options().clone().into_parts(); - if !secret_refs.is_empty() { + let (_, secret_refs, connection_refs) = context.with_options().clone().into_parts(); + if !secret_refs.is_empty() || !connection_refs.is_empty() { return Err(crate::error::ErrorCode::InvalidParameterValue( - "Secret reference is not allowed in options for CREATE TABLE AS".to_string(), + "Secret reference and Connection reference are not allowed in options for CREATE TABLE AS".to_string(), ) .into()); } diff --git a/src/frontend/src/handler/create_view.rs b/src/frontend/src/handler/create_view.rs index 5ad0e8956b967..851c3a4fa89df 100644 --- a/src/frontend/src/handler/create_view.rs +++ b/src/frontend/src/handler/create_view.rs @@ -87,10 +87,11 @@ pub async fn handle_create_view( .collect() }; - let (properties, secret_refs) = properties.into_parts(); - if !secret_refs.is_empty() { + let (properties, secret_refs, connection_refs) = properties.into_parts(); + if !secret_refs.is_empty() || !connection_refs.is_empty() { return Err(crate::error::ErrorCode::InvalidParameterValue( - "Secret reference is not allowed in create view options".to_string(), + "Secret reference and Connection reference are not allowed in create view options" + .to_string(), ) .into()); } diff --git a/src/frontend/src/handler/show.rs b/src/frontend/src/handler/show.rs index 0e44867598e5e..ac64f133d21d2 100644 --- a/src/frontend/src/handler/show.rs +++ b/src/frontend/src/handler/show.rs @@ -27,13 +27,14 @@ use risingwave_common::util::addr::HostAddr; use risingwave_connector::source::kafka::PRIVATELINK_CONNECTION; use risingwave_expr::scalar::like::{i_like_default, like_default}; use risingwave_pb::catalog::connection; +use risingwave_pb::secret::SecretRef; use risingwave_sqlparser::ast::{ display_comma_separated, Ident, ObjectName, ShowCreateType, ShowObject, ShowStatementFilter, }; use super::{fields_to_descriptors, RwPgResponse, RwPgResponseBuilderExt}; use crate::binder::{Binder, Relation}; -use crate::catalog::{CatalogError, IndexCatalog}; +use crate::catalog::{CatalogError, IndexCatalog, SecretId}; use crate::error::Result; use crate::handler::HandlerArgs; use crate::session::cursor_manager::SubscriptionCursor; @@ -444,7 +445,14 @@ pub async fn handle_show_object( connection::Info::ConnectionParams(params) => { // todo: check secrets are not exposed // todo: show dep 
relations - serde_json::to_string(¶ms.get_properties()).unwrap() + let print_secret_ref = |secret_ref: &SecretRef| -> String { + let secret_name = schema.get_secret_by_id(&SecretId::from(secret_ref.secret_id)).map(|s| s.name.as_str()).unwrap(); + format!("SECRET {} AS {}", secret_name, secret_ref.get_ref_as().unwrap().as_str_name()) + }; + let deref_secrets = params.get_secret_refs().iter().map(|(k, v)| (k.clone(), print_secret_ref(v))); + let mut props = params.get_properties().clone(); + props.extend(deref_secrets); + serde_json::to_string(&props).unwrap() } }; ShowConnectionRow { diff --git a/src/frontend/src/utils/with_options.rs b/src/frontend/src/utils/with_options.rs index 9d61021dab4fe..870680482b2d6 100644 --- a/src/frontend/src/utils/with_options.rs +++ b/src/frontend/src/utils/with_options.rs @@ -15,20 +15,22 @@ use std::collections::BTreeMap; use std::num::NonZeroU32; +use risingwave_common::catalog::ConnectionId; use risingwave_connector::source::kafka::private_link::{ insert_privatelink_broker_rewrite_map, PRIVATELINK_ENDPOINT_KEY, }; pub use risingwave_connector::WithOptionsSecResolved; use risingwave_connector::WithPropertiesExt; +use risingwave_pb::catalog::connection::Info as ConnectionInfo; +use risingwave_pb::catalog::connection_params::PbConnectionType; use risingwave_pb::secret::secret_ref::PbRefAsType; use risingwave_pb::secret::PbSecretRef; use risingwave_sqlparser::ast::{ - CreateConnectionStatement, CreateSinkStatement, CreateSourceStatement, - CreateSubscriptionStatement, SecretRef, SecretRefAsType, SqlOption, Statement, Value, + ConnectionRefValue, CreateConnectionStatement, CreateSinkStatement, CreateSourceStatement, + CreateSubscriptionStatement, SecretRefAsType, SecretRefValue, SqlOption, Statement, Value, }; use super::OverwriteOptions; -use crate::catalog::ConnectionId; use crate::error::{ErrorCode, Result as RwResult, RwError}; use crate::session::SessionImpl; use crate::Binder; @@ -38,11 +40,14 @@ mod options { pub const RETENTION_SECONDS: &str = "retention_seconds"; } +const CONNECTION_REF_KEY: &str = "profile"; + /// Options or properties extracted from the `WITH` clause of DDLs. #[derive(Default, Clone, Debug, PartialEq, Eq, Hash)] pub struct WithOptions { inner: BTreeMap, - secret_ref: BTreeMap, + secret_ref: BTreeMap, + connection_ref: BTreeMap, } impl std::ops::Deref for WithOptions { @@ -65,12 +70,21 @@ impl WithOptions { Self { inner, secret_ref: Default::default(), + connection_ref: Default::default(), } } /// Create a new [`WithOptions`] from a option [`BTreeMap`] and secret ref. - pub fn new(inner: BTreeMap, secret_ref: BTreeMap) -> Self { - Self { inner, secret_ref } + pub fn new( + inner: BTreeMap, + secret_ref: BTreeMap, + connection_ref: BTreeMap, + ) -> Self { + Self { + inner, + secret_ref, + connection_ref, + } } pub fn inner_mut(&mut self) -> &mut BTreeMap { @@ -78,8 +92,14 @@ impl WithOptions { } /// Take the value of the option map and secret refs. - pub fn into_parts(self) -> (BTreeMap, BTreeMap) { - (self.inner, self.secret_ref) + pub fn into_parts( + self, + ) -> ( + BTreeMap, + BTreeMap, + BTreeMap, + ) { + (self.inner, self.secret_ref, self.connection_ref) } /// Convert to connector props, remove the key-value pairs used in the top-level. 
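Aside: `into_parts()` now returns a three-element tuple, so every call site that previously destructured `(inner, secret_ref)` has to be updated in the same patch. A minimal sketch of the caller-side guard used above in create_table.rs, create_table_as.rs, and create_view.rs, assuming the three-field `WithOptions` introduced here (the helper name `ensure_plain_options` is hypothetical, not part of the patch):

    // Hypothetical helper: reject secret/connection refs where only plain
    // key-value options are allowed, then hand back the inner option map.
    fn ensure_plain_options(options: WithOptions) -> RwResult<BTreeMap<String, String>> {
        let (inner, secret_refs, connection_refs) = options.into_parts();
        if !secret_refs.is_empty() || !connection_refs.is_empty() {
            return Err(ErrorCode::InvalidParameterValue(
                "Secret reference and Connection reference are not allowed here".to_string(),
            )
            .into());
        }
        Ok(inner)
    }
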
@@ -95,6 +115,7 @@ impl WithOptions { Self { inner, secret_ref: self.secret_ref, + connection_ref: self.connection_ref, } } @@ -119,6 +140,7 @@ impl WithOptions { Self { inner, secret_ref: self.secret_ref.clone(), + connection_ref: self.connection_ref.clone(), } } @@ -131,23 +153,26 @@ impl WithOptions { false } - pub fn secret_ref(&self) -> &BTreeMap { + pub fn secret_ref(&self) -> &BTreeMap { &self.secret_ref } - pub fn encode_options_to_map(sql_options: &[SqlOption]) -> RwResult> { - let WithOptions { inner, secret_ref } = WithOptions::try_from(sql_options)?; - if secret_ref.is_empty() { - Ok(inner) - } else { - Err(RwError::from(ErrorCode::InvalidParameterValue( - "Secret reference is not allowed in encode options".to_string(), - ))) - } + pub fn secret_ref_mut(&mut self) -> &mut BTreeMap { + &mut self.secret_ref + } + + pub fn connection_ref(&self) -> &BTreeMap { + &self.connection_ref + } + + pub fn connection_ref_mut(&mut self) -> &mut BTreeMap { + &mut self.connection_ref } pub fn oauth_options_to_map(sql_options: &[SqlOption]) -> RwResult> { - let WithOptions { inner, secret_ref } = WithOptions::try_from(sql_options)?; + let WithOptions { + inner, secret_ref, .. + } = WithOptions::try_from(sql_options)?; if secret_ref.is_empty() { Ok(inner) } else { @@ -158,12 +183,88 @@ impl WithOptions { } } +pub(crate) fn resolve_connection_ref_and_secret_ref( + with_options: WithOptions, + session: &SessionImpl, +) -> RwResult<(WithOptionsSecResolved, PbConnectionType)> { + let db_name: &str = session.database(); + let (mut options, secret_refs, connection_refs) = with_options.clone().into_parts(); + + let mut connection_params = None; + for connection_ref in connection_refs.values() { + // at most one connection ref in the map + connection_params = { + // get connection params from catalog + let (schema_name, connection_name) = Binder::resolve_schema_qualified_name( + db_name, + connection_ref.connection_name.clone(), + )?; + let connection_catalog = + session.get_connection_by_name(schema_name, &connection_name)?; + if let ConnectionInfo::ConnectionParams(params) = &connection_catalog.info { + Some(params.clone()) + } else { + return Err(RwError::from(ErrorCode::InvalidParameterValue( + "Private Link Service has been deprecated. Please create a new connection instead." 
+ .to_string(), + ))); + } + }; + } + + let mut inner_secret_refs = { + let mut resolved_secret_refs = BTreeMap::new(); + for (key, secret_ref) in secret_refs { + let (schema_name, secret_name) = + Binder::resolve_schema_qualified_name(db_name, secret_ref.secret_name.clone())?; + let secret_catalog = session.get_secret_by_name(schema_name, &secret_name)?; + let ref_as = match secret_ref.ref_as { + SecretRefAsType::Text => PbRefAsType::Text, + SecretRefAsType::File => PbRefAsType::File, + }; + let pb_secret_ref = PbSecretRef { + secret_id: secret_catalog.id.secret_id(), + ref_as: ref_as.into(), + }; + resolved_secret_refs.insert(key.clone(), pb_secret_ref); + } + resolved_secret_refs + }; + + let mut connection_type = PbConnectionType::Unspecified; + if let Some(connection_params) = connection_params { + connection_type = connection_params.connection_type(); + for (k, v) in connection_params.properties { + if options.insert(k.clone(), v).is_some() { + return Err(RwError::from(ErrorCode::InvalidParameterValue(format!( + "Duplicated key: {}", + k + )))); + } + } + + for (k, v) in connection_params.secret_refs { + if inner_secret_refs.insert(k.clone(), v).is_some() { + return Err(RwError::from(ErrorCode::InvalidParameterValue(format!( + "Duplicated key: {}", + k + )))); + } + } + } + + Ok(( + WithOptionsSecResolved::new(options, inner_secret_refs), + connection_type, + )) +} + /// Get the secret id from the name. pub(crate) fn resolve_secret_ref_in_with_options( with_options: WithOptions, session: &SessionImpl, ) -> RwResult { - let (options, secret_refs) = with_options.into_parts(); + let (options, secret_refs, _) = with_options.into_parts(); let mut resolved_secret_refs = BTreeMap::new(); let db_name: &str = session.database(); for (key, secret_ref) in secret_refs { @@ -207,17 +308,40 @@ impl TryFrom<&[SqlOption]> for WithOptions { fn try_from(options: &[SqlOption]) -> Result { let mut inner: BTreeMap = BTreeMap::new(); - let mut secret_ref: BTreeMap = BTreeMap::new(); + let mut secret_ref: BTreeMap = BTreeMap::new(); + let mut connection_ref: BTreeMap = BTreeMap::new(); for option in options { let key = option.name.real_value(); - if let Value::Ref(r) = &option.value { - if secret_ref.insert(key.clone(), r.clone()).is_some() || inner.contains_key(&key) { - return Err(RwError::from(ErrorCode::InvalidParameterValue(format!( - "Duplicated option: {}", - key - )))); + match &option.value { + Value::SecretRef(r) => { + if secret_ref.insert(key.clone(), r.clone()).is_some() + || inner.contains_key(&key) + { + return Err(RwError::from(ErrorCode::InvalidParameterValue(format!( + "Duplicated option: {}", + key + )))); + } + continue; } - continue; + Value::ConnectionRef(r) => { + if key != CONNECTION_REF_KEY { + return Err(RwError::from(ErrorCode::InvalidParameterValue(format!( + "expect 'profile' as the key for connection ref, but got: {}", + key + )))); + } + if connection_ref.insert(key.clone(), r.clone()).is_some() + || inner.contains_key(&key) + { + return Err(RwError::from(ErrorCode::InvalidParameterValue(format!( + "Duplicated option: {}", + key + )))); + } + continue; + } + _ => {} } let value: String = match option.value.clone() { Value::CstyleEscapedString(s) => s.value, @@ -239,7 +363,11 @@ impl TryFrom<&[SqlOption]> for WithOptions { } } - Ok(Self { inner, secret_ref }) + Ok(Self { + inner, + secret_ref, + connection_ref, + }) } } diff --git a/src/sqlparser/src/ast/mod.rs b/src/sqlparser/src/ast/mod.rs index 563dc66be4780..c737913d43ad9 100644 --- a/src/sqlparser/src/ast/mod.rs +++ 
b/src/sqlparser/src/ast/mod.rs @@ -52,8 +52,8 @@ pub use self::query::{ }; pub use self::statement::*; pub use self::value::{ - CstyleEscapedString, DateTimeField, DollarQuotedString, JsonPredicateType, SecretRef, - SecretRefAsType, TrimWhereField, Value, + ConnectionRefValue, CstyleEscapedString, DateTimeField, DollarQuotedString, JsonPredicateType, + SecretRefAsType, SecretRefValue, TrimWhereField, Value, }; pub use crate::ast::ddl::{ AlterIndexOperation, AlterSinkOperation, AlterSourceOperation, AlterSubscriptionOperation, diff --git a/src/sqlparser/src/ast/value.rs b/src/sqlparser/src/ast/value.rs index 2bf8a6fdf3a02..051e0814a3d9f 100644 --- a/src/sqlparser/src/ast/value.rs +++ b/src/sqlparser/src/ast/value.rs @@ -60,7 +60,9 @@ pub enum Value { /// `NULL` value Null, /// name of the reference to secret - Ref(SecretRef), + SecretRef(SecretRefValue), + /// name of the reference to connection + ConnectionRef(ConnectionRefValue), } impl fmt::Display for Value { @@ -115,7 +117,8 @@ impl fmt::Display for Value { Ok(()) } Value::Null => write!(f, "NULL"), - Value::Ref(v) => write!(f, "secret {}", v), + Value::SecretRef(v) => write!(f, "secret {}", v), + Value::ConnectionRef(v) => write!(f, "connection {}", v), } } } @@ -240,12 +243,12 @@ impl fmt::Display for JsonPredicateType { } #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -pub struct SecretRef { +pub struct SecretRefValue { pub secret_name: ObjectName, pub ref_as: SecretRefAsType, } -impl fmt::Display for SecretRef { +impl fmt::Display for SecretRefValue { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self.ref_as { SecretRefAsType::Text => write!(f, "{}", self.secret_name), @@ -260,3 +263,15 @@ pub enum SecretRefAsType { Text, File, } + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct ConnectionRefValue { + pub connection_name: ObjectName, +} + +impl fmt::Display for ConnectionRefValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.connection_name) + } +} diff --git a/src/sqlparser/src/parser.rs b/src/sqlparser/src/parser.rs index d5582f31a64de..d16c6d12116a8 100644 --- a/src/sqlparser/src/parser.rs +++ b/src/sqlparser/src/parser.rs @@ -3641,11 +3641,15 @@ impl Parser<'_> { } else { SecretRefAsType::Text }; - Ok(Value::Ref(SecretRef { + Ok(Value::SecretRef(SecretRefValue { secret_name, ref_as, })) } + Keyword::CONNECTION => { + let connection_name = self.parse_object_name()?; + Ok(Value::ConnectionRef(ConnectionRefValue { connection_name })) + } _ => self.expected_at(checkpoint, "a concrete value"), }, Token::Number(ref n) => Ok(Value::Number(n.clone())), From 635975d2126c2b9391f387f1899f7f8d99121623 Mon Sep 17 00:00:00 2001 From: tabversion Date: Wed, 6 Nov 2024 17:56:37 +0800 Subject: [PATCH 32/51] fix --- src/frontend/src/handler/create_source.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/frontend/src/handler/create_source.rs b/src/frontend/src/handler/create_source.rs index 49ef26176e87c..530ff1f2689a5 100644 --- a/src/frontend/src/handler/create_source.rs +++ b/src/frontend/src/handler/create_source.rs @@ -1085,6 +1085,7 @@ static ALLOWED_CONNECTION_CONNECTOR: LazyLock> = LazyL static ALLOWED_CONNECTION_SCHEMA_REGISTRY: LazyLock> = LazyLock::new(|| { hashset! 
{ + PbConnectionType::Unspecified, PbConnectionType::SchemaRegistry, } }); From 01363ec40326926f19837cdd8f4bcca9b413d860 Mon Sep 17 00:00:00 2001 From: tabversion Date: Fri, 15 Nov 2024 15:48:41 +0800 Subject: [PATCH 33/51] macro expansion --- src/connector/src/connector_common/common.rs | 27 ++++++ .../src/connector_common/iceberg/mod.rs | 4 + src/connector/src/connector_common/mod.rs | 35 ++++---- src/connector/src/lib.rs | 1 + src/connector/src/macros.rs | 82 +++++++++++++++++++ src/connector/src/source/base.rs | 6 +- 6 files changed, 139 insertions(+), 16 deletions(-) diff --git a/src/connector/src/connector_common/common.rs b/src/connector/src/connector_common/common.rs index 3eaffa93d02a4..5b2c0200c4724 100644 --- a/src/connector/src/connector_common/common.rs +++ b/src/connector/src/connector_common/common.rs @@ -241,6 +241,33 @@ pub struct KafkaConnection { sasl_oathbearer_config: Option, } +impl KafkaConnection { + #[cfg(test)] + pub fn test_default() -> Self { + Self { + brokers: "localhost:9092".to_string(), + security_protocol: None, + ssl_ca_location: None, + ssl_certificate_location: None, + ssl_key_location: None, + ssl_ca_pem: None, + ssl_certificate_pem: None, + ssl_key_pem: None, + ssl_key_password: None, + ssl_endpoint_identification_algorithm: None, + sasl_mechanism: None, + sasl_username: None, + sasl_password: None, + sasl_kerberos_service_name: None, + sasl_kerberos_keytab: None, + sasl_kerberos_principal: None, + sasl_kerberos_kinit_cmd: None, + sasl_kerberos_min_time_before_relogin: None, + sasl_oathbearer_config: None, + } + } +} + #[serde_as] #[derive(Debug, Clone, Deserialize, WithOptions)] pub struct KafkaCommon { diff --git a/src/connector/src/connector_common/iceberg/mod.rs b/src/connector/src/connector_common/iceberg/mod.rs index bf96e474eee80..87a35370483cf 100644 --- a/src/connector/src/connector_common/iceberg/mod.rs +++ b/src/connector/src/connector_common/iceberg/mod.rs @@ -32,6 +32,10 @@ use with_options::WithOptions; use crate::deserialize_optional_bool_from_string; use crate::error::ConnectorResult; +#[serde_as] +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, WithOptions)] +pub struct IcebergConnection {} + #[serde_as] #[derive(Debug, Clone, PartialEq, Eq, Deserialize, WithOptions)] pub struct IcebergCommon { diff --git a/src/connector/src/connector_common/mod.rs b/src/connector/src/connector_common/mod.rs index 8ddc1d1c55151..8d5e5c6946674 100644 --- a/src/connector/src/connector_common/mod.rs +++ b/src/connector/src/connector_common/mod.rs @@ -25,17 +25,24 @@ pub use common::{ }; mod iceberg; -pub use iceberg::IcebergCommon; - -// #[derive(Debug, Clone, Deserialize)] -// pub enum ConnectionImpl { -// Kafka(KafkaConnection), -// } - -// macro_rules! 
impl_connection_enum { -// ($impl:expr, $inner_name:ident, $prop_type_name:ident, $body:expr) => { -// impl ConnectionImpl { -// pub fn get_ -// } -// }; -// } +pub use iceberg::{IcebergCommon, IcebergConnection}; + +#[cfg(test)] +mod tests { + + use super::*; + use crate::error::ConnectorResult; + use crate::{dispatch_connection_impl, ConnectionImpl}; + + #[test] + fn test_dispatch_connection() -> ConnectorResult<()> { + let kafka_conn = KafkaConnection::test_default(); + let conn_impl = ConnectionImpl::from(kafka_conn); + + let x = dispatch_connection_impl!(conn_impl, inner, { + println!("{:?}", inner); + Ok(()) + }); + Ok(()) + } +} diff --git a/src/connector/src/lib.rs b/src/connector/src/lib.rs index f66b5116c110b..c6e81865c9c83 100644 --- a/src/connector/src/lib.rs +++ b/src/connector/src/lib.rs @@ -49,6 +49,7 @@ pub mod parser; pub mod schema; pub mod sink; pub mod source; +pub use source::ConnectionImpl; pub mod connector_common; diff --git a/src/connector/src/macros.rs b/src/connector/src/macros.rs index 1dc304c651a85..86b3f5f333803 100644 --- a/src/connector/src/macros.rs +++ b/src/connector/src/macros.rs @@ -50,6 +50,19 @@ macro_rules! for_all_classified_sources { }; } +#[macro_export] +macro_rules! for_all_connections { + ($macro:path $(, $extra_args:tt)*) => { + $macro! { + { + { Kafka, $crate::connector_common::KafkaConnection, risingwave_pb::catalog::connection_params::PbConnectionType::Kafka }, + { Iceberg, $crate::connector_common::IcebergConnection, risingwave_pb::catalog::connection_params::PbConnectionType::Iceberg } + } + $(,$extra_args)* + } + }; +} + #[macro_export] macro_rules! for_all_sources_inner { ( @@ -161,6 +174,75 @@ macro_rules! dispatch_split_impl { }}; } +#[macro_export] +macro_rules! dispatch_connection_impl { + ($impl:expr, $inner_name:ident, $body:expr) => { + $crate::dispatch_connection_enum! { $impl, $inner_name, $body } + }; +} + +#[macro_export] +macro_rules! dispatch_connection_enum { + ($impl:expr, $inner_name:ident, $body:expr) => { + $crate::for_all_connections! { + $crate::dispatch_connection_impl_inner, + $impl, + $inner_name, + $body + } + }; +} + +#[macro_export] +macro_rules! dispatch_connection_impl_inner { + ( + { $({$conn_variant_name:ident, $connection:ty, $pb_variant_type:ty }),* }, + $impl:expr, + $inner_name:ident, + $body:expr + ) => {{ + match $impl { + $( + ConnectionImpl::$conn_variant_name($inner_name) => { + $body + } + ),* + } + }}; +} + +#[macro_export] +macro_rules! impl_connection { + ({$({ $variant_name:ident, $connection:ty, $pb_connection_type:ty }),*}) => { + #[derive(Debug, Clone, EnumAsInner, PartialEq)] + pub enum ConnectionImpl { + $( + $variant_name(Box<$connection>), + )* + } + + $( + impl TryFrom for $connection { + type Error = $crate::error::ConnectorError; + + fn try_from(connection: ConnectionImpl) -> std::result::Result { + match connection { + ConnectionImpl::$variant_name(inner) => Ok(Box::into_inner(inner)), + other => risingwave_common::bail!("expect {} but get {:?}", stringify!($connection), other), + } + } + } + + impl From<$connection> for ConnectionImpl { + fn from(connection: $connection) -> ConnectionImpl { + ConnectionImpl::$variant_name(Box::new(connection)) + } + } + + )* + } +} + #[macro_export] macro_rules! 
impl_split { ({$({ $variant_name:ident, $prop_name:ty, $split:ty}),*}) => { diff --git a/src/connector/src/source/base.rs b/src/connector/src/source/base.rs index 59e3585431a60..1efa03d41ff7e 100644 --- a/src/connector/src/source/base.rs +++ b/src/connector/src/source/base.rs @@ -50,8 +50,9 @@ use crate::source::monitor::EnumeratorMetrics; use crate::source::SplitImpl::{CitusCdc, MongodbCdc, MysqlCdc, PostgresCdc, SqlServerCdc}; use crate::with_options::WithOptions; use crate::{ - dispatch_source_prop, dispatch_split_impl, for_all_sources, impl_connector_properties, - impl_split, match_source_name_str, WithOptionsSecResolved, + dispatch_source_prop, dispatch_split_impl, for_all_connections, for_all_sources, + impl_connection, impl_connector_properties, impl_split, match_source_name_str, + WithOptionsSecResolved, }; const SPLIT_TYPE_FIELD: &str = "split_type"; @@ -472,6 +473,7 @@ impl ConnectorProperties { } for_all_sources!(impl_split); +for_all_connections!(impl_connection); impl From<&SplitImpl> for ConnectorSplit { fn from(split: &SplitImpl) -> Self { From 94f730e08fa5a11f4d5f39e881a56bd7ced7e253 Mon Sep 17 00:00:00 2001 From: William Wen Date: Fri, 15 Nov 2024 16:07:40 +0800 Subject: [PATCH 34/51] fix compile --- src/connector/src/connector_common/mod.rs | 2 +- src/connector/src/macros.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/connector/src/connector_common/mod.rs b/src/connector/src/connector_common/mod.rs index 8d5e5c6946674..5822658c1f104 100644 --- a/src/connector/src/connector_common/mod.rs +++ b/src/connector/src/connector_common/mod.rs @@ -39,7 +39,7 @@ mod tests { let kafka_conn = KafkaConnection::test_default(); let conn_impl = ConnectionImpl::from(kafka_conn); - let x = dispatch_connection_impl!(conn_impl, inner, { + let x: Result<(), ()> = dispatch_connection_impl!(conn_impl, inner, { println!("{:?}", inner); Ok(()) }); diff --git a/src/connector/src/macros.rs b/src/connector/src/macros.rs index 86b3f5f333803..a54c06819edb3 100644 --- a/src/connector/src/macros.rs +++ b/src/connector/src/macros.rs @@ -183,14 +183,14 @@ macro_rules! dispatch_connection_impl { #[macro_export] macro_rules! dispatch_connection_enum { - ($impl:expr, $inner_name:ident, $body:expr) => { + ($impl:expr, $inner_name:ident, $body:expr) => {{ $crate::for_all_connections! 
{ $crate::dispatch_connection_impl_inner, $impl, $inner_name, $body } - }; + }}; } #[macro_export] From 5baccf3a2ed2cf69353d0acae92ff72c534eb2f0 Mon Sep 17 00:00:00 2001 From: tabversion Date: Fri, 15 Nov 2024 16:27:17 +0800 Subject: [PATCH 35/51] dep graph on connection --- src/meta/src/controller/catalog.rs | 10 +++++---- src/meta/src/controller/streaming_job.rs | 9 ++++++-- src/meta/src/manager/mod.rs | 26 ++++++++++++++++++++++++ src/meta/src/manager/streaming_job.rs | 20 +++++++++++++++++- 4 files changed, 58 insertions(+), 7 deletions(-) diff --git a/src/meta/src/controller/catalog.rs b/src/meta/src/controller/catalog.rs index 4e093c20e92d5..af89db7bfc466 100644 --- a/src/meta/src/controller/catalog.rs +++ b/src/meta/src/controller/catalog.rs @@ -79,8 +79,8 @@ use crate::controller::utils::{ }; use crate::controller::ObjectModel; use crate::manager::{ - get_referred_secret_ids_from_source, MetaSrvEnv, NotificationVersion, - IGNORED_NOTIFICATION_VERSION, + get_referred_connection_ids_from_source, get_referred_secret_ids_from_source, MetaSrvEnv, + NotificationVersion, IGNORED_NOTIFICATION_VERSION, }; use crate::rpc::ddl_controller::DropMode; use crate::telemetry::MetaTelemetryJobDesc; @@ -1238,6 +1238,7 @@ impl CatalogController { // handle secret ref let secret_ids = get_referred_secret_ids_from_source(&pb_source)?; + let connection_ids = get_referred_connection_ids_from_source(&pb_source); let source_obj = Self::create_object( &txn, @@ -1253,8 +1254,9 @@ impl CatalogController { Source::insert(source).exec(&txn).await?; // add secret dependency - if !secret_ids.is_empty() { - ObjectDependency::insert_many(secret_ids.iter().map(|id| { + let dep_relation_ids = secret_ids.iter().chain(connection_ids.iter()); + if !secret_ids.is_empty() || !connection_ids.is_empty() { + ObjectDependency::insert_many(dep_relation_ids.map(|id| { object_dependency::ActiveModel { oid: Set(*id as _), used_by: Set(source_id as _), diff --git a/src/meta/src/controller/streaming_job.rs b/src/meta/src/controller/streaming_job.rs index 8496904d44c0c..31fe6b42063ed 100644 --- a/src/meta/src/controller/streaming_job.rs +++ b/src/meta/src/controller/streaming_job.rs @@ -309,12 +309,17 @@ impl CatalogController { // get dependent secrets. let dependent_secret_ids = streaming_job.dependent_secret_ids()?; + let dependent_connection_ids = streaming_job.dependent_connection_ids()?; let dependent_objs = dependent_relations .iter() - .chain(dependent_secret_ids.iter()); + .chain(dependent_secret_ids.iter()) + .chain(dependent_connection_ids.iter()); // record object dependency. 
- if !dependent_secret_ids.is_empty() || !dependent_relations.is_empty() { + if !dependent_secret_ids.is_empty() + || !dependent_relations.is_empty() + || !dependent_connection_ids.is_empty() + { ObjectDependency::insert_many(dependent_objs.map(|id| { object_dependency::ActiveModel { oid: Set(*id as _), diff --git a/src/meta/src/manager/mod.rs b/src/meta/src/manager/mod.rs index b49ce350c5501..80ecda8cabaf8 100644 --- a/src/meta/src/manager/mod.rs +++ b/src/meta/src/manager/mod.rs @@ -71,6 +71,32 @@ pub fn get_referred_secret_ids_from_source(source: &PbSource) -> MetaResult HashSet { + let mut connection_ids = HashSet::new(); + if let Some(conn_id) = source.connection_id { + connection_ids.insert(conn_id); + } + if let Some(info) = &source.info + && let Some(conn_id) = info.connection_id + { + connection_ids.insert(conn_id); + } + connection_ids +} + +pub fn get_referred_connection_ids_from_sink(sink: &PbSink) -> HashSet { + let mut connection_ids = HashSet::new(); + if let Some(format_desc) = &sink.format_desc + && let Some(conn_id) = format_desc.connection_id + { + connection_ids.insert(conn_id); + } + if let Some(conn_id) = sink.connection_id { + connection_ids.insert(conn_id); + } + connection_ids +} + pub fn get_referred_secret_ids_from_sink(sink: &PbSink) -> HashSet { let mut secret_ids = HashSet::new(); for secret_ref in sink.get_secret_refs().values() { diff --git a/src/meta/src/manager/streaming_job.rs b/src/meta/src/manager/streaming_job.rs index 9d5b135095fb1..79216f062a137 100644 --- a/src/meta/src/manager/streaming_job.rs +++ b/src/meta/src/manager/streaming_job.rs @@ -21,7 +21,10 @@ use risingwave_pb::catalog::{CreateType, Index, PbSource, Sink, Table}; use risingwave_pb::ddl_service::TableJobType; use strum::{EnumDiscriminants, EnumIs}; -use super::{get_referred_secret_ids_from_sink, get_referred_secret_ids_from_source}; +use super::{ + get_referred_connection_ids_from_sink, get_referred_connection_ids_from_source, + get_referred_secret_ids_from_sink, get_referred_secret_ids_from_source, +}; use crate::model::FragmentId; use crate::MetaResult; @@ -313,6 +316,21 @@ impl StreamingJob { } } + pub fn dependent_connection_ids(&self) -> MetaResult> { + match self { + StreamingJob::Source(source) => Ok(get_referred_connection_ids_from_source(source)), + StreamingJob::Table(source, _, _) => { + if let Some(source) = source { + Ok(get_referred_connection_ids_from_source(source)) + } else { + Ok(HashSet::new()) + } + } + StreamingJob::Sink(sink, _) => Ok(get_referred_connection_ids_from_sink(sink)), + StreamingJob::MaterializedView(_) | StreamingJob::Index(_, _) => Ok(HashSet::new()), + } + } + // Get the secret ids that are referenced by this job. 
pub fn dependent_secret_ids(&self) -> MetaResult> { match self { From 1da1d1a7677727a21fc3a8f54fceaab8ef90b806 Mon Sep 17 00:00:00 2001 From: tabversion Date: Fri, 15 Nov 2024 16:32:04 +0800 Subject: [PATCH 36/51] merge fix --- .../src/catalog/system_catalog/rw_catalog/rw_connections.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_connections.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_connections.rs index e8f7419791260..11e780badec49 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_connections.rs +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_connections.rs @@ -47,7 +47,7 @@ fn read_rw_connections(reader: &SysCatalogReaderImpl) -> Result Date: Fri, 15 Nov 2024 18:13:50 +0800 Subject: [PATCH 37/51] connection dep for sink --- src/bench/sink_bench/main.rs | 1 + src/connector/src/sink/catalog/mod.rs | 6 ++- src/frontend/src/handler/create_sink.rs | 54 ++++++++++++++++++----- src/frontend/src/handler/create_source.rs | 53 ++++++++-------------- src/frontend/src/handler/util.rs | 41 ++++++++++++++++- src/frontend/src/utils/with_options.rs | 9 +++- 6 files changed, 116 insertions(+), 48 deletions(-) diff --git a/src/bench/sink_bench/main.rs b/src/bench/sink_bench/main.rs index 850c41c31460f..14f106a61b9e8 100644 --- a/src/bench/sink_bench/main.rs +++ b/src/bench/sink_bench/main.rs @@ -487,6 +487,7 @@ fn mock_from_legacy_type( options: Default::default(), secret_refs: Default::default(), key_encode: None, + connection_id: None, })) } else { SinkFormatDesc::from_legacy_type(connector, r#type) diff --git a/src/connector/src/sink/catalog/mod.rs b/src/connector/src/sink/catalog/mod.rs index 0be567e50d874..4a789101ec740 100644 --- a/src/connector/src/sink/catalog/mod.rs +++ b/src/connector/src/sink/catalog/mod.rs @@ -124,6 +124,7 @@ pub struct SinkFormatDesc { pub options: BTreeMap, pub secret_refs: BTreeMap, pub key_encode: Option, + pub connection_id: Option, } /// TODO: consolidate with [`crate::source::SourceFormat`] and [`crate::parser::ProtocolProperties`]. 
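For orientation, a hedged sketch of how the widened `SinkFormatDesc` is meant to be populated once a schema-registry connection is resolved at bind time; the enum variants and the id value below are illustrative only, and the real wiring happens in `bind_sink_format_desc` later in this patch:

    // Illustrative only: a format descriptor carrying the id of a resolved
    // SCHEMA_REGISTRY connection alongside its resolved secret refs.
    let format_desc = SinkFormatDesc {
        format: SinkFormat::Upsert,
        encode: SinkEncode::Avro,
        options: BTreeMap::new(),
        secret_refs: BTreeMap::new(),
        key_encode: None,
        connection_id: Some(42), // hypothetical catalog id of the connection
    };
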
@@ -188,6 +189,7 @@ impl SinkFormatDesc { options: Default::default(), secret_refs: Default::default(), key_encode: None, + connection_id: None, })) } @@ -223,7 +225,7 @@ impl SinkFormatDesc { options, key_encode, secret_refs: self.secret_refs.clone(), - connection_id: None, + connection_id: self.connection_id, } } @@ -236,6 +238,7 @@ impl SinkFormatDesc { options: Default::default(), secret_refs: Default::default(), key_encode: None, + connection_id: None, } } } @@ -300,6 +303,7 @@ impl TryFrom for SinkFormatDesc { options: value.options, key_encode, secret_refs: value.secret_refs, + connection_id: value.connection_id, }) } } diff --git a/src/frontend/src/handler/create_sink.rs b/src/frontend/src/handler/create_sink.rs index c33aeccb81e1d..e86cb5bf26f2d 100644 --- a/src/frontend/src/handler/create_sink.rs +++ b/src/frontend/src/handler/create_sink.rs @@ -18,11 +18,13 @@ use std::sync::{Arc, LazyLock}; use anyhow::Context; use either::Either; use itertools::Itertools; -use maplit::{convert_args, hashmap}; +use maplit::{convert_args, hashmap, hashset}; use pgwire::pg_response::{PgResponse, StatementType}; use risingwave_common::array::arrow::arrow_schema_iceberg::DataType as ArrowDataType; use risingwave_common::array::arrow::IcebergArrowConvert; -use risingwave_common::catalog::{ColumnCatalog, DatabaseId, Schema, SchemaId, TableId, UserId}; +use risingwave_common::catalog::{ + ColumnCatalog, ConnectionId, DatabaseId, Schema, SchemaId, TableId, UserId, +}; use risingwave_common::secret::LocalSecretManager; use risingwave_common::types::DataType; use risingwave_common::{bail, catalog}; @@ -32,6 +34,8 @@ use risingwave_connector::sink::kafka::KAFKA_SINK; use risingwave_connector::sink::{ CONNECTOR_TYPE_KEY, SINK_TYPE_OPTION, SINK_USER_FORCE_APPEND_ONLY_OPTION, SINK_WITHOUT_BACKFILL, }; +use risingwave_connector::WithPropertiesExt; +use risingwave_pb::catalog::connection_params::PbConnectionType; use risingwave_pb::catalog::{PbSink, PbSource, Table}; use risingwave_pb::ddl_service::{ReplaceTablePlan, TableJobType}; use risingwave_pb::stream_plan::stream_node::{NodeBody, PbNodeBody}; @@ -57,7 +61,9 @@ use crate::handler::alter_table_column::fetch_table_catalog_for_alter; use crate::handler::create_mv::parse_column_names; use crate::handler::create_table::{generate_stream_graph_for_replace_table, ColumnIdGenerator}; use crate::handler::privilege::resolve_query_privileges; -use crate::handler::util::SourceSchemaCompatExt; +use crate::handler::util::{ + check_connector_match_connection_type, ensure_connection_type_allowed, SourceSchemaCompatExt, +}; use crate::handler::HandlerArgs; use crate::optimizer::plan_node::{ generic, IcebergPartitionInfo, LogicalSource, PartitionComputeInfo, StreamProject, @@ -66,9 +72,24 @@ use crate::optimizer::{OptimizerContext, PlanRef, RelationCollectorVisitor}; use crate::scheduler::streaming_manager::CreatingStreamingJobInfo; use crate::session::SessionImpl; use crate::stream_fragmenter::build_graph; -use crate::utils::{resolve_privatelink_in_with_option, resolve_secret_ref_in_with_options}; +use crate::utils::{resolve_connection_ref_and_secret_ref, resolve_privatelink_in_with_option}; use crate::{Explain, Planner, TableCatalog, WithOptions, WithOptionsSecResolved}; +static ALLOWED_CONNECTION_CONNECTOR: LazyLock> = LazyLock::new(|| { + hashset! { + PbConnectionType::Kafka, + PbConnectionType::Iceberg, + } +}); + +static ALLOWED_CONNECTION_SCHEMA_REGISTRY: LazyLock> = + LazyLock::new(|| { + hashset! 
{ + PbConnectionType::Unspecified, + PbConnectionType::SchemaRegistry, + } + }); + // used to store result of `gen_sink_plan` pub struct SinkPlanContext { pub query: Box, @@ -92,7 +113,15 @@ pub async fn gen_sink_plan( let mut with_options = handler_args.with_options.clone(); resolve_privatelink_in_with_option(&mut with_options)?; - let mut resolved_with_options = resolve_secret_ref_in_with_options(with_options, session)?; + let (mut resolved_with_options, connection_type, connector_conn_ref) = + resolve_connection_ref_and_secret_ref(with_options, session)?; + ensure_connection_type_allowed(connection_type, &ALLOWED_CONNECTION_CONNECTOR)?; + + // if not using connection, we don't need to check connector match connection type + if !matches!(connection_type, PbConnectionType::Unspecified) { + let connector = resolved_with_options.get_connector().unwrap(); + check_connector_match_connection_type(connector.as_str(), &connection_type)?; + } let partition_info = get_partition_compute_info(&resolved_with_options).await?; @@ -260,7 +289,7 @@ pub async fn gen_sink_plan( SchemaId::new(sink_schema_id), DatabaseId::new(sink_database_id), UserId::new(session.user_id()), - None, // deprecated: private link connection id + connector_conn_ref.map(ConnectionId::from), dependent_relations.into_iter().collect_vec(), ); @@ -852,11 +881,13 @@ fn bind_sink_format_desc( } } - let (mut options, secret_refs) = resolve_secret_ref_in_with_options( - WithOptions::try_from(value.row_options.as_slice())?, - session, - )? - .into_parts(); + let (props, connection_type_flag, schema_registry_conn_ref) = + resolve_connection_ref_and_secret_ref( + WithOptions::try_from(value.row_options.as_slice())?, + session, + )?; + ensure_connection_type_allowed(connection_type_flag, &ALLOWED_CONNECTION_SCHEMA_REGISTRY)?; + let (mut options, secret_refs) = props.into_parts(); options .entry(TimestamptzHandlingMode::OPTION_KEY.to_owned()) @@ -868,6 +899,7 @@ fn bind_sink_format_desc( options, secret_refs, key_encode, + connection_id: schema_registry_conn_ref, }) } diff --git a/src/frontend/src/handler/create_source.rs b/src/frontend/src/handler/create_source.rs index d85b55b2e909b..ee5f40505227d 100644 --- a/src/frontend/src/handler/create_source.rs +++ b/src/frontend/src/handler/create_source.rs @@ -81,7 +81,9 @@ use crate::handler::create_table::{ bind_pk_and_row_id_on_relation, bind_sql_column_constraints, bind_sql_columns, bind_sql_pk_names, bind_table_constraints, ColumnIdGenerator, }; -use crate::handler::util::SourceSchemaCompatExt; +use crate::handler::util::{ + check_connector_match_connection_type, ensure_connection_type_allowed, SourceSchemaCompatExt, +}; use crate::handler::HandlerArgs; use crate::optimizer::plan_node::generic::SourceNodeKind; use crate::optimizer::plan_node::{LogicalSource, ToStream, ToStreamContext}; @@ -311,7 +313,7 @@ pub(crate) async fn bind_columns_from_source( let options_with_secret = match with_properties { Either::Left(options) => { - let (sec_resolve_props, connection_type) = + let (sec_resolve_props, connection_type, _) = resolve_connection_ref_and_secret_ref(options.clone(), session)?; if !ALLOWED_CONNECTION_CONNECTOR.contains(&connection_type) { return Err(RwError::from(ProtocolError(format!( @@ -328,16 +330,12 @@ pub(crate) async fn bind_columns_from_source( let is_kafka: bool = options_with_secret.is_kafka_connector(); // todo: need to resolve connection ref for schema registry - let (sec_resolve_props, connection_type) = resolve_connection_ref_and_secret_ref( - 
WithOptions::try_from(format_encode.row_options())?, - session, - )?; - if !ALLOWED_CONNECTION_SCHEMA_REGISTRY.contains(&connection_type) { - return Err(RwError::from(ProtocolError(format!( - "connection type {:?} is not allowed, allowed types: {:?}", - connection_type, ALLOWED_CONNECTION_SCHEMA_REGISTRY - )))); - } + let (sec_resolve_props, connection_type, schema_registry_conn_ref) = + resolve_connection_ref_and_secret_ref( + WithOptions::try_from(format_encode.row_options())?, + session, + )?; + ensure_connection_type_allowed(connection_type, &ALLOWED_CONNECTION_SCHEMA_REGISTRY)?; let (format_encode_options, format_encode_secret_refs) = sec_resolve_props.into_parts(); // Need real secret to access the schema registry @@ -372,6 +370,7 @@ pub(crate) async fn bind_columns_from_source( row_encode: row_encode_to_prost(&format_encode.row_encode) as i32, format_encode_options, format_encode_secret_refs, + connection_id: schema_registry_conn_ref, ..Default::default() }; @@ -1601,20 +1600,14 @@ pub async fn bind_create_source_or_table_with_connector( let mut with_properties = with_properties; resolve_privatelink_in_with_option(&mut with_properties)?; - let connector = with_properties.get_connector().unwrap(); - let (with_properties, connection_type) = + let (with_properties, connection_type, connector_conn_ref) = resolve_connection_ref_and_secret_ref(with_properties, session)?; - if !ALLOWED_CONNECTION_CONNECTOR.contains(&connection_type) { - return Err(RwError::from(ProtocolError(format!( - "connection type {:?} is not allowed, allowed types: {:?}", - connection_type, ALLOWED_CONNECTION_CONNECTOR - )))); - } - if !connector.eq(connection_type_to_connector(&connection_type)) { - return Err(RwError::from(ProtocolError(format!( - "connector {} and connection type {:?} are not compatible", - connector, connection_type - )))); + ensure_connection_type_allowed(connection_type, &ALLOWED_CONNECTION_CONNECTOR)?; + + // if not using connection, we don't need to check connector match connection type + if !matches!(connection_type, PbConnectionType::Unspecified) { + let connector = with_properties.get_connector().unwrap(); + check_connector_match_connection_type(connector.as_str(), &connection_type)?; } let pk_names = bind_source_pk( @@ -1689,7 +1682,7 @@ pub async fn bind_create_source_or_table_with_connector( watermark_descs, associated_table_id, definition, - connection_id: None, // deprecated: private link connection id + connection_id: connector_conn_ref, created_at_epoch: None, initialized_at_epoch: None, version: INITIAL_SOURCE_VERSION_ID, @@ -1826,14 +1819,6 @@ fn row_encode_to_prost(row_encode: &Encode) -> EncodeType { } } -fn connection_type_to_connector(connection_type: &PbConnectionType) -> &str { - match connection_type { - PbConnectionType::Kafka => KAFKA_CONNECTOR, - PbConnectionType::Iceberg => ICEBERG_CONNECTOR, - _ => unreachable!(), - } -} - #[cfg(test)] pub mod tests { use std::collections::HashMap; diff --git a/src/frontend/src/handler/util.rs b/src/frontend/src/handler/util.rs index 169716cd504a2..899b43f3cd844 100644 --- a/src/frontend/src/handler/util.rs +++ b/src/frontend/src/handler/util.rs @@ -34,14 +34,19 @@ use risingwave_common::types::{ }; use risingwave_common::util::epoch::Epoch; use risingwave_common::util::iter_util::ZipEqFast; +use risingwave_connector::source::iceberg::ICEBERG_CONNECTOR; +use risingwave_connector::source::KAFKA_CONNECTOR; +use risingwave_pb::catalog::connection_params::PbConnectionType; use risingwave_sqlparser::ast::{ CompatibleFormatEncode, Expr, 
FormatEncodeOptions, Ident, ObjectName, OrderByExpr, Query, Select, SelectItem, SetExpr, TableFactor, TableWithJoins, }; use thiserror_ext::AsReport; -use crate::error::{ErrorCode, Result as RwResult}; +use crate::error::ErrorCode::ProtocolError; +use crate::error::{ErrorCode, Result as RwResult, RwError}; use crate::session::{current, SessionImpl}; +use crate::HashSet; pin_project! { /// Wrapper struct that converts a stream of DataChunk to a stream of RowSet based on formatting @@ -271,6 +276,40 @@ pub fn convert_interval_to_u64_seconds(interval: &String) -> RwResult { Ok(seconds) } +pub fn ensure_connection_type_allowed( + connection_type: PbConnectionType, + allowed_types: &HashSet, +) -> RwResult<()> { + if !allowed_types.contains(&connection_type) { + return Err(RwError::from(ProtocolError(format!( + "connection type {:?} is not allowed, allowed types: {:?}", + connection_type, allowed_types + )))); + } + Ok(()) +} + +fn connection_type_to_connector(connection_type: &PbConnectionType) -> &str { + match connection_type { + PbConnectionType::Kafka => KAFKA_CONNECTOR, + PbConnectionType::Iceberg => ICEBERG_CONNECTOR, + _ => unreachable!(), + } +} + +pub fn check_connector_match_connection_type( + connector: &str, + connection_type: &PbConnectionType, +) -> RwResult<()> { + if !connector.eq(connection_type_to_connector(connection_type)) { + return Err(RwError::from(ProtocolError(format!( + "connector {} and connection type {:?} are not compatible", + connector, connection_type + )))); + } + Ok(()) +} + #[cfg(test)] mod tests { use postgres_types::{ToSql, Type}; diff --git a/src/frontend/src/utils/with_options.rs b/src/frontend/src/utils/with_options.rs index 870680482b2d6..27bd68c5c8e6b 100644 --- a/src/frontend/src/utils/with_options.rs +++ b/src/frontend/src/utils/with_options.rs @@ -186,10 +186,11 @@ impl WithOptions { pub(crate) fn resolve_connection_ref_and_secret_ref( with_options: WithOptions, session: &SessionImpl, -) -> RwResult<(WithOptionsSecResolved, PbConnectionType)> { +) -> RwResult<(WithOptionsSecResolved, PbConnectionType, Option)> { let db_name: &str = session.database(); let (mut options, secret_refs, connection_refs) = with_options.clone().into_parts(); + let mut connection_id = None; let mut connection_params = None; for connection_ref in connection_refs.values() { // at most one connection ref in the map @@ -202,6 +203,7 @@ pub(crate) fn resolve_connection_ref_and_secret_ref( let connection_catalog = session.get_connection_by_name(schema_name, &connection_name)?; if let ConnectionInfo::ConnectionParams(params) = &connection_catalog.info { + connection_id = Some(connection_catalog.id); Some(params.clone()) } else { return Err(RwError::from(ErrorCode::InvalidParameterValue( @@ -232,6 +234,7 @@ pub(crate) fn resolve_connection_ref_and_secret_ref( }; let mut connection_type = PbConnectionType::Unspecified; + let connection_params_none_flag = connection_params.is_none(); if let Some(connection_params) = connection_params { connection_type = connection_params.connection_type(); for (k, v) in connection_params.properties { @@ -252,10 +255,14 @@ pub(crate) fn resolve_connection_ref_and_secret_ref( } } } + debug_assert!( + matches!(connection_type, PbConnectionType::Unspecified) && connection_params_none_flag + ); Ok(( WithOptionsSecResolved::new(options, inner_secret_refs), connection_type, + connection_id, )) } From 1d2cb3db022dddb6dc8e4ea11b91e03910421b43 Mon Sep 17 00:00:00 2001 From: tabversion Date: Mon, 18 Nov 2024 17:28:53 +0800 Subject: [PATCH 38/51] stash --- 
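The helper pair moved into `util.rs` in the previous patch is the whole gatekeeping story for connection refs: an allow-list check on the connection type first, then a connector/type compatibility check that only runs when a connection was actually referenced. Below is a self-contained sketch of how a call site composes them, using a simplified error type instead of `RwResult` and a local enum instead of `PbConnectionType`; note that the allow-list must also admit `Unspecified` (meaning 'no connection referenced'), which is exactly what PATCH 40 later adds:

    use std::collections::HashSet;

    #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
    enum ConnectionType {
        Unspecified, // no CONNECTION object referenced in the WITH clause
        Kafka,
        Iceberg,
    }

    fn ensure_connection_type_allowed(
        ty: ConnectionType,
        allowed: &HashSet<ConnectionType>,
    ) -> Result<(), String> {
        if !allowed.contains(&ty) {
            return Err(format!(
                "connection type {:?} is not allowed, allowed types: {:?}",
                ty, allowed
            ));
        }
        Ok(())
    }

    fn check_connector_match_connection_type(
        connector: &str,
        ty: ConnectionType,
    ) -> Result<(), String> {
        let expected = match ty {
            ConnectionType::Kafka => "kafka",
            ConnectionType::Iceberg => "iceberg",
            // The caller guarantees a real connection type at this point.
            ConnectionType::Unspecified => unreachable!(),
        };
        if connector != expected {
            return Err(format!(
                "connector {} and connection type {:?} are not compatible",
                connector, ty
            ));
        }
        Ok(())
    }

    fn gate_create_sink(connector: &str, ty: ConnectionType) -> Result<(), String> {
        let allowed = HashSet::from([
            ConnectionType::Unspecified, // WITH clause without a connection ref
            ConnectionType::Kafka,
            ConnectionType::Iceberg,
        ]);
        ensure_connection_type_allowed(ty, &allowed)?;
        // Compatibility is only meaningful when a connection was referenced.
        if ty != ConnectionType::Unspecified {
            check_connector_match_connection_type(connector, ty)?;
        }
        Ok(())
    }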
src/connector/src/connector_common/common.rs | 59 ++++----- .../src/connector_common/connection_util.rs | 125 ++++++++++++++++++ .../src/connector_common/iceberg/mod.rs | 4 - src/connector/src/connector_common/mod.rs | 29 +--- src/connector/src/macros.rs | 20 ++- src/connector/src/sink/kafka.rs | 5 +- src/connector/src/sink/redis.rs | 2 + src/connector/src/source/base.rs | 1 + .../src/source/kafka/enumerator/client.rs | 5 +- src/connector/src/source/kafka/mod.rs | 4 +- src/frontend/src/handler/create_connection.rs | 5 +- src/frontend/src/utils/with_options.rs | 29 +++- src/meta/src/error.rs | 8 ++ src/meta/src/rpc/ddl_controller.rs | 3 +- 14 files changed, 229 insertions(+), 70 deletions(-) create mode 100644 src/connector/src/connector_common/connection_util.rs diff --git a/src/connector/src/connector_common/common.rs b/src/connector/src/connector_common/common.rs index 5b2c0200c4724..0f49ff1c60cd0 100644 --- a/src/connector/src/connector_common/common.rs +++ b/src/connector/src/connector_common/common.rs @@ -163,7 +163,7 @@ impl AwsAuthProps { #[serde_as] #[derive(Debug, Clone, Deserialize, WithOptions, PartialEq, Hash, Eq)] -pub struct KafkaConnection { +pub struct KafkaConnectionInner { #[serde(rename = "properties.bootstrap.server", alias = "kafka.brokers")] pub brokers: String, @@ -241,36 +241,10 @@ pub struct KafkaConnection { sasl_oathbearer_config: Option, } -impl KafkaConnection { - #[cfg(test)] - pub fn test_default() -> Self { - Self { - brokers: "localhost:9092".to_string(), - security_protocol: None, - ssl_ca_location: None, - ssl_certificate_location: None, - ssl_key_location: None, - ssl_ca_pem: None, - ssl_certificate_pem: None, - ssl_key_pem: None, - ssl_key_password: None, - ssl_endpoint_identification_algorithm: None, - sasl_mechanism: None, - sasl_username: None, - sasl_password: None, - sasl_kerberos_service_name: None, - sasl_kerberos_keytab: None, - sasl_kerberos_principal: None, - sasl_kerberos_kinit_cmd: None, - sasl_kerberos_min_time_before_relogin: None, - sasl_oathbearer_config: None, - } - } -} - #[serde_as] #[derive(Debug, Clone, Deserialize, WithOptions)] pub struct KafkaCommon { + // connection related props are moved to `KafkaConnection` #[serde(rename = "topic", alias = "kafka.topic")] pub topic: String, @@ -283,7 +257,7 @@ pub struct KafkaCommon { } #[serde_as] -#[derive(Debug, Clone, Deserialize, WithOptions, PartialEq)] +#[derive(Debug, Clone, Deserialize, WithOptions, PartialEq, Hash, Eq)] pub struct KafkaPrivateLinkCommon { /// This is generated from `private_link_targets` and `private_link_endpoint` in frontend, instead of given by users. 
#[serde(rename = "broker.rewrite.endpoints")] @@ -348,7 +322,32 @@ impl RdKafkaPropertiesCommon { } } -impl KafkaConnection { +impl KafkaConnectionInner { + #[cfg(test)] + pub fn test_default() -> Self { + Self { + brokers: "localhost:9092".to_string(), + security_protocol: None, + ssl_ca_location: None, + ssl_certificate_location: None, + ssl_key_location: None, + ssl_ca_pem: None, + ssl_certificate_pem: None, + ssl_key_pem: None, + ssl_key_password: None, + ssl_endpoint_identification_algorithm: None, + sasl_mechanism: None, + sasl_username: None, + sasl_password: None, + sasl_kerberos_service_name: None, + sasl_kerberos_keytab: None, + sasl_kerberos_principal: None, + sasl_kerberos_kinit_cmd: None, + sasl_kerberos_min_time_before_relogin: None, + sasl_oathbearer_config: None, + } + } + pub(crate) fn set_security_properties(&self, config: &mut ClientConfig) { // AWS_MSK_IAM if self.is_aws_msk_iam() { diff --git a/src/connector/src/connector_common/connection_util.rs b/src/connector/src/connector_common/connection_util.rs new file mode 100644 index 0000000000000..75aa990d69642 --- /dev/null +++ b/src/connector/src/connector_common/connection_util.rs @@ -0,0 +1,125 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::time::Duration; + +use rdkafka::consumer::{BaseConsumer, Consumer}; +use rdkafka::ClientConfig; +use risingwave_common::secret::LocalSecretManager; +use risingwave_pb::catalog::PbConnection; +use serde_derive::Deserialize; +use serde_with::serde_as; +use tonic::async_trait; +use with_options::WithOptions; + +use crate::connector_common::{AwsAuthProps, KafkaConnectionInner, KafkaPrivateLinkCommon}; +use crate::error::ConnectorResult; +use crate::source::kafka::{KafkaContextCommon, RwConsumerContext}; +use crate::{dispatch_connection_impl, ConnectionImpl}; + +#[async_trait] +pub trait ConnectionValidate { + async fn test_connection(&self) -> ConnectorResult<()>; +} + +#[serde_as] +#[derive(Debug, Clone, Deserialize, WithOptions, PartialEq)] +pub struct KafkaConnection { + #[serde(flatten)] + pub inner: KafkaConnectionInner, + #[serde(flatten)] + pub kafka_private_link_common: KafkaPrivateLinkCommon, + #[serde(flatten)] + pub aws_auth_props: AwsAuthProps, +} + +pub async fn validate_connection(connection: &PbConnection) -> ConnectorResult<()> { + if let Some(ref info) = connection.info { + match info { + risingwave_pb::catalog::connection::Info::ConnectionParams(cp) => { + let options = cp.properties.clone().into_iter().collect(); + let secret_refs = cp.secret_refs.clone().into_iter().collect(); + let props_secret_resolved = + LocalSecretManager::global().fill_secrets(options, secret_refs)?; + let connection_impl = + ConnectionImpl::from_proto(cp.connection_type(), props_secret_resolved)?; + dispatch_connection_impl!(connection_impl, inner, inner.test_connection().await?) 
+ } + risingwave_pb::catalog::connection::Info::PrivateLinkService(_) => unreachable!(), + } + } + Ok(()) +} + +#[async_trait] +impl ConnectionValidate for KafkaConnection { + async fn test_connection(&self) -> ConnectorResult<()> { + let client = self.build_client().await?; + // describe cluster here + client.fetch_metadata(None, Duration::from_secs(10)).await?; + Ok(()) + } +} + +impl KafkaConnection { + async fn build_client(&self) -> ConnectorResult> { + let mut config = ClientConfig::new(); + let bootstrap_servers = &self.inner.brokers; + let broker_rewrite_map = self.kafka_private_link_common.broker_rewrite_map.clone(); + config.set("bootstrap.servers", bootstrap_servers); + self.inner.set_security_properties(&mut config); + + // dup with Kafka Enumerator + let ctx_common = KafkaContextCommon::new( + broker_rewrite_map, + None, + None, + self.aws_auth_props.clone(), + self.inner.is_aws_msk_iam(), + ) + .await?; + let client_ctx = RwConsumerContext::new(ctx_common); + let client: BaseConsumer = + config.create_with_context(client_ctx).await?; + if self.inner.is_aws_msk_iam() { + #[cfg(not(madsim))] + client.poll(Duration::from_secs(10)); // note: this is a blocking call + #[cfg(madsim)] + client.poll(Duration::from_secs(10)).await; + } + Ok(client) + } +} + +#[serde_as] +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, WithOptions)] +pub struct IcebergConnection {} + +#[async_trait] +impl ConnectionValidate for IcebergConnection { + async fn test_connection(&self) -> ConnectorResult<()> { + todo!() + } +} + +#[serde_as] +#[derive(Debug, Clone, Deserialize, WithOptions, PartialEq, Hash, Eq)] +pub struct SchemaRegistryConnection {} + +#[async_trait] +impl ConnectionValidate for SchemaRegistryConnection { + async fn test_connection(&self) -> ConnectorResult<()> { + todo!() + } +} diff --git a/src/connector/src/connector_common/iceberg/mod.rs b/src/connector/src/connector_common/iceberg/mod.rs index ded75ad4080a6..d10a9eefb68aa 100644 --- a/src/connector/src/connector_common/iceberg/mod.rs +++ b/src/connector/src/connector_common/iceberg/mod.rs @@ -32,10 +32,6 @@ use with_options::WithOptions; use crate::deserialize_optional_bool_from_string; use crate::error::ConnectorResult; -#[serde_as] -#[derive(Debug, Clone, PartialEq, Eq, Deserialize, WithOptions)] -pub struct IcebergConnection {} - #[serde_as] #[derive(Debug, Clone, PartialEq, Eq, Deserialize, WithOptions)] pub struct IcebergCommon { diff --git a/src/connector/src/connector_common/mod.rs b/src/connector/src/connector_common/mod.rs index 5822658c1f104..c75361132a61d 100644 --- a/src/connector/src/connector_common/mod.rs +++ b/src/connector/src/connector_common/mod.rs @@ -19,30 +19,15 @@ pub use mqtt_common::{MqttCommon, QualityOfService as MqttQualityOfService}; mod common; pub use common::{ - AwsAuthProps, AwsPrivateLinkItem, KafkaCommon, KafkaConnection, KafkaPrivateLinkCommon, + AwsAuthProps, AwsPrivateLinkItem, KafkaCommon, KafkaConnectionInner, KafkaPrivateLinkCommon, KinesisCommon, MongodbCommon, NatsCommon, PulsarCommon, PulsarOauthCommon, RdKafkaPropertiesCommon, PRIVATE_LINK_BROKER_REWRITE_MAP_KEY, PRIVATE_LINK_TARGETS_KEY, }; +mod connection_util; +pub use connection_util::{ + validate_connection, ConnectionValidate, IcebergConnection, KafkaConnection, + SchemaRegistryConnection, +}; mod iceberg; -pub use iceberg::{IcebergCommon, IcebergConnection}; - -#[cfg(test)] -mod tests { - - use super::*; - use crate::error::ConnectorResult; - use crate::{dispatch_connection_impl, ConnectionImpl}; - - #[test] - fn 
test_dispatch_connection() -> ConnectorResult<()> { - let kafka_conn = KafkaConnection::test_default(); - let conn_impl = ConnectionImpl::from(kafka_conn); - - let x: Result<(), ()> = dispatch_connection_impl!(conn_impl, inner, { - println!("{:?}", inner); - Ok(()) - }); - Ok(()) - } -} +pub use iceberg::IcebergCommon; diff --git a/src/connector/src/macros.rs b/src/connector/src/macros.rs index a54c06819edb3..e5e4140f3a2f2 100644 --- a/src/connector/src/macros.rs +++ b/src/connector/src/macros.rs @@ -55,8 +55,9 @@ macro_rules! for_all_connections { ($macro:path $(, $extra_args:tt)*) => { $macro! { { - { Kafka, $crate::connector_common::KafkaConnection, risingwave_pb::catalog::connection_params::PbConnectionType::Kafka }, - { Iceberg, $crate::connector_common::IcebergConnection, risingwave_pb::catalog::connection_params::PbConnectionType::Iceberg } + { Kafka, $crate::connector_common::KafkaConnection, risingwave_pb::catalog::connection_params::PbConnectionType }, + { Iceberg, $crate::connector_common::IcebergConnection, risingwave_pb::catalog::connection_params::PbConnectionType }, + { SchemaRegistry, $crate::connector_common::SchemaRegistryConnection, risingwave_pb::catalog::connection_params::PbConnectionType } } $(,$extra_args)* } @@ -213,7 +214,7 @@ macro_rules! dispatch_connection_impl_inner { #[macro_export] macro_rules! impl_connection { - ({$({ $variant_name:ident, $connection:ty, $pb_connection_type:ty }),*}) => { + ({$({ $variant_name:ident, $connection:ty, $pb_connection_path:path }),*}) => { #[derive(Debug, Clone, EnumAsInner, PartialEq)] pub enum ConnectionImpl { $( @@ -240,6 +241,19 @@ macro_rules! impl_connection { } )* + + impl ConnectionImpl { + pub fn from_proto(pb_connection_type: risingwave_pb::catalog::connection_params::PbConnectionType, value_secret_filled: std::collections::BTreeMap) -> $crate::error::ConnectorResult { + match pb_connection_type { + $( + <$pb_connection_path>::$variant_name => { + Ok(serde_json::from_value(json!(value_secret_filled)).map(ConnectionImpl::$variant_name).map_err($crate::error::ConnectorError::from)?) 
+ }, + )* + risingwave_pb::catalog::connection_params::PbConnectionType::Unspecified => unreachable!(), + } + } + } } } diff --git a/src/connector/src/sink/kafka.rs b/src/connector/src/sink/kafka.rs index 9fc8da7ef7a48..279d28914a2fa 100644 --- a/src/connector/src/sink/kafka.rs +++ b/src/connector/src/sink/kafka.rs @@ -35,7 +35,8 @@ use with_options::WithOptions; use super::catalog::{SinkFormat, SinkFormatDesc}; use super::{Sink, SinkError, SinkParam}; use crate::connector_common::{ - AwsAuthProps, KafkaCommon, KafkaConnection, KafkaPrivateLinkCommon, RdKafkaPropertiesCommon, + AwsAuthProps, KafkaCommon, KafkaConnectionInner, KafkaPrivateLinkCommon, + RdKafkaPropertiesCommon, }; use crate::sink::formatter::SinkFormatterImpl; use crate::sink::log_store::DeliveryFutureManagerAddFuture; @@ -215,7 +216,7 @@ pub struct KafkaConfig { pub common: KafkaCommon, #[serde(flatten)] - pub connection: KafkaConnection, + pub connection: KafkaConnectionInner, #[serde( rename = "properties.retry.max", diff --git a/src/connector/src/sink/redis.rs b/src/connector/src/sink/redis.rs index 763d7e9bba49a..473b3ef7f70dc 100644 --- a/src/connector/src/sink/redis.rs +++ b/src/connector/src/sink/redis.rs @@ -412,6 +412,7 @@ mod test { options: BTreeMap::default(), secret_refs: BTreeMap::default(), key_encode: None, + connection_id: None, }; let mut redis_sink_writer = RedisSinkWriter::mock(schema, vec![0], &format_desc) @@ -490,6 +491,7 @@ mod test { options: btree_map, secret_refs: Default::default(), key_encode: None, + connection_id: None, }; let mut redis_sink_writer = RedisSinkWriter::mock(schema, vec![0], &format_desc) diff --git a/src/connector/src/source/base.rs b/src/connector/src/source/base.rs index 1efa03d41ff7e..bcf8ab4b3f027 100644 --- a/src/connector/src/source/base.rs +++ b/src/connector/src/source/base.rs @@ -32,6 +32,7 @@ use risingwave_pb::catalog::{PbSource, PbStreamSourceInfo}; use risingwave_pb::plan_common::ExternalTableDesc; use risingwave_pb::source::ConnectorSplit; use serde::de::DeserializeOwned; +use serde_json::json; use tokio::sync::mpsc; use super::cdc::DebeziumCdcMeta; diff --git a/src/connector/src/source/kafka/enumerator/client.rs b/src/connector/src/source/kafka/enumerator/client.rs index 1d7525bc7a613..df7cd787e061f 100644 --- a/src/connector/src/source/kafka/enumerator/client.rs +++ b/src/connector/src/source/kafka/enumerator/client.rs @@ -31,13 +31,14 @@ use crate::error::{ConnectorError, ConnectorResult}; use crate::source::base::SplitEnumerator; use crate::source::kafka::split::KafkaSplit; use crate::source::kafka::{ - KafkaConnection, KafkaContextCommon, KafkaProperties, RwConsumerContext, KAFKA_ISOLATION_LEVEL, + KafkaConnectionInner, KafkaContextCommon, KafkaProperties, RwConsumerContext, + KAFKA_ISOLATION_LEVEL, }; use crate::source::SourceEnumeratorContextRef; type KafkaClientType = BaseConsumer; -pub static SHARED_KAFKA_CLIENT: LazyLock>> = +pub static SHARED_KAFKA_CLIENT: LazyLock>> = LazyLock::new(|| moka::future::Cache::builder().build()); #[derive(Debug, Copy, Clone, Eq, PartialEq)] diff --git a/src/connector/src/source/kafka/mod.rs b/src/connector/src/source/kafka/mod.rs index 030c190eb4942..34a165a5c29e3 100644 --- a/src/connector/src/source/kafka/mod.rs +++ b/src/connector/src/source/kafka/mod.rs @@ -17,7 +17,7 @@ use std::collections::HashMap; use serde::Deserialize; use serde_with::{serde_as, DisplayFromStr}; -use crate::connector_common::{AwsAuthProps, KafkaConnection, KafkaPrivateLinkCommon}; +use crate::connector_common::{AwsAuthProps, KafkaConnectionInner, 
KafkaPrivateLinkCommon}; mod client_context; pub mod enumerator; @@ -144,7 +144,7 @@ pub struct KafkaProperties { pub common: KafkaCommon, #[serde(flatten)] - pub connection: KafkaConnection, + pub connection: KafkaConnectionInner, #[serde(flatten)] pub rdkafka_properties_common: RdKafkaPropertiesCommon, diff --git a/src/frontend/src/handler/create_connection.rs b/src/frontend/src/handler/create_connection.rs index cbfe6979a9ef4..61ab8798427e1 100644 --- a/src/frontend/src/handler/create_connection.rs +++ b/src/frontend/src/handler/create_connection.rs @@ -28,7 +28,7 @@ use crate::error::ErrorCode::ProtocolError; use crate::error::{ErrorCode, Result, RwError}; use crate::handler::HandlerArgs; use crate::session::SessionImpl; -use crate::utils::resolve_secret_ref_in_with_options; +use crate::utils::{resolve_privatelink_in_with_option, resolve_secret_ref_in_with_options}; use crate::WithOptions; pub(crate) const CONNECTION_TYPE_PROP: &str = "type"; @@ -103,7 +103,8 @@ pub async fn handle_create_connection( }; } let (database_id, schema_id) = session.get_database_and_schema_id_for_create(schema_name)?; - let with_properties = handler_args.with_options.clone().into_connector_props(); + let mut with_properties = handler_args.with_options.clone().into_connector_props(); + resolve_privatelink_in_with_option(&mut with_properties)?; let create_connection_payload = resolve_create_connection_payload(with_properties, &session)?; let catalog_writer = session.catalog_writer()?; diff --git a/src/frontend/src/utils/with_options.rs b/src/frontend/src/utils/with_options.rs index 27bd68c5c8e6b..01dcc4eab6557 100644 --- a/src/frontend/src/utils/with_options.rs +++ b/src/frontend/src/utils/with_options.rs @@ -16,6 +16,9 @@ use std::collections::BTreeMap; use std::num::NonZeroU32; use risingwave_common::catalog::ConnectionId; +use risingwave_connector::connector_common::{ + PRIVATE_LINK_BROKER_REWRITE_MAP_KEY, PRIVATE_LINK_TARGETS_KEY, +}; use risingwave_connector::source::kafka::private_link::{ insert_privatelink_broker_rewrite_map, PRIVATELINK_ENDPOINT_KEY, }; @@ -234,8 +237,30 @@ pub(crate) fn resolve_connection_ref_and_secret_ref( }; let mut connection_type = PbConnectionType::Unspecified; - let connection_params_none_flag = connection_params.is_none(); + let connection_params_is_none_flag = connection_params.is_none(); + if let Some(connection_params) = connection_params { + // Do key checks on `PRIVATE_LINK_BROKER_REWRITE_MAP_KEY`, `PRIVATE_LINK_TARGETS_KEY` and `PRIVATELINK_ENDPOINT_KEY` + // `PRIVATE_LINK_BROKER_REWRITE_MAP_KEY` is generated from `private_link_targets` and `private_link_endpoint`, instead of given by users. + // + // We resolve private link via `resolve_privatelink_in_with_option` when creating Connection, + // so here we need to check `PRIVATE_LINK_TARGETS_KEY` and `PRIVATELINK_ENDPOINT_KEY` are not given + // if `PRIVATE_LINK_BROKER_REWRITE_MAP_KEY` is in Connection catalog. 
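The comment block above guards against double-specified private-link settings: `CREATE CONNECTION` already resolves `privatelink.endpoint` plus the target list into a broker rewrite map and stores only the map, so a statement referencing the connection must not pass the raw options again. The check that follows in this hunk boils down to the sketch below; the key strings mirror the constants imported at the top of the file, though their exact values here are partly illustrative:

    use std::collections::BTreeMap;

    // Illustrative key names; the real constants come from
    // `risingwave_connector::connector_common` and the kafka private-link module.
    const REWRITE_MAP_KEY: &str = "broker.rewrite.endpoints";
    const TARGETS_KEY: &str = "private_link_targets";
    const ENDPOINT_KEY: &str = "privatelink.endpoint";

    fn reject_duplicate_privatelink_options(
        connection_props: &BTreeMap<String, String>,
        with_options: &BTreeMap<String, String>,
    ) -> Result<(), String> {
        // The rewrite map is derived state: if the connection already carries
        // it, accepting the raw options again would create two sources of truth.
        if let Some(rewrite_map) = connection_props.get(REWRITE_MAP_KEY) {
            if with_options.contains_key(TARGETS_KEY)
                || with_options.contains_key(ENDPOINT_KEY)
            {
                return Err(format!(
                    "PrivateLink options already defined in Connection (rewrite map: {}), remove them from the WITH clause",
                    rewrite_map
                ));
            }
        }
        Ok(())
    }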
+
+        if let Some(broker_rewrite_map) = connection_params
+            .get_properties()
+            .get(PRIVATE_LINK_BROKER_REWRITE_MAP_KEY)
+        {
+            if options.contains_key(PRIVATE_LINK_TARGETS_KEY)
+                || options.contains_key(PRIVATELINK_ENDPOINT_KEY)
+            {
+                return Err(RwError::from(ErrorCode::InvalidParameterValue(format!(
+                    "PrivateLink related options already defined in Connection (rewrite map: {}), please remove {} and {} from WITH clause",
+                    broker_rewrite_map, PRIVATE_LINK_TARGETS_KEY, PRIVATELINK_ENDPOINT_KEY
+                ))));
+            }
+        }
+
         connection_type = connection_params.connection_type();
         for (k, v) in connection_params.properties {
             if options.insert(k.clone(), v).is_some() {
@@ -252,10 +255,14 @@ pub(crate) fn resolve_connection_ref_and_secret_ref(
             }
         }
     }
     debug_assert!(
-        matches!(connection_type, PbConnectionType::Unspecified) && connection_params_none_flag
+        matches!(connection_type, PbConnectionType::Unspecified) && connection_params_is_none_flag
     );

     Ok((
diff --git a/src/meta/src/error.rs b/src/meta/src/error.rs
index f1c3bb0ffdd8a..b73d96d827b3b 100644
--- a/src/meta/src/error.rs
+++ b/src/meta/src/error.rs
@@ -13,6 +13,7 @@
 // limitations under the License.

 use risingwave_common::error::BoxedError;
+use risingwave_common::secret::SecretError;
 use risingwave_common::session_config::SessionConfigError;
 use risingwave_connector::error::ConnectorError;
 use risingwave_connector::sink::SinkError;
@@ -132,6 +133,13 @@ pub enum MetaErrorInner {

     #[error("{0} has been deprecated, please use {1} instead.")]
     Deprecated(String, String),
+
+    #[error("Secret error: {0}")]
+    SecretError(
+        #[from]
+        #[backtrace]
+        SecretError,
+    ),
 }

 impl MetaError {
diff --git a/src/meta/src/rpc/ddl_controller.rs b/src/meta/src/rpc/ddl_controller.rs
index f364ab95ecb68..15a2f978b8bbe 100644
--- a/src/meta/src/rpc/ddl_controller.rs
+++ b/src/meta/src/rpc/ddl_controller.rs
@@ -30,6 +30,7 @@ use risingwave_common::util::stream_graph_visitor::{
     visit_stream_node, visit_stream_node_cont_mut,
 };
 use risingwave_common::{bail, hash, must_match};
+use risingwave_connector::connector_common::validate_connection;
 use risingwave_connector::error::ConnectorError;
 use risingwave_connector::source::{
     ConnectorProperties, SourceEnumeratorContext, SourceProperties, SplitEnumerator,
@@ -496,7 +497,7 @@ impl DdlController {
     }

     async fn create_connection(&self, connection: Connection) -> MetaResult {
-        // todo: do validation here
+        validate_connection(&connection).await?;
         self.metadata_manager
             .catalog_controller
             .create_connection(connection)
From cc75ea73850673dcfbe8ee861da57a2e7e5ff161 Mon Sep 17 00:00:00 2001
From: tabversion
Date: Mon, 18 Nov 2024 19:06:05 +0800
Subject: [PATCH 39/51] add slt test

---
 e2e_test/source_inline/connection/ddl.slt | 111 ++++++++++++++++++
 .../src/connector_common/connection_util.rs | 3 +
 .../rw_catalog/rw_connections.rs | 4 +-
 src/frontend/src/handler/create_connection.rs | 29 ++++-
 src/frontend/src/handler/show.rs | 14 +--
 src/frontend/src/utils/with_options.rs | 11 +-
 6 files changed, 153 insertions(+), 19 deletions(-)
 create mode 100644 e2e_test/source_inline/connection/ddl.slt

diff --git a/e2e_test/source_inline/connection/ddl.slt b/e2e_test/source_inline/connection/ddl.slt
new file mode 100644
index 0000000000000..a92943598492c
--- /dev/null
+++ b/e2e_test/source_inline/connection/ddl.slt
@@ -0,0 +1,111 @@
+# Note: control substitution on will force us to use "\\n" instead of "\n" in commands
+control substitution on
+
+# for non-shared source
+statement ok
+set streaming_use_shared_source to false;
+
+statement ok
+create
secret sec_broker with (backend = 'meta') as '${RISEDEV_KAFKA_BOOTSTRAP_SERVERS}'; + +statement error unknown field `foo` +create connection conn with (type = 'kafka', properties.bootstrap.server = secret sec_broker, foo = 'bar'); + +statement error Connection type "kinesis" is not supported +create connection conn with (type = 'kinesis'); + +statement ok +create connection conn with (type = 'kafka', properties.bootstrap.server = secret sec_broker, properties.security.protocol = 'plaintext'); + +query TTT +select "name", "type_", "connection_params" from rw_catalog.rw_connections; +---- +conn CONNECTION_TYPE_KAFKA {"properties.bootstrap.server":"SECRET sec_broker AS TEXT","properties.security.protocol":"plaintext"} + +statement error Permission denied: PermissionDenied: secret used by 1 other objects. +drop secret sec_broker; + +statement error expect 'profile' as the key for connection ref, but got: connection +create table t1 (a int, b varchar) with ( + connector = 'kafka', + connection = connection conn, + topic = 'connection_ddl_1') +format plain encode json; + +statement error Duplicated key in both WITH clause and Connection catalog: properties.security.protocol +create table t1 (a int, b varchar) with ( + connector = 'kafka', + profile = connection conn, + topic = 'connection_ddl_1', + properties.security.protocol = 'plaintext') +format plain encode json; + +statement error connector kinesis and connection type Kafka are not compatible +create table t1 (a int, b varchar) with ( + connector = 'kinesis', + profile = connection conn, + stream = 'connection_ddl_1', + region = 'us-east-1') +format plain encode json; + +system ok +rpk topic create connection_ddl_1 -p 1 + +statement ok +create table t1 (a int, b varchar) with ( + connector = 'kafka', + profile = connection conn, + topic = 'connection_ddl_1') +format plain encode json; + +statement error Permission denied: PermissionDenied: connection used by 1 other objects. +drop connection conn; + +# Connection & Source & Sink will have independent rely on the secret +statement error Permission denied: PermissionDenied: secret used by 2 other objects. 
+drop secret sec_broker; + +statement ok +create table data_table (a int, b varchar); + +statement ok +insert into data_table values (1, 'a'), (2, 'b'), (3, 'c'); + +statement ok +flush; + +statement ok +create sink sink_kafka from data_table with ( + connector = 'kafka', + profile = connection conn, + topic = 'connection_ddl_1' +) format plain encode json ( + force_append_only='true' +); + +sleep 3s + +query IT rowsort +select a, b from t1; +---- +1 a +2 b +3 c + +statement ok +drop table t1; + +statement ok +drop connection conn; + +statement ok +drop secret sec_broker; + +statement ok +drop sink sink_kafka + +statement ok +drop table data_table; + +statement ok +set streaming_use_shared_source to true; diff --git a/src/connector/src/connector_common/connection_util.rs b/src/connector/src/connector_common/connection_util.rs index 75aa990d69642..0d2c7c55e4d3e 100644 --- a/src/connector/src/connector_common/connection_util.rs +++ b/src/connector/src/connector_common/connection_util.rs @@ -35,6 +35,7 @@ pub trait ConnectionValidate { #[serde_as] #[derive(Debug, Clone, Deserialize, WithOptions, PartialEq)] +#[serde(deny_unknown_fields)] pub struct KafkaConnection { #[serde(flatten)] pub inner: KafkaConnectionInner, @@ -104,6 +105,7 @@ impl KafkaConnection { #[serde_as] #[derive(Debug, Clone, PartialEq, Eq, Deserialize, WithOptions)] +#[serde(deny_unknown_fields)] pub struct IcebergConnection {} #[async_trait] @@ -115,6 +117,7 @@ impl ConnectionValidate for IcebergConnection { #[serde_as] #[derive(Debug, Clone, Deserialize, WithOptions, PartialEq, Hash, Eq)] +#[serde(deny_unknown_fields)] pub struct SchemaRegistryConnection {} #[async_trait] diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_connections.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_connections.rs index 11e780badec49..7eae4c37ee418 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_connections.rs +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_connections.rs @@ -17,6 +17,7 @@ use risingwave_frontend_macro::system_catalog; use crate::catalog::system_catalog::SysCatalogReaderImpl; use crate::error::Result; +use crate::handler::create_connection::print_connection_params; #[derive(Fields)] struct RwConnection { @@ -55,8 +56,7 @@ fn read_rw_connections(reader: &SysCatalogReaderImpl) -> Result { - rw_connection.connection_params = - serde_json::to_string(¶ms.get_properties()).unwrap(); + rw_connection.connection_params = print_connection_params(params, schema); } }; diff --git a/src/frontend/src/handler/create_connection.rs b/src/frontend/src/handler/create_connection.rs index 61ab8798427e1..184f4b8c11978 100644 --- a/src/frontend/src/handler/create_connection.rs +++ b/src/frontend/src/handler/create_connection.rs @@ -18,12 +18,15 @@ use pgwire::pg_response::{PgResponse, StatementType}; use risingwave_connector::source::iceberg::ICEBERG_CONNECTOR; use risingwave_connector::source::kafka::{KAFKA_CONNECTOR, PRIVATELINK_CONNECTION}; use risingwave_pb::catalog::connection_params::ConnectionType; -use risingwave_pb::catalog::ConnectionParams; +use risingwave_pb::catalog::{ConnectionParams, PbConnectionParams}; use risingwave_pb::ddl_service::create_connection_request; +use risingwave_pb::secret::SecretRef; use risingwave_sqlparser::ast::CreateConnectionStatement; use super::RwPgResponse; use crate::binder::Binder; +use crate::catalog::schema_catalog::SchemaCatalog; +use crate::catalog::SecretId; use crate::error::ErrorCode::ProtocolError; use crate::error::{ErrorCode, Result, 
RwError}; use crate::handler::HandlerArgs; @@ -120,3 +123,27 @@ pub async fn handle_create_connection( Ok(PgResponse::empty_result(StatementType::CREATE_CONNECTION)) } + +pub fn print_connection_params( + params: &PbConnectionParams, + schema: &SchemaCatalog, +) -> String { + let print_secret_ref = |secret_ref: &SecretRef| -> String { + let secret_name = schema + .get_secret_by_id(&SecretId::from(secret_ref.secret_id)) + .map(|s| s.name.as_str()) + .unwrap(); + format!( + "SECRET {} AS {}", + secret_name, + secret_ref.get_ref_as().unwrap().as_str_name() + ) + }; + let deref_secrets = params + .get_secret_refs() + .iter() + .map(|(k, v)| (k.clone(), print_secret_ref(v))); + let mut props = params.get_properties().clone(); + props.extend(deref_secrets); + serde_json::to_string(&props).unwrap() +} diff --git a/src/frontend/src/handler/show.rs b/src/frontend/src/handler/show.rs index ac64f133d21d2..8c4c22d1d78c8 100644 --- a/src/frontend/src/handler/show.rs +++ b/src/frontend/src/handler/show.rs @@ -27,15 +27,15 @@ use risingwave_common::util::addr::HostAddr; use risingwave_connector::source::kafka::PRIVATELINK_CONNECTION; use risingwave_expr::scalar::like::{i_like_default, like_default}; use risingwave_pb::catalog::connection; -use risingwave_pb::secret::SecretRef; use risingwave_sqlparser::ast::{ display_comma_separated, Ident, ObjectName, ShowCreateType, ShowObject, ShowStatementFilter, }; use super::{fields_to_descriptors, RwPgResponse, RwPgResponseBuilderExt}; use crate::binder::{Binder, Relation}; -use crate::catalog::{CatalogError, IndexCatalog, SecretId}; +use crate::catalog::{CatalogError, IndexCatalog}; use crate::error::Result; +use crate::handler::create_connection::print_connection_params; use crate::handler::HandlerArgs; use crate::session::cursor_manager::SubscriptionCursor; use crate::session::SessionImpl; @@ -443,16 +443,8 @@ pub async fn handle_show_object( ) } connection::Info::ConnectionParams(params) => { - // todo: check secrets are not exposed // todo: show dep relations - let print_secret_ref = |secret_ref: &SecretRef| -> String { - let secret_name = schema.get_secret_by_id(&SecretId::from(secret_ref.secret_id)).map(|s| s.name.as_str()).unwrap(); - format!("SECRET {} AS {}", secret_name, secret_ref.get_ref_as().unwrap().as_str_name()) - }; - let deref_secrets = params.get_secret_refs().iter().map(|(k, v)| (k.clone(), print_secret_ref(v))); - let mut props = params.get_properties().clone(); - props.extend(deref_secrets); - serde_json::to_string(&props).unwrap() + print_connection_params(params, schema) } }; ShowConnectionRow { diff --git a/src/frontend/src/utils/with_options.rs b/src/frontend/src/utils/with_options.rs index 01dcc4eab6557..62d0bd2170e6f 100644 --- a/src/frontend/src/utils/with_options.rs +++ b/src/frontend/src/utils/with_options.rs @@ -265,7 +265,7 @@ pub(crate) fn resolve_connection_ref_and_secret_ref( for (k, v) in connection_params.properties { if options.insert(k.clone(), v).is_some() { return Err(RwError::from(ErrorCode::InvalidParameterValue(format!( - "Duplicated key: {}", + "Duplicated key in both WITH clause and Connection catalog: {}", k )))); } @@ -274,16 +274,17 @@ pub(crate) fn resolve_connection_ref_and_secret_ref( for (k, v) in connection_params.secret_refs { if inner_secret_refs.insert(k.clone(), v).is_some() { return Err(RwError::from(ErrorCode::InvalidParameterValue(format!( - "Duplicated key: {}", + "Duplicated key in both WITH clause and Connection catalog: {}", k )))); } } } - debug_assert!( - matches!(connection_type, 
PbConnectionType::Unspecified) && connection_params_is_none_flag - ); + // connection_params is None means the connection is not retrieved, so the connection type should be unspecified + if connection_params_is_none_flag { + debug_assert!(matches!(connection_type, PbConnectionType::Unspecified)); + } Ok(( WithOptionsSecResolved::new(options, inner_secret_refs), connection_type, From a446707f764f10edd9a2511c55d6999e0911aedd Mon Sep 17 00:00:00 2001 From: tabversion Date: Mon, 18 Nov 2024 19:45:30 +0800 Subject: [PATCH 40/51] fix --- src/frontend/src/handler/create_sink.rs | 1 + src/frontend/src/handler/create_source.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/src/frontend/src/handler/create_sink.rs b/src/frontend/src/handler/create_sink.rs index e86cb5bf26f2d..0a5d31db9e343 100644 --- a/src/frontend/src/handler/create_sink.rs +++ b/src/frontend/src/handler/create_sink.rs @@ -77,6 +77,7 @@ use crate::{Explain, Planner, TableCatalog, WithOptions, WithOptionsSecResolved} static ALLOWED_CONNECTION_CONNECTOR: LazyLock> = LazyLock::new(|| { hashset! { + PbConnectionType::Unspecified, PbConnectionType::Kafka, PbConnectionType::Iceberg, } diff --git a/src/frontend/src/handler/create_source.rs b/src/frontend/src/handler/create_source.rs index ee5f40505227d..1fdc22e3b7941 100644 --- a/src/frontend/src/handler/create_source.rs +++ b/src/frontend/src/handler/create_source.rs @@ -1076,6 +1076,7 @@ pub(super) fn bind_source_watermark( static ALLOWED_CONNECTION_CONNECTOR: LazyLock> = LazyLock::new(|| { hashset! { + PbConnectionType::Unspecified, PbConnectionType::Kafka, PbConnectionType::Iceberg, } From e871ab7804575d96fe4fe3c369c465ebdb6cff8b Mon Sep 17 00:00:00 2001 From: tabversion Date: Mon, 18 Nov 2024 19:56:42 +0800 Subject: [PATCH 41/51] fmt --- src/frontend/src/handler/create_connection.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/frontend/src/handler/create_connection.rs b/src/frontend/src/handler/create_connection.rs index 184f4b8c11978..54ca0f3d28520 100644 --- a/src/frontend/src/handler/create_connection.rs +++ b/src/frontend/src/handler/create_connection.rs @@ -124,10 +124,7 @@ pub async fn handle_create_connection( Ok(PgResponse::empty_result(StatementType::CREATE_CONNECTION)) } -pub fn print_connection_params( - params: &PbConnectionParams, - schema: &SchemaCatalog, -) -> String { +pub fn print_connection_params(params: &PbConnectionParams, schema: &SchemaCatalog) -> String { let print_secret_ref = |secret_ref: &SecretRef| -> String { let secret_name = schema .get_secret_by_id(&SecretId::from(secret_ref.secret_id)) From eb371d50829f15338d3af8b319ae1d4120f8c9bd Mon Sep 17 00:00:00 2001 From: tabversion Date: Mon, 18 Nov 2024 20:12:12 +0800 Subject: [PATCH 42/51] fix --- e2e_test/source_inline/connection/ddl.slt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/e2e_test/source_inline/connection/ddl.slt b/e2e_test/source_inline/connection/ddl.slt index a92943598492c..9d085e317ebe0 100644 --- a/e2e_test/source_inline/connection/ddl.slt +++ b/e2e_test/source_inline/connection/ddl.slt @@ -18,9 +18,13 @@ statement ok create connection conn with (type = 'kafka', properties.bootstrap.server = secret sec_broker, properties.security.protocol = 'plaintext'); query TTT -select "name", "type_", "connection_params" from rw_catalog.rw_connections; +select "name", "type_" from rw_catalog.rw_connections; ---- -conn CONNECTION_TYPE_KAFKA {"properties.bootstrap.server":"SECRET sec_broker AS TEXT","properties.security.protocol":"plaintext"} 
+conn CONNECTION_TYPE_KAFKA
+
+# unstable test serialization due to iter on hashmap
+# the "connection_params" looks like:
+# {"properties.bootstrap.server":"SECRET sec_broker AS TEXT","properties.security.protocol":"plaintext"}

 statement error Permission denied: PermissionDenied: secret used by 1 other objects.
 drop secret sec_broker;
From 99b2094f449114a6eb90944c0a16a973d638874f Mon Sep 17 00:00:00 2001
From: tabversion
Date: Mon, 18 Nov 2024 21:09:49 +0800
Subject: [PATCH 43/51] fix

---
 e2e_test/source_inline/connection/ddl.slt | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/e2e_test/source_inline/connection/ddl.slt b/e2e_test/source_inline/connection/ddl.slt
index 9d085e317ebe0..3a2f591b515be 100644
--- a/e2e_test/source_inline/connection/ddl.slt
+++ b/e2e_test/source_inline/connection/ddl.slt
@@ -97,19 +97,19 @@ select a, b from t1;
 3 c

 statement ok
-drop table t1;
+drop sink sink_kafka

 statement ok
-drop connection conn;
+drop table data_table;

 statement ok
-drop secret sec_broker;
+drop table t1;

 statement ok
-drop sink sink_kafka
+drop connection conn;

 statement ok
-drop table data_table;
+drop secret sec_broker;

 statement ok
 set streaming_use_shared_source to true;
From 795a79d0ebc3514e3d23855f599389a4bb87bba3 Mon Sep 17 00:00:00 2001
From: tabVersion
Date: Wed, 20 Nov 2024 22:43:45 +0800
Subject: [PATCH 44/51] resolve comments

---
 e2e_test/source_inline/connection/ddl.slt | 1 -
 src/connector/src/connector_common/common.rs | 4 ++--
 .../{connection_util.rs => connection.rs} | 12 ++++++------
 src/connector/src/connector_common/mod.rs | 9 ++++-----
 src/connector/src/sink/kafka.rs | 4 ++--
 src/connector/src/source/kafka/enumerator/client.rs | 4 ++--
 src/connector/src/source/kafka/mod.rs | 4 ++--
 7 files changed, 18 insertions(+), 20 deletions(-)
 rename src/connector/src/connector_common/{connection_util.rs => connection.rs} (93%)

diff --git a/e2e_test/source_inline/connection/ddl.slt b/e2e_test/source_inline/connection/ddl.slt
index 3a2f591b515be..f936519455f32 100644
--- a/e2e_test/source_inline/connection/ddl.slt
+++ b/e2e_test/source_inline/connection/ddl.slt
@@ -1,4 +1,3 @@
-# Note: control substitution on will force us to use "\\n" instead of "\n" in commands
 control substitution on

 # for non-shared source
diff --git a/src/connector/src/connector_common/common.rs b/src/connector/src/connector_common/common.rs
index 0f49ff1c60cd0..da99819fc7537 100644
--- a/src/connector/src/connector_common/common.rs
+++ b/src/connector/src/connector_common/common.rs
@@ -163,7 +163,7 @@ impl AwsAuthProps {
 #[serde_as]
 #[derive(Debug, Clone, Deserialize, WithOptions, PartialEq, Hash, Eq)]
-pub struct KafkaConnectionInner {
+pub struct KafkaConnectionProps {
     #[serde(rename = "properties.bootstrap.server", alias = "kafka.brokers")]
     pub brokers: String,

@@ -322,7 +322,7 @@ impl RdKafkaPropertiesCommon {
     }
 }

-impl KafkaConnectionInner {
+impl KafkaConnectionProps {
     #[cfg(test)]
     pub fn test_default() -> Self {
         Self {
diff --git a/src/connector/src/connector_common/connection_util.rs b/src/connector/src/connector_common/connection.rs
similarity index 93%
rename from src/connector/src/connector_common/connection_util.rs
rename to src/connector/src/connector_common/connection.rs
index 0d2c7c55e4d3e..fa1f420544677 100644
--- a/src/connector/src/connector_common/connection_util.rs
+++ b/src/connector/src/connector_common/connection.rs
@@ -23,13 +23,13 @@ use serde_with::serde_as;
 use tonic::async_trait;
 use with_options::WithOptions;

-use crate::connector_common::{AwsAuthProps,
KafkaConnectionInner, KafkaPrivateLinkCommon}; +use crate::connector_common::{AwsAuthProps, KafkaConnectionProps, KafkaPrivateLinkCommon}; use crate::error::ConnectorResult; use crate::source::kafka::{KafkaContextCommon, RwConsumerContext}; use crate::{dispatch_connection_impl, ConnectionImpl}; #[async_trait] -pub trait ConnectionValidate { +pub trait Connection { async fn test_connection(&self) -> ConnectorResult<()>; } @@ -38,7 +38,7 @@ pub trait ConnectionValidate { #[serde(deny_unknown_fields)] pub struct KafkaConnection { #[serde(flatten)] - pub inner: KafkaConnectionInner, + pub inner: KafkaConnectionProps, #[serde(flatten)] pub kafka_private_link_common: KafkaPrivateLinkCommon, #[serde(flatten)] @@ -64,7 +64,7 @@ pub async fn validate_connection(connection: &PbConnection) -> ConnectorResult<( } #[async_trait] -impl ConnectionValidate for KafkaConnection { +impl Connection for KafkaConnection { async fn test_connection(&self) -> ConnectorResult<()> { let client = self.build_client().await?; // describe cluster here @@ -109,7 +109,7 @@ impl KafkaConnection { pub struct IcebergConnection {} #[async_trait] -impl ConnectionValidate for IcebergConnection { +impl Connection for IcebergConnection { async fn test_connection(&self) -> ConnectorResult<()> { todo!() } @@ -121,7 +121,7 @@ impl ConnectionValidate for IcebergConnection { pub struct SchemaRegistryConnection {} #[async_trait] -impl ConnectionValidate for SchemaRegistryConnection { +impl Connection for SchemaRegistryConnection { async fn test_connection(&self) -> ConnectorResult<()> { todo!() } diff --git a/src/connector/src/connector_common/mod.rs b/src/connector/src/connector_common/mod.rs index c75361132a61d..40a1a9f45c809 100644 --- a/src/connector/src/connector_common/mod.rs +++ b/src/connector/src/connector_common/mod.rs @@ -19,14 +19,13 @@ pub use mqtt_common::{MqttCommon, QualityOfService as MqttQualityOfService}; mod common; pub use common::{ - AwsAuthProps, AwsPrivateLinkItem, KafkaCommon, KafkaConnectionInner, KafkaPrivateLinkCommon, + AwsAuthProps, AwsPrivateLinkItem, KafkaCommon, KafkaConnectionProps, KafkaPrivateLinkCommon, KinesisCommon, MongodbCommon, NatsCommon, PulsarCommon, PulsarOauthCommon, RdKafkaPropertiesCommon, PRIVATE_LINK_BROKER_REWRITE_MAP_KEY, PRIVATE_LINK_TARGETS_KEY, }; -mod connection_util; -pub use connection_util::{ - validate_connection, ConnectionValidate, IcebergConnection, KafkaConnection, - SchemaRegistryConnection, +mod connection; +pub use connection::{ + validate_connection, Connection, IcebergConnection, KafkaConnection, SchemaRegistryConnection, }; mod iceberg; diff --git a/src/connector/src/sink/kafka.rs b/src/connector/src/sink/kafka.rs index 279d28914a2fa..9716938d7a43f 100644 --- a/src/connector/src/sink/kafka.rs +++ b/src/connector/src/sink/kafka.rs @@ -35,7 +35,7 @@ use with_options::WithOptions; use super::catalog::{SinkFormat, SinkFormatDesc}; use super::{Sink, SinkError, SinkParam}; use crate::connector_common::{ - AwsAuthProps, KafkaCommon, KafkaConnectionInner, KafkaPrivateLinkCommon, + AwsAuthProps, KafkaCommon, KafkaConnectionProps, KafkaPrivateLinkCommon, RdKafkaPropertiesCommon, }; use crate::sink::formatter::SinkFormatterImpl; @@ -216,7 +216,7 @@ pub struct KafkaConfig { pub common: KafkaCommon, #[serde(flatten)] - pub connection: KafkaConnectionInner, + pub connection: KafkaConnectionProps, #[serde( rename = "properties.retry.max", diff --git a/src/connector/src/source/kafka/enumerator/client.rs b/src/connector/src/source/kafka/enumerator/client.rs index 
df7cd787e061f..973086d291111 100644 --- a/src/connector/src/source/kafka/enumerator/client.rs +++ b/src/connector/src/source/kafka/enumerator/client.rs @@ -31,14 +31,14 @@ use crate::error::{ConnectorError, ConnectorResult}; use crate::source::base::SplitEnumerator; use crate::source::kafka::split::KafkaSplit; use crate::source::kafka::{ - KafkaConnectionInner, KafkaContextCommon, KafkaProperties, RwConsumerContext, + KafkaConnectionProps, KafkaContextCommon, KafkaProperties, RwConsumerContext, KAFKA_ISOLATION_LEVEL, }; use crate::source::SourceEnumeratorContextRef; type KafkaClientType = BaseConsumer; -pub static SHARED_KAFKA_CLIENT: LazyLock>> = +pub static SHARED_KAFKA_CLIENT: LazyLock>> = LazyLock::new(|| moka::future::Cache::builder().build()); #[derive(Debug, Copy, Clone, Eq, PartialEq)] diff --git a/src/connector/src/source/kafka/mod.rs b/src/connector/src/source/kafka/mod.rs index 34a165a5c29e3..e515aebf8d492 100644 --- a/src/connector/src/source/kafka/mod.rs +++ b/src/connector/src/source/kafka/mod.rs @@ -17,7 +17,7 @@ use std::collections::HashMap; use serde::Deserialize; use serde_with::{serde_as, DisplayFromStr}; -use crate::connector_common::{AwsAuthProps, KafkaConnectionInner, KafkaPrivateLinkCommon}; +use crate::connector_common::{AwsAuthProps, KafkaConnectionProps, KafkaPrivateLinkCommon}; mod client_context; pub mod enumerator; @@ -144,7 +144,7 @@ pub struct KafkaProperties { pub common: KafkaCommon, #[serde(flatten)] - pub connection: KafkaConnectionInner, + pub connection: KafkaConnectionProps, #[serde(flatten)] pub rdkafka_properties_common: RdKafkaPropertiesCommon, From 28bb651d208e58eb98a747aacf1a6f3bf3015ef1 Mon Sep 17 00:00:00 2001 From: tabversion Date: Mon, 25 Nov 2024 14:31:19 +0800 Subject: [PATCH 45/51] rename migration --- src/meta/model/migration/src/lib.rs | 4 ++-- ...ection_params.rs => m20241125_043732_connection_params.rs} | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename src/meta/model/migration/src/{m20241118_043732_connection_params.rs => m20241125_043732_connection_params.rs} (100%) diff --git a/src/meta/model/migration/src/lib.rs b/src/meta/model/migration/src/lib.rs index 31530b0fd40da..6280756ffc101 100644 --- a/src/meta/model/migration/src/lib.rs +++ b/src/meta/model/migration/src/lib.rs @@ -25,8 +25,8 @@ mod m20240911_083152_variable_vnode_count; mod m20241016_065621_hummock_gc_history; mod m20241025_062548_singleton_vnode_count; mod m20241115_085007_remove_function_type; -mod m20241118_043732_connection_params; mod m20241120_182555_hummock_add_time_travel_sst_index; +mod m20241125_043732_connection_params; mod utils; pub struct Migrator; @@ -90,8 +90,8 @@ impl MigratorTrait for Migrator { Box::new(m20241016_065621_hummock_gc_history::Migration), Box::new(m20241025_062548_singleton_vnode_count::Migration), Box::new(m20241115_085007_remove_function_type::Migration), - Box::new(m20241118_043732_connection_params::Migration), Box::new(m20241120_182555_hummock_add_time_travel_sst_index::Migration), + Box::new(m20241125_043732_connection_params::Migration), ] } } diff --git a/src/meta/model/migration/src/m20241118_043732_connection_params.rs b/src/meta/model/migration/src/m20241125_043732_connection_params.rs similarity index 100% rename from src/meta/model/migration/src/m20241118_043732_connection_params.rs rename to src/meta/model/migration/src/m20241125_043732_connection_params.rs From 9c0e9dfc4b58478ba48092817b7b8b3d839cf3b8 Mon Sep 17 00:00:00 2001 From: Bohan Zhang Date: Mon, 25 Nov 2024 17:09:34 +0800 Subject: [PATCH 46/51] refactor: 
make object ref outside `ast::Value` (#19559) Co-authored-by: tabversion --- src/frontend/src/utils/with_options.rs | 15 ++--- src/meta/src/manager/diagnose.rs | 4 +- src/sqlparser/src/ast/legacy_source.rs | 18 +++--- src/sqlparser/src/ast/mod.rs | 45 ++++++++++++++- src/sqlparser/src/ast/statement.rs | 2 +- src/sqlparser/src/ast/value.rs | 6 -- src/sqlparser/src/parser.rs | 67 ++++++++++++++++------- src/sqlparser/tests/sqlparser_common.rs | 8 +-- src/sqlparser/tests/sqlparser_postgres.rs | 6 +- 9 files changed, 117 insertions(+), 54 deletions(-) diff --git a/src/frontend/src/utils/with_options.rs b/src/frontend/src/utils/with_options.rs index 62d0bd2170e6f..b2c564098a45c 100644 --- a/src/frontend/src/utils/with_options.rs +++ b/src/frontend/src/utils/with_options.rs @@ -30,7 +30,8 @@ use risingwave_pb::secret::secret_ref::PbRefAsType; use risingwave_pb::secret::PbSecretRef; use risingwave_sqlparser::ast::{ ConnectionRefValue, CreateConnectionStatement, CreateSinkStatement, CreateSourceStatement, - CreateSubscriptionStatement, SecretRefAsType, SecretRefValue, SqlOption, Statement, Value, + CreateSubscriptionStatement, SecretRefAsType, SecretRefValue, SqlOption, SqlOptionValue, + Statement, Value, }; use super::OverwriteOptions; @@ -346,7 +347,7 @@ impl TryFrom<&[SqlOption]> for WithOptions { for option in options { let key = option.name.real_value(); match &option.value { - Value::SecretRef(r) => { + SqlOptionValue::SecretRef(r) => { if secret_ref.insert(key.clone(), r.clone()).is_some() || inner.contains_key(&key) { @@ -357,7 +358,7 @@ impl TryFrom<&[SqlOption]> for WithOptions { } continue; } - Value::ConnectionRef(r) => { + SqlOptionValue::ConnectionRef(r) => { if key != CONNECTION_REF_KEY { return Err(RwError::from(ErrorCode::InvalidParameterValue(format!( "expect 'profile' as the key for connection ref, but got: {}", @@ -377,10 +378,10 @@ impl TryFrom<&[SqlOption]> for WithOptions { _ => {} } let value: String = match option.value.clone() { - Value::CstyleEscapedString(s) => s.value, - Value::SingleQuotedString(s) => s, - Value::Number(n) => n, - Value::Boolean(b) => b.to_string(), + SqlOptionValue::Value(Value::CstyleEscapedString(s)) => s.value, + SqlOptionValue::Value(Value::SingleQuotedString(s)) => s, + SqlOptionValue::Value(Value::Number(n)) => n, + SqlOptionValue::Value(Value::Boolean(b)) => b.to_string(), _ => { return Err(RwError::from(ErrorCode::InvalidParameterValue( "`with options` or `with properties` only support single quoted string value and C style escaped string" diff --git a/src/meta/src/manager/diagnose.rs b/src/meta/src/manager/diagnose.rs index fd4d67c256685..33a97537ab467 100644 --- a/src/meta/src/manager/diagnose.rs +++ b/src/meta/src/manager/diagnose.rs @@ -759,12 +759,12 @@ fn redact_all_sql_options(sql: &str) -> Option { }; if let Some(options) = options.0 { for option in options { - option.value = Value::SingleQuotedString("[REDACTED]".into()); + option.value = Value::SingleQuotedString("[REDACTED]".into()).into(); } } if let Some(options) = options.1 { for option in options { - option.value = Value::SingleQuotedString("[REDACTED]".into()); + option.value = Value::SingleQuotedString("[REDACTED]".into()).into(); } } writeln!(&mut redacted, "{statement}").unwrap(); diff --git a/src/sqlparser/src/ast/legacy_source.rs b/src/sqlparser/src/ast/legacy_source.rs index 7a5abf35a2df8..592cbf79c5816 100644 --- a/src/sqlparser/src/ast/legacy_source.rs +++ b/src/sqlparser/src/ast/legacy_source.rs @@ -163,7 +163,7 @@ impl LegacyRowFormat { value: "message".into(), 
quote_style: None, }]), - value: Value::SingleQuotedString(schema.message_name.0), + value: Value::SingleQuotedString(schema.message_name.0).into(), }]; if schema.use_schema_registry { options.push(SqlOption { @@ -171,7 +171,7 @@ impl LegacyRowFormat { value: "schema.registry".into(), quote_style: None, }]), - value: Value::SingleQuotedString(schema.row_schema_location.0), + value: Value::SingleQuotedString(schema.row_schema_location.0).into(), }); } else { options.push(SqlOption { @@ -179,7 +179,7 @@ impl LegacyRowFormat { value: "schema.location".into(), quote_style: None, }]), - value: Value::SingleQuotedString(schema.row_schema_location.0), + value: Value::SingleQuotedString(schema.row_schema_location.0).into(), }) } options @@ -191,7 +191,7 @@ impl LegacyRowFormat { value: "schema.registry".into(), quote_style: None, }]), - value: Value::SingleQuotedString(schema.row_schema_location.0), + value: Value::SingleQuotedString(schema.row_schema_location.0).into(), }] } else { vec![SqlOption { @@ -199,7 +199,7 @@ impl LegacyRowFormat { value: "schema.location".into(), quote_style: None, }]), - value: Value::SingleQuotedString(schema.row_schema_location.0), + value: Value::SingleQuotedString(schema.row_schema_location.0).into(), }] } } @@ -209,7 +209,7 @@ impl LegacyRowFormat { value: "schema.registry".into(), quote_style: None, }]), - value: Value::SingleQuotedString(schema.row_schema_location.0), + value: Value::SingleQuotedString(schema.row_schema_location.0).into(), }] } LegacyRowFormat::Csv(schema) => { @@ -221,7 +221,8 @@ impl LegacyRowFormat { }]), value: Value::SingleQuotedString( String::from_utf8_lossy(&[schema.delimiter]).into(), - ), + ) + .into(), }, SqlOption { name: ObjectName(vec![Ident { @@ -232,7 +233,8 @@ impl LegacyRowFormat { "false".into() } else { "true".into() - }), + }) + .into(), }, ] } diff --git a/src/sqlparser/src/ast/mod.rs b/src/sqlparser/src/ast/mod.rs index bae38996066ef..9c48f1d03ce14 100644 --- a/src/sqlparser/src/ast/mod.rs +++ b/src/sqlparser/src/ast/mod.rs @@ -2736,7 +2736,7 @@ impl ParseTo for ObjectType { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct SqlOption { pub name: ObjectName, - pub value: Value, + pub value: SqlOptionValue, } impl fmt::Display for SqlOption { @@ -2755,6 +2755,44 @@ impl fmt::Display for SqlOption { } } +#[derive(Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum SqlOptionValue { + Value(Value), + SecretRef(SecretRefValue), + ConnectionRef(ConnectionRefValue), +} + +impl fmt::Debug for SqlOptionValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + SqlOptionValue::Value(value) => write!(f, "{:?}", value), + SqlOptionValue::SecretRef(secret_ref) => write!(f, "secret {:?}", secret_ref), + SqlOptionValue::ConnectionRef(connection_ref) => { + write!(f, "connection {:?}", connection_ref) + } + } + } +} + +impl fmt::Display for SqlOptionValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + SqlOptionValue::Value(value) => write!(f, "{}", value), + SqlOptionValue::SecretRef(secret_ref) => write!(f, "secret {}", secret_ref), + SqlOptionValue::ConnectionRef(connection_ref) => { + write!(f, "connection {}", connection_ref) + } + } + } +} + +impl From for SqlOptionValue { + fn from(value: Value) -> Self { + SqlOptionValue::Value(value) + } +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum EmitMode { @@ -3148,7 +3186,10 @@ impl 
TryFrom> for CreateFunctionWithOptions { let mut always_retry_on_network_error = None; for option in with_options { if option.name.to_string().to_lowercase() == "always_retry_on_network_error" { - always_retry_on_network_error = Some(option.value == Value::Boolean(true)); + always_retry_on_network_error = Some(matches!( + option.value, + SqlOptionValue::Value(Value::Boolean(true)) + )); } else { return Err(StrError(format!("Unsupported option: {}", option.name))); } diff --git a/src/sqlparser/src/ast/statement.rs b/src/sqlparser/src/ast/statement.rs index 72680161defea..4d4b62f8f97e2 100644 --- a/src/sqlparser/src/ast/statement.rs +++ b/src/sqlparser/src/ast/statement.rs @@ -845,7 +845,7 @@ impl ParseTo for CreateSecretStatement { impl_parse_to!(with_properties: WithProperties, parser); let mut credential = Value::Null; if parser.parse_keyword(Keyword::AS) { - credential = parser.parse_value()?; + credential = parser.ensure_parse_value()?; } Ok(Self { if_not_exists, diff --git a/src/sqlparser/src/ast/value.rs b/src/sqlparser/src/ast/value.rs index 051e0814a3d9f..7f6cdb048b4e6 100644 --- a/src/sqlparser/src/ast/value.rs +++ b/src/sqlparser/src/ast/value.rs @@ -59,10 +59,6 @@ pub enum Value { }, /// `NULL` value Null, - /// name of the reference to secret - SecretRef(SecretRefValue), - /// name of the reference to connection - ConnectionRef(ConnectionRefValue), } impl fmt::Display for Value { @@ -117,8 +113,6 @@ impl fmt::Display for Value { Ok(()) } Value::Null => write!(f, "NULL"), - Value::SecretRef(v) => write!(f, "secret {}", v), - Value::ConnectionRef(v) => write!(f, "connection {}", v), } } } diff --git a/src/sqlparser/src/parser.rs b/src/sqlparser/src/parser.rs index 9d7f308b7833f..bae54c21124a6 100644 --- a/src/sqlparser/src/parser.rs +++ b/src/sqlparser/src/parser.rs @@ -583,7 +583,7 @@ impl Parser<'_> { Token::Word(w) => match w.keyword { Keyword::TRUE | Keyword::FALSE | Keyword::NULL => { *self = checkpoint; - Ok(Expr::Value(self.parse_value()?)) + Ok(Expr::Value(self.ensure_parse_value()?)) } Keyword::CASE => self.parse_case_expr(), Keyword::CAST => self.parse_cast_expr(), @@ -706,7 +706,7 @@ impl Parser<'_> { | Token::HexStringLiteral(_) | Token::CstyleEscapesString(_) => { *self = checkpoint; - Ok(Expr::Value(self.parse_value()?)) + Ok(Expr::Value(self.ensure_parse_value()?)) } Token::Parameter(number) => self.parse_param(number), Token::Pipe => { @@ -2941,7 +2941,7 @@ impl Parser<'_> { pub fn parse_sql_option(&mut self) -> PResult { let name = self.parse_object_name()?; self.expect_token(&Token::Eq)?; - let value = self.parse_value()?; + let value = self.parse_value_and_obj_ref::()?; Ok(SqlOption { name, value }) } @@ -3592,44 +3592,69 @@ impl Parser<'_> { values } + pub fn ensure_parse_value(&mut self) -> PResult { + match self.parse_value_and_obj_ref::()? 
{ + SqlOptionValue::Value(value) => Ok(value), + SqlOptionValue::SecretRef(_) | SqlOptionValue::ConnectionRef(_) => unreachable!(), + } + } + /// Parse a literal value (numbers, strings, date/time, booleans) - pub fn parse_value(&mut self) -> PResult { + pub fn parse_value_and_obj_ref( + &mut self, + ) -> PResult { let checkpoint = *self; let token = self.next_token(); match token.token { Token::Word(w) => match w.keyword { - Keyword::TRUE => Ok(Value::Boolean(true)), - Keyword::FALSE => Ok(Value::Boolean(false)), - Keyword::NULL => Ok(Value::Null), + Keyword::TRUE => Ok(Value::Boolean(true).into()), + Keyword::FALSE => Ok(Value::Boolean(false).into()), + Keyword::NULL => Ok(Value::Null.into()), Keyword::NoKeyword if w.quote_style.is_some() => match w.quote_style { - Some('"') => Ok(Value::DoubleQuotedString(w.value)), - Some('\'') => Ok(Value::SingleQuotedString(w.value)), + Some('"') => Ok(Value::DoubleQuotedString(w.value).into()), + Some('\'') => Ok(Value::SingleQuotedString(w.value).into()), _ => self.expected_at(checkpoint, "A value")?, }, Keyword::SECRET => { + if FORBID_OBJ_REF { + return self.expected_at( + checkpoint, + "a concrete value rather than a secret reference", + ); + } let secret_name = self.parse_object_name()?; let ref_as = if self.parse_keywords(&[Keyword::AS, Keyword::FILE]) { SecretRefAsType::File } else { SecretRefAsType::Text }; - Ok(Value::SecretRef(SecretRefValue { + Ok(SqlOptionValue::SecretRef(SecretRefValue { secret_name, ref_as, })) } Keyword::CONNECTION => { + if FORBID_OBJ_REF { + return self.expected_at( + checkpoint, + "a concrete value rather than a connection reference", + ); + } let connection_name = self.parse_object_name()?; - Ok(Value::ConnectionRef(ConnectionRefValue { connection_name })) + Ok(SqlOptionValue::ConnectionRef(ConnectionRefValue { + connection_name, + })) } _ => self.expected_at(checkpoint, "a concrete value"), }, - Token::Number(ref n) => Ok(Value::Number(n.clone())), - Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())), - Token::DollarQuotedString(ref s) => Ok(Value::DollarQuotedString(s.clone())), - Token::CstyleEscapesString(ref s) => Ok(Value::CstyleEscapedString(s.clone())), - Token::NationalStringLiteral(ref s) => Ok(Value::NationalStringLiteral(s.to_string())), - Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())), + Token::Number(ref n) => Ok(Value::Number(n.clone()).into()), + Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string()).into()), + Token::DollarQuotedString(ref s) => Ok(Value::DollarQuotedString(s.clone()).into()), + Token::CstyleEscapesString(ref s) => Ok(Value::CstyleEscapedString(s.clone()).into()), + Token::NationalStringLiteral(ref s) => { + Ok(Value::NationalStringLiteral(s.to_string()).into()) + } + Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string()).into()), _ => self.expected_at(checkpoint, "a value"), } } @@ -3640,7 +3665,7 @@ impl Parser<'_> { separated( 1.., alt(( - Self::parse_value.map(SetVariableValueSingle::Literal), + Self::ensure_parse_value.map(SetVariableValueSingle::Literal), |parser: &mut Self| { let checkpoint = *parser; let ident = parser.parse_identifier()?; @@ -3667,7 +3692,7 @@ impl Parser<'_> { pub fn parse_number_value(&mut self) -> PResult { let checkpoint = *self; - match self.parse_value()? { + match self.ensure_parse_value()? 
{ Value::Number(v) => Ok(v), _ => self.expected_at(checkpoint, "literal number"), } @@ -4352,7 +4377,7 @@ impl Parser<'_> { })), ), Self::parse_identifier.map(SetTimeZoneValue::Ident), - Self::parse_value.map(SetTimeZoneValue::Literal), + Self::ensure_parse_value.map(SetTimeZoneValue::Literal), )) .expect("variable") .parse_next(self)?; @@ -4371,7 +4396,7 @@ impl Parser<'_> { }) } else if self.parse_keyword(Keyword::TRANSACTION) && modifier.is_none() { if self.parse_keyword(Keyword::SNAPSHOT) { - let snapshot_id = self.parse_value()?; + let snapshot_id = self.ensure_parse_value()?; return Ok(Statement::SetTransaction { modes: vec![], snapshot: Some(snapshot_id), diff --git a/src/sqlparser/tests/sqlparser_common.rs b/src/sqlparser/tests/sqlparser_common.rs index c8f6fb41d32a9..71af4bba29871 100644 --- a/src/sqlparser/tests/sqlparser_common.rs +++ b/src/sqlparser/tests/sqlparser_common.rs @@ -1543,11 +1543,11 @@ fn parse_create_table_with_options() { vec![ SqlOption { name: vec!["foo".into()].into(), - value: Value::SingleQuotedString("bar".into()) + value: Value::SingleQuotedString("bar".into()).into(), }, SqlOption { name: vec!["a".into()].into(), - value: number("123") + value: number("123").into(), }, ], with_options @@ -3145,11 +3145,11 @@ fn parse_create_view_with_options() { vec![ SqlOption { name: vec!["foo".into()].into(), - value: Value::SingleQuotedString("bar".into()) + value: Value::SingleQuotedString("bar".into()).into(), }, SqlOption { name: vec!["a".into()].into(), - value: number("123") + value: number("123").into(), }, ], with_options diff --git a/src/sqlparser/tests/sqlparser_postgres.rs b/src/sqlparser/tests/sqlparser_postgres.rs index 549920d1c7585..a2edb08edda44 100644 --- a/src/sqlparser/tests/sqlparser_postgres.rs +++ b/src/sqlparser/tests/sqlparser_postgres.rs @@ -159,15 +159,15 @@ fn parse_create_table_with_defaults() { vec![ SqlOption { name: vec!["fillfactor".into()].into(), - value: number("20") + value: number("20").into(), }, SqlOption { name: vec!["user_catalog_table".into()].into(), - value: Value::Boolean(true) + value: Value::Boolean(true).into(), }, SqlOption { name: vec!["autovacuum_vacuum_threshold".into()].into(), - value: number("100") + value: number("100").into(), }, ] ); From d24fafb0b7deb1e23a6323c77e2a6645a2e69143 Mon Sep 17 00:00:00 2001 From: tabversion Date: Tue, 26 Nov 2024 13:29:10 +0800 Subject: [PATCH 47/51] rerun Signed-off-by: tabversion From dc3fd25c01c5ddadd4274f9efd91ecb0ef636cad Mon Sep 17 00:00:00 2001 From: tabversion Date: Tue, 26 Nov 2024 16:15:07 +0800 Subject: [PATCH 48/51] fix catalog proto --- proto/catalog.proto | 2 +- .../src/connector_common/connection.rs | 27 ++++++------ src/ctl/src/cmd_impl/meta/connection.rs | 22 +++++----- .../src/catalog/connection_catalog.rs | 19 +++++---- .../rw_catalog/rw_connections.rs | 15 ++++--- src/frontend/src/handler/show.rs | 42 +++++++++---------- src/frontend/src/utils/with_options.rs | 3 +- src/meta/model/src/connection.rs | 5 +-- src/meta/service/src/ddl_service.rs | 4 +- src/meta/src/controller/catalog.rs | 3 +- src/meta/src/controller/mod.rs | 12 +++--- 11 files changed, 75 insertions(+), 79 deletions(-) diff --git a/proto/catalog.proto b/proto/catalog.proto index d8704e3ac7ba9..d396d27cf03a5 100644 --- a/proto/catalog.proto +++ b/proto/catalog.proto @@ -269,9 +269,9 @@ message Connection { string name = 4; oneof info { PrivateLinkService private_link_service = 5 [deprecated = true]; - ConnectionParams connection_params = 7; } uint32 owner = 6; + ConnectionParams connection_params = 
7; } message Index { diff --git a/src/connector/src/connector_common/connection.rs b/src/connector/src/connector_common/connection.rs index fa1f420544677..bb80a5e9f92b4 100644 --- a/src/connector/src/connector_common/connection.rs +++ b/src/connector/src/connector_common/connection.rs @@ -46,20 +46,23 @@ pub struct KafkaConnection { } pub async fn validate_connection(connection: &PbConnection) -> ConnectorResult<()> { - if let Some(ref info) = connection.info { - match info { - risingwave_pb::catalog::connection::Info::ConnectionParams(cp) => { - let options = cp.properties.clone().into_iter().collect(); - let secret_refs = cp.secret_refs.clone().into_iter().collect(); - let props_secret_resolved = - LocalSecretManager::global().fill_secrets(options, secret_refs)?; - let connection_impl = - ConnectionImpl::from_proto(cp.connection_type(), props_secret_resolved)?; - dispatch_connection_impl!(connection_impl, inner, inner.test_connection().await?) - } - risingwave_pb::catalog::connection::Info::PrivateLinkService(_) => unreachable!(), + #[cfg(debug_assertions)] + { + if let Some(risingwave_pb::catalog::connection::Info::PrivateLinkService(_)) = + connection.info + { + unreachable!("private link has been deprecated.") } } + if let Some(ref cp) = connection.connection_params { + let options = cp.properties.clone().into_iter().collect(); + let secret_refs = cp.secret_refs.clone().into_iter().collect(); + let props_secret_resolved = + LocalSecretManager::global().fill_secrets(options, secret_refs)?; + let connection_impl = + ConnectionImpl::from_proto(cp.connection_type(), props_secret_resolved)?; + dispatch_connection_impl!(connection_impl, inner, inner.test_connection().await?) + } Ok(()) } diff --git a/src/ctl/src/cmd_impl/meta/connection.rs b/src/ctl/src/cmd_impl/meta/connection.rs index c7bd94b58de62..e19fdcec48423 100644 --- a/src/ctl/src/cmd_impl/meta/connection.rs +++ b/src/ctl/src/cmd_impl/meta/connection.rs @@ -30,19 +30,19 @@ pub async fn list_connections(context: &CtlContext) -> anyhow::Result<()> { "Connection#{}, connection_name: {}, {}", conn.id, conn.name, - match conn.info { - Some(Info::PrivateLinkService(svc)) => format!( + if let Some(Info::PrivateLinkService(svc)) = conn.info { + format!( "PrivateLink: service_name: {}, endpoint_id: {}, dns_entries: {:?}", svc.service_name, svc.endpoint_id, svc.dns_entries, - ), - Some(Info::ConnectionParams(params)) => { - format!( - "CONNECTION_PARAMS_{}: {}", - params.get_connection_type().unwrap().as_str_name(), - serde_json::to_string(¶ms.get_properties()).unwrap() - ) - } - None => "None".to_string(), + ) + } else if let Some(ref params) = conn.connection_params { + format!( + "{}: {}", + params.get_connection_type().unwrap().as_str_name(), + serde_json::to_string(¶ms.get_properties()).unwrap() + ) + } else { + "None".to_string() } ); } diff --git a/src/frontend/src/catalog/connection_catalog.rs b/src/frontend/src/catalog/connection_catalog.rs index a938328c46d8f..ff0a4a5aed19e 100644 --- a/src/frontend/src/catalog/connection_catalog.rs +++ b/src/frontend/src/catalog/connection_catalog.rs @@ -13,7 +13,7 @@ // limitations under the License. 
use risingwave_pb::catalog::connection::Info; -use risingwave_pb::catalog::{connection, PbConnection}; +use risingwave_pb::catalog::{connection, ConnectionParams, PbConnection}; use crate::catalog::{ConnectionId, OwnedByUserCatalog}; use crate::user::UserId; @@ -23,21 +23,25 @@ pub struct ConnectionCatalog { pub id: ConnectionId, pub name: String, pub info: connection::Info, + pub connection_params: Option, pub owner: UserId, } impl ConnectionCatalog { pub fn connection_type(&self) -> &str { - match &self.info { - Info::PrivateLinkService(srv) => srv.get_provider().unwrap().as_str_name(), - Info::ConnectionParams(params) => params.get_connection_type().unwrap().as_str_name(), + if let Some(ref cp) = self.connection_params { + cp.get_connection_type().unwrap().as_str_name() + } else { + let Info::PrivateLinkService(ref srv) = self.info; + srv.get_provider().unwrap().as_str_name() } } pub fn provider(&self) -> &str { - match &self.info { - Info::PrivateLinkService(_) => "PRIVATELINK", - Info::ConnectionParams(_) => panic!("ConnectionParams is not supported as provider."), + if matches!(self.info, connection::Info::PrivateLinkService(_)) { + "PRIVATELINK" + } else { + unreachable!() } } } @@ -48,6 +52,7 @@ impl From<&PbConnection> for ConnectionCatalog { id: prost.id, name: prost.name.clone(), info: prost.info.clone().unwrap(), + connection_params: prost.connection_params.clone(), owner: prost.owner, } } diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_connections.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_connections.rs index 7eae4c37ee418..32365e5cb75e6 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_connections.rs +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_connections.rs @@ -14,6 +14,7 @@ use risingwave_common::types::Fields; use risingwave_frontend_macro::system_catalog; +use risingwave_pb::catalog::connection::Info; use crate::catalog::system_catalog::SysCatalogReaderImpl; use crate::error::Result; @@ -51,14 +52,12 @@ fn read_rw_connections(reader: &SysCatalogReaderImpl) -> Result { - rw_connection.provider = conn.provider().into(); - } - risingwave_pb::catalog::connection::Info::ConnectionParams(params) => { - rw_connection.connection_params = print_connection_params(params, schema); - } - }; + if let Some(ref params) = conn.connection_params { + rw_connection.connection_params = print_connection_params(params, schema); + } else if matches!(conn.info, Info::PrivateLinkService(_)) { + // make sure the connection is private link service or it can lead to panic + rw_connection.provider = conn.provider().into(); + } rw_connection }) diff --git a/src/frontend/src/handler/show.rs b/src/frontend/src/handler/show.rs index d7bd80b5a7538..6c5c1fd028967 100644 --- a/src/frontend/src/handler/show.rs +++ b/src/frontend/src/handler/show.rs @@ -410,13 +410,12 @@ pub async fn handle_show_object( .iter_connections() .map(|c| { let name = c.name.clone(); - let r#type = match &c.info { - connection::Info::PrivateLinkService(_) => { - PRIVATELINK_CONNECTION.to_string() - }, - connection::Info::ConnectionParams(params) => { - params.get_connection_type().unwrap().as_str_name().to_string() - } + let r#type = if let Some(ref cp) = c.connection_params { + cp.get_connection_type().unwrap().as_str_name().to_string() + } else if matches!(c.info, connection::Info::PrivateLinkService(_)) { + PRIVATELINK_CONNECTION.to_string() + } else { + unreachable!() }; let source_names = schema .get_source_ids_by_connection(c.id) @@ -430,22 +429,19 @@ pub 
async fn handle_show_object( .into_iter() .filter_map(|sid| schema.get_sink_by_id(&sid).map(|catalog| catalog.name.as_str())) .collect_vec(); - let properties = match &c.info { - connection::Info::PrivateLinkService(i) => { - format!( - "provider: {}\nservice_name: {}\nendpoint_id: {}\navailability_zones: {}\nsources: {}\nsinks: {}", - i.get_provider().unwrap().as_str_name(), - i.service_name, - i.endpoint_id, - serde_json::to_string(&i.dns_entries.keys().collect_vec()).unwrap(), - serde_json::to_string(&source_names).unwrap(), - serde_json::to_string(&sink_names).unwrap(), - ) - } - connection::Info::ConnectionParams(params) => { - // todo: show dep relations - print_connection_params(params, schema) - } + let properties = if let Some(ref params) = c.connection_params { + print_connection_params(params, schema) + } else { + let connection::Info::PrivateLinkService(ref srv) = c.info; + format!( + "provider: {}\nservice_name: {}\nendpoint_id: {}\navailability_zones: {}\nsources: {}\nsinks: {}", + srv.get_provider().unwrap().as_str_name(), + srv.service_name, + srv.endpoint_id, + serde_json::to_string(&srv.dns_entries.keys().collect_vec()).unwrap(), + serde_json::to_string(&source_names).unwrap(), + serde_json::to_string(&sink_names).unwrap(), + ) }; ShowConnectionRow { name, diff --git a/src/frontend/src/utils/with_options.rs b/src/frontend/src/utils/with_options.rs index b2c564098a45c..f2e480fccd671 100644 --- a/src/frontend/src/utils/with_options.rs +++ b/src/frontend/src/utils/with_options.rs @@ -24,7 +24,6 @@ use risingwave_connector::source::kafka::private_link::{ }; pub use risingwave_connector::WithOptionsSecResolved; use risingwave_connector::WithPropertiesExt; -use risingwave_pb::catalog::connection::Info as ConnectionInfo; use risingwave_pb::catalog::connection_params::PbConnectionType; use risingwave_pb::secret::secret_ref::PbRefAsType; use risingwave_pb::secret::PbSecretRef; @@ -206,7 +205,7 @@ pub(crate) fn resolve_connection_ref_and_secret_ref( )?; let connection_catalog = session.get_connection_by_name(schema_name, &connection_name)?; - if let ConnectionInfo::ConnectionParams(params) = &connection_catalog.info { + if let Some(ref params) = connection_catalog.connection_params { connection_id = Some(connection_catalog.id); Some(params.clone()) } else { diff --git a/src/meta/model/src/connection.rs b/src/meta/model/src/connection.rs index eb93a1d82fd38..70efaf6616f98 100644 --- a/src/meta/model/src/connection.rs +++ b/src/meta/model/src/connection.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use risingwave_pb::catalog::connection::PbInfo; use risingwave_pb::catalog::PbConnection; use sea_orm::entity::prelude::*; use sea_orm::ActiveValue::Set; @@ -70,9 +69,7 @@ impl ActiveModelBehavior for ActiveModel {} impl From for ActiveModel { fn from(conn: PbConnection) -> Self { - let Some(PbInfo::ConnectionParams(connection_params)) = conn.info else { - unreachable!("private link has been deprecated.") - }; + let connection_params = conn.connection_params.unwrap(); Self { connection_id: Set(conn.id as _), diff --git a/src/meta/service/src/ddl_service.rs b/src/meta/service/src/ddl_service.rs index 9f4d9b330c24c..b18dcf88a371e 100644 --- a/src/meta/service/src/ddl_service.rs +++ b/src/meta/service/src/ddl_service.rs @@ -25,7 +25,6 @@ use risingwave_connector::sink::catalog::SinkId; use risingwave_meta::manager::{EventLogManagerRef, MetadataManager}; use risingwave_meta::rpc::metrics::MetaMetrics; use risingwave_meta_model::ObjectId; -use risingwave_pb::catalog::connection::Info as ConnectionInfo; use risingwave_pb::catalog::{Comment, Connection, CreateType, Secret, Table}; use risingwave_pb::common::worker_node::State; use risingwave_pb::common::WorkerType; @@ -753,7 +752,8 @@ impl DdlService for DdlServiceImpl { schema_id: req.schema_id, database_id: req.database_id, name: req.name, - info: Some(ConnectionInfo::ConnectionParams(params)), + info: None, + connection_params: Some(params), owner: req.owner_id, }; let version = self diff --git a/src/meta/src/controller/catalog.rs b/src/meta/src/controller/catalog.rs index 847eb1c58728b..ef67090058770 100644 --- a/src/meta/src/controller/catalog.rs +++ b/src/meta/src/controller/catalog.rs @@ -36,7 +36,6 @@ use risingwave_meta_model::{ SourceId, StreamNode, StreamSourceInfo, StreamingParallelism, SubscriptionId, TableId, UserId, ViewId, }; -use risingwave_pb::catalog::connection::Info as ConnectionInfo; use risingwave_pb::catalog::subscription::SubscriptionState; use risingwave_pb::catalog::table::PbTableType; use risingwave_pb::catalog::{ @@ -1468,7 +1467,7 @@ impl CatalogController { check_connection_name_duplicate(&pb_connection, &txn).await?; let mut dep_secrets = HashSet::new(); - if let Some(ConnectionInfo::ConnectionParams(params)) = &pb_connection.info { + if let Some(ref params) = &pb_connection.connection_params { dep_secrets.extend( params .secret_refs diff --git a/src/meta/src/controller/mod.rs b/src/meta/src/controller/mod.rs index d3634b655ae6e..a276657bb18c0 100644 --- a/src/meta/src/controller/mod.rs +++ b/src/meta/src/controller/mod.rs @@ -19,7 +19,7 @@ use risingwave_common::hash::VnodeCount; use risingwave_common::util::epoch::Epoch; use risingwave_meta_model::{ connection, database, function, index, object, schema, secret, sink, source, subscription, - table, view, PrivateLinkService, + table, view, }; use risingwave_meta_model_migration::{MigrationStatus, Migrator, MigratorTrait}; use risingwave_pb::catalog::connection::PbInfo as PbConnectionInfo; @@ -360,18 +360,16 @@ impl From> for PbView { impl From> for PbConnection { fn from(value: ObjectModel) -> Self { - let info: PbConnectionInfo = if value.0.info == PrivateLinkService::default() { - PbConnectionInfo::ConnectionParams(value.0.params.to_protobuf()) - } else { - PbConnectionInfo::PrivateLinkService(value.0.info.to_protobuf()) - }; Self { id: value.1.oid as _, schema_id: value.1.schema_id.unwrap() as _, database_id: value.1.database_id.unwrap() as _, name: value.0.name, owner: value.1.owner_id as _, - info: Some(info), + info: 
Some(PbConnectionInfo::PrivateLinkService( + value.0.info.to_protobuf(), + )), + connection_params: Some(value.0.params.to_protobuf()), } } } From 401660bb6d50dbd1de42c707ff48a7c04ce47e99 Mon Sep 17 00:00:00 2001 From: tabversion Date: Tue, 26 Nov 2024 17:14:35 +0800 Subject: [PATCH 49/51] Revert "fix catalog proto" This reverts commit dc3fd25c01c5ddadd4274f9efd91ecb0ef636cad. --- proto/catalog.proto | 2 +- .../src/connector_common/connection.rs | 27 ++++++------ src/ctl/src/cmd_impl/meta/connection.rs | 22 +++++----- .../src/catalog/connection_catalog.rs | 19 ++++----- .../rw_catalog/rw_connections.rs | 15 +++---- src/frontend/src/handler/show.rs | 42 ++++++++++--------- src/frontend/src/utils/with_options.rs | 3 +- src/meta/model/src/connection.rs | 5 ++- src/meta/service/src/ddl_service.rs | 4 +- src/meta/src/controller/catalog.rs | 3 +- src/meta/src/controller/mod.rs | 12 +++--- 11 files changed, 79 insertions(+), 75 deletions(-) diff --git a/proto/catalog.proto b/proto/catalog.proto index d396d27cf03a5..d8704e3ac7ba9 100644 --- a/proto/catalog.proto +++ b/proto/catalog.proto @@ -269,9 +269,9 @@ message Connection { string name = 4; oneof info { PrivateLinkService private_link_service = 5 [deprecated = true]; + ConnectionParams connection_params = 7; } uint32 owner = 6; - ConnectionParams connection_params = 7; } message Index { diff --git a/src/connector/src/connector_common/connection.rs b/src/connector/src/connector_common/connection.rs index bb80a5e9f92b4..fa1f420544677 100644 --- a/src/connector/src/connector_common/connection.rs +++ b/src/connector/src/connector_common/connection.rs @@ -46,23 +46,20 @@ pub struct KafkaConnection { } pub async fn validate_connection(connection: &PbConnection) -> ConnectorResult<()> { - #[cfg(debug_assertions)] - { - if let Some(risingwave_pb::catalog::connection::Info::PrivateLinkService(_)) = - connection.info - { - unreachable!("private link has been deprecated.") + if let Some(ref info) = connection.info { + match info { + risingwave_pb::catalog::connection::Info::ConnectionParams(cp) => { + let options = cp.properties.clone().into_iter().collect(); + let secret_refs = cp.secret_refs.clone().into_iter().collect(); + let props_secret_resolved = + LocalSecretManager::global().fill_secrets(options, secret_refs)?; + let connection_impl = + ConnectionImpl::from_proto(cp.connection_type(), props_secret_resolved)?; + dispatch_connection_impl!(connection_impl, inner, inner.test_connection().await?) + } + risingwave_pb::catalog::connection::Info::PrivateLinkService(_) => unreachable!(), } } - if let Some(ref cp) = connection.connection_params { - let options = cp.properties.clone().into_iter().collect(); - let secret_refs = cp.secret_refs.clone().into_iter().collect(); - let props_secret_resolved = - LocalSecretManager::global().fill_secrets(options, secret_refs)?; - let connection_impl = - ConnectionImpl::from_proto(cp.connection_type(), props_secret_resolved)?; - dispatch_connection_impl!(connection_impl, inner, inner.test_connection().await?) 
- } Ok(()) } diff --git a/src/ctl/src/cmd_impl/meta/connection.rs b/src/ctl/src/cmd_impl/meta/connection.rs index e19fdcec48423..c7bd94b58de62 100644 --- a/src/ctl/src/cmd_impl/meta/connection.rs +++ b/src/ctl/src/cmd_impl/meta/connection.rs @@ -30,19 +30,19 @@ pub async fn list_connections(context: &CtlContext) -> anyhow::Result<()> { "Connection#{}, connection_name: {}, {}", conn.id, conn.name, - if let Some(Info::PrivateLinkService(svc)) = conn.info { - format!( + match conn.info { + Some(Info::PrivateLinkService(svc)) => format!( "PrivateLink: service_name: {}, endpoint_id: {}, dns_entries: {:?}", svc.service_name, svc.endpoint_id, svc.dns_entries, - ) - } else if let Some(ref params) = conn.connection_params { - format!( - "{}: {}", - params.get_connection_type().unwrap().as_str_name(), - serde_json::to_string(¶ms.get_properties()).unwrap() - ) - } else { - "None".to_string() + ), + Some(Info::ConnectionParams(params)) => { + format!( + "CONNECTION_PARAMS_{}: {}", + params.get_connection_type().unwrap().as_str_name(), + serde_json::to_string(¶ms.get_properties()).unwrap() + ) + } + None => "None".to_string(), } ); } diff --git a/src/frontend/src/catalog/connection_catalog.rs b/src/frontend/src/catalog/connection_catalog.rs index ff0a4a5aed19e..a938328c46d8f 100644 --- a/src/frontend/src/catalog/connection_catalog.rs +++ b/src/frontend/src/catalog/connection_catalog.rs @@ -13,7 +13,7 @@ // limitations under the License. use risingwave_pb::catalog::connection::Info; -use risingwave_pb::catalog::{connection, ConnectionParams, PbConnection}; +use risingwave_pb::catalog::{connection, PbConnection}; use crate::catalog::{ConnectionId, OwnedByUserCatalog}; use crate::user::UserId; @@ -23,25 +23,21 @@ pub struct ConnectionCatalog { pub id: ConnectionId, pub name: String, pub info: connection::Info, - pub connection_params: Option, pub owner: UserId, } impl ConnectionCatalog { pub fn connection_type(&self) -> &str { - if let Some(ref cp) = self.connection_params { - cp.get_connection_type().unwrap().as_str_name() - } else { - let Info::PrivateLinkService(ref srv) = self.info; - srv.get_provider().unwrap().as_str_name() + match &self.info { + Info::PrivateLinkService(srv) => srv.get_provider().unwrap().as_str_name(), + Info::ConnectionParams(params) => params.get_connection_type().unwrap().as_str_name(), } } pub fn provider(&self) -> &str { - if matches!(self.info, connection::Info::PrivateLinkService(_)) { - "PRIVATELINK" - } else { - unreachable!() + match &self.info { + Info::PrivateLinkService(_) => "PRIVATELINK", + Info::ConnectionParams(_) => panic!("ConnectionParams is not supported as provider."), } } } @@ -52,7 +48,6 @@ impl From<&PbConnection> for ConnectionCatalog { id: prost.id, name: prost.name.clone(), info: prost.info.clone().unwrap(), - connection_params: prost.connection_params.clone(), owner: prost.owner, } } diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_connections.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_connections.rs index 32365e5cb75e6..7eae4c37ee418 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_connections.rs +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_connections.rs @@ -14,7 +14,6 @@ use risingwave_common::types::Fields; use risingwave_frontend_macro::system_catalog; -use risingwave_pb::catalog::connection::Info; use crate::catalog::system_catalog::SysCatalogReaderImpl; use crate::error::Result; @@ -52,12 +51,14 @@ fn read_rw_connections(reader: &SysCatalogReaderImpl) -> Result { + 
rw_connection.provider = conn.provider().into(); + } + risingwave_pb::catalog::connection::Info::ConnectionParams(params) => { + rw_connection.connection_params = print_connection_params(params, schema); + } + }; rw_connection }) diff --git a/src/frontend/src/handler/show.rs b/src/frontend/src/handler/show.rs index 6c5c1fd028967..d7bd80b5a7538 100644 --- a/src/frontend/src/handler/show.rs +++ b/src/frontend/src/handler/show.rs @@ -410,12 +410,13 @@ pub async fn handle_show_object( .iter_connections() .map(|c| { let name = c.name.clone(); - let r#type = if let Some(ref cp) = c.connection_params { - cp.get_connection_type().unwrap().as_str_name().to_string() - } else if matches!(c.info, connection::Info::PrivateLinkService(_)) { - PRIVATELINK_CONNECTION.to_string() - } else { - unreachable!() + let r#type = match &c.info { + connection::Info::PrivateLinkService(_) => { + PRIVATELINK_CONNECTION.to_string() + }, + connection::Info::ConnectionParams(params) => { + params.get_connection_type().unwrap().as_str_name().to_string() + } }; let source_names = schema .get_source_ids_by_connection(c.id) @@ -429,19 +430,22 @@ pub async fn handle_show_object( .into_iter() .filter_map(|sid| schema.get_sink_by_id(&sid).map(|catalog| catalog.name.as_str())) .collect_vec(); - let properties = if let Some(ref params) = c.connection_params { - print_connection_params(params, schema) - } else { - let connection::Info::PrivateLinkService(ref srv) = c.info; - format!( - "provider: {}\nservice_name: {}\nendpoint_id: {}\navailability_zones: {}\nsources: {}\nsinks: {}", - srv.get_provider().unwrap().as_str_name(), - srv.service_name, - srv.endpoint_id, - serde_json::to_string(&srv.dns_entries.keys().collect_vec()).unwrap(), - serde_json::to_string(&source_names).unwrap(), - serde_json::to_string(&sink_names).unwrap(), - ) + let properties = match &c.info { + connection::Info::PrivateLinkService(i) => { + format!( + "provider: {}\nservice_name: {}\nendpoint_id: {}\navailability_zones: {}\nsources: {}\nsinks: {}", + i.get_provider().unwrap().as_str_name(), + i.service_name, + i.endpoint_id, + serde_json::to_string(&i.dns_entries.keys().collect_vec()).unwrap(), + serde_json::to_string(&source_names).unwrap(), + serde_json::to_string(&sink_names).unwrap(), + ) + } + connection::Info::ConnectionParams(params) => { + // todo: show dep relations + print_connection_params(params, schema) + } }; ShowConnectionRow { name, diff --git a/src/frontend/src/utils/with_options.rs b/src/frontend/src/utils/with_options.rs index f2e480fccd671..b2c564098a45c 100644 --- a/src/frontend/src/utils/with_options.rs +++ b/src/frontend/src/utils/with_options.rs @@ -24,6 +24,7 @@ use risingwave_connector::source::kafka::private_link::{ }; pub use risingwave_connector::WithOptionsSecResolved; use risingwave_connector::WithPropertiesExt; +use risingwave_pb::catalog::connection::Info as ConnectionInfo; use risingwave_pb::catalog::connection_params::PbConnectionType; use risingwave_pb::secret::secret_ref::PbRefAsType; use risingwave_pb::secret::PbSecretRef; @@ -205,7 +206,7 @@ pub(crate) fn resolve_connection_ref_and_secret_ref( )?; let connection_catalog = session.get_connection_by_name(schema_name, &connection_name)?; - if let Some(ref params) = connection_catalog.connection_params { + if let ConnectionInfo::ConnectionParams(params) = &connection_catalog.info { connection_id = Some(connection_catalog.id); Some(params.clone()) } else { diff --git a/src/meta/model/src/connection.rs b/src/meta/model/src/connection.rs index 70efaf6616f98..eb93a1d82fd38 
100644 --- a/src/meta/model/src/connection.rs +++ b/src/meta/model/src/connection.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use risingwave_pb::catalog::connection::PbInfo; use risingwave_pb::catalog::PbConnection; use sea_orm::entity::prelude::*; use sea_orm::ActiveValue::Set; @@ -69,7 +70,9 @@ impl ActiveModelBehavior for ActiveModel {} impl From for ActiveModel { fn from(conn: PbConnection) -> Self { - let connection_params = conn.connection_params.unwrap(); + let Some(PbInfo::ConnectionParams(connection_params)) = conn.info else { + unreachable!("private link has been deprecated.") + }; Self { connection_id: Set(conn.id as _), diff --git a/src/meta/service/src/ddl_service.rs b/src/meta/service/src/ddl_service.rs index b18dcf88a371e..9f4d9b330c24c 100644 --- a/src/meta/service/src/ddl_service.rs +++ b/src/meta/service/src/ddl_service.rs @@ -25,6 +25,7 @@ use risingwave_connector::sink::catalog::SinkId; use risingwave_meta::manager::{EventLogManagerRef, MetadataManager}; use risingwave_meta::rpc::metrics::MetaMetrics; use risingwave_meta_model::ObjectId; +use risingwave_pb::catalog::connection::Info as ConnectionInfo; use risingwave_pb::catalog::{Comment, Connection, CreateType, Secret, Table}; use risingwave_pb::common::worker_node::State; use risingwave_pb::common::WorkerType; @@ -752,8 +753,7 @@ impl DdlService for DdlServiceImpl { schema_id: req.schema_id, database_id: req.database_id, name: req.name, - info: None, - connection_params: Some(params), + info: Some(ConnectionInfo::ConnectionParams(params)), owner: req.owner_id, }; let version = self diff --git a/src/meta/src/controller/catalog.rs b/src/meta/src/controller/catalog.rs index ef67090058770..847eb1c58728b 100644 --- a/src/meta/src/controller/catalog.rs +++ b/src/meta/src/controller/catalog.rs @@ -36,6 +36,7 @@ use risingwave_meta_model::{ SourceId, StreamNode, StreamSourceInfo, StreamingParallelism, SubscriptionId, TableId, UserId, ViewId, }; +use risingwave_pb::catalog::connection::Info as ConnectionInfo; use risingwave_pb::catalog::subscription::SubscriptionState; use risingwave_pb::catalog::table::PbTableType; use risingwave_pb::catalog::{ @@ -1467,7 +1468,7 @@ impl CatalogController { check_connection_name_duplicate(&pb_connection, &txn).await?; let mut dep_secrets = HashSet::new(); - if let Some(ref params) = &pb_connection.connection_params { + if let Some(ConnectionInfo::ConnectionParams(params)) = &pb_connection.info { dep_secrets.extend( params .secret_refs diff --git a/src/meta/src/controller/mod.rs b/src/meta/src/controller/mod.rs index a276657bb18c0..d3634b655ae6e 100644 --- a/src/meta/src/controller/mod.rs +++ b/src/meta/src/controller/mod.rs @@ -19,7 +19,7 @@ use risingwave_common::hash::VnodeCount; use risingwave_common::util::epoch::Epoch; use risingwave_meta_model::{ connection, database, function, index, object, schema, secret, sink, source, subscription, - table, view, + table, view, PrivateLinkService, }; use risingwave_meta_model_migration::{MigrationStatus, Migrator, MigratorTrait}; use risingwave_pb::catalog::connection::PbInfo as PbConnectionInfo; @@ -360,16 +360,18 @@ impl From> for PbView { impl From> for PbConnection { fn from(value: ObjectModel) -> Self { + let info: PbConnectionInfo = if value.0.info == PrivateLinkService::default() { + PbConnectionInfo::ConnectionParams(value.0.params.to_protobuf()) + } else { + PbConnectionInfo::PrivateLinkService(value.0.info.to_protobuf()) + }; Self { id: value.1.oid as _, 
schema_id: value.1.schema_id.unwrap() as _, database_id: value.1.database_id.unwrap() as _, name: value.0.name, owner: value.1.owner_id as _, - info: Some(PbConnectionInfo::PrivateLinkService( - value.0.info.to_protobuf(), - )), - connection_params: Some(value.0.params.to_protobuf()), + info: Some(info), } } } From 08fb575f65fcb1cc2c047f2f406f2e5d45f9658a Mon Sep 17 00:00:00 2001 From: tabversion Date: Tue, 26 Nov 2024 17:33:56 +0800 Subject: [PATCH 50/51] change syntax --- e2e_test/source_inline/connection/ddl.slt | 15 ++++----------- src/frontend/src/utils/with_options.rs | 8 -------- src/sqlparser/src/parser.rs | 22 +++++++++------------- 3 files changed, 13 insertions(+), 32 deletions(-) diff --git a/e2e_test/source_inline/connection/ddl.slt b/e2e_test/source_inline/connection/ddl.slt index f936519455f32..77b96978961c5 100644 --- a/e2e_test/source_inline/connection/ddl.slt +++ b/e2e_test/source_inline/connection/ddl.slt @@ -28,17 +28,10 @@ conn CONNECTION_TYPE_KAFKA statement error Permission denied: PermissionDenied: secret used by 1 other objects. drop secret sec_broker; -statement error expect 'profile' as the key for connection ref, but got: connection -create table t1 (a int, b varchar) with ( - connector = 'kafka', - connection = connection conn, - topic = 'connection_ddl_1') -format plain encode json; - statement error Duplicated key in both WITH clause and Connection catalog: properties.security.protocol create table t1 (a int, b varchar) with ( connector = 'kafka', - profile = connection conn, + connection = conn, topic = 'connection_ddl_1', properties.security.protocol = 'plaintext') format plain encode json; @@ -46,7 +39,7 @@ format plain encode json; statement error connector kinesis and connection type Kafka are not compatible create table t1 (a int, b varchar) with ( connector = 'kinesis', - profile = connection conn, + connection = conn, stream = 'connection_ddl_1', region = 'us-east-1') format plain encode json; @@ -57,7 +50,7 @@ rpk topic create connection_ddl_1 -p 1 statement ok create table t1 (a int, b varchar) with ( connector = 'kafka', - profile = connection conn, + connection = conn, topic = 'connection_ddl_1') format plain encode json; @@ -80,7 +73,7 @@ flush; statement ok create sink sink_kafka from data_table with ( connector = 'kafka', - profile = connection conn, + connection = conn, topic = 'connection_ddl_1' ) format plain encode json ( force_append_only='true' diff --git a/src/frontend/src/utils/with_options.rs b/src/frontend/src/utils/with_options.rs index b2c564098a45c..60889626ccfad 100644 --- a/src/frontend/src/utils/with_options.rs +++ b/src/frontend/src/utils/with_options.rs @@ -44,8 +44,6 @@ mod options { pub const RETENTION_SECONDS: &str = "retention_seconds"; } -const CONNECTION_REF_KEY: &str = "profile"; - /// Options or properties extracted from the `WITH` clause of DDLs. 
 #[derive(Default, Clone, Debug, PartialEq, Eq, Hash)]
 pub struct WithOptions {
@@ -359,12 +357,6 @@ impl TryFrom<&[SqlOption]> for WithOptions {
                     continue;
                 }
                 SqlOptionValue::ConnectionRef(r) => {
-                    if key != CONNECTION_REF_KEY {
-                        return Err(RwError::from(ErrorCode::InvalidParameterValue(format!(
-                            "expect 'profile' as the key for connection ref, but got: {}",
-                            key
-                        ))));
-                    }
                     if connection_ref.insert(key.clone(), r.clone()).is_some()
                         || inner.contains_key(&key)
                     {
diff --git a/src/sqlparser/src/parser.rs b/src/sqlparser/src/parser.rs
index bae54c21124a6..b1e52e06639b6 100644
--- a/src/sqlparser/src/parser.rs
+++ b/src/sqlparser/src/parser.rs
@@ -2941,7 +2941,15 @@ impl Parser<'_> {
     pub fn parse_sql_option(&mut self) -> PResult<SqlOption> {
         let name = self.parse_object_name()?;
         self.expect_token(&Token::Eq)?;
-        let value = self.parse_value_and_obj_ref::<false>()?;
+        let value = {
+            const CONNECTION_REF_KEY: &str = "connection";
+            if name.real_value().eq_ignore_ascii_case(CONNECTION_REF_KEY) {
+                let connection_name = self.parse_object_name()?;
+                SqlOptionValue::ConnectionRef(ConnectionRefValue { connection_name })
+            } else {
+                self.parse_value_and_obj_ref::<false>()?
+            }
+        };
         Ok(SqlOption { name, value })
     }
 
@@ -3633,18 +3641,6 @@
                         ref_as,
                     }))
                 }
-                Keyword::CONNECTION => {
-                    if FORBID_OBJ_REF {
-                        return self.expected_at(
-                            checkpoint,
-                            "a concrete value rather than a connection reference",
-                        );
-                    }
-                    let connection_name = self.parse_object_name()?;
-                    Ok(SqlOptionValue::ConnectionRef(ConnectionRefValue {
-                        connection_name,
-                    }))
-                }
                 _ => self.expected_at(checkpoint, "a concrete value"),
             },
             Token::Number(ref n) => Ok(Value::Number(n.clone()).into()),

From 2f0a4a8aedc7639b483776f4df96bf49b9437f8a Mon Sep 17 00:00:00 2001
From: tabversion
Date: Thu, 28 Nov 2024 17:40:01 +0800
Subject: [PATCH 51/51] test

---
 src/sqlparser/src/ast/ddl.rs | 8 ++++----
 src/sqlparser/src/parser.rs | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/sqlparser/src/ast/ddl.rs b/src/sqlparser/src/ast/ddl.rs
index 7d3d07a9f8d5e..c7cf401221b07 100644
--- a/src/sqlparser/src/ast/ddl.rs
+++ b/src/sqlparser/src/ast/ddl.rs
@@ -20,10 +20,10 @@ use core::fmt;
 
 #[cfg(feature = "serde")]
 use serde::{Deserialize, Serialize};
 
-use super::{FormatEncodeOptions, Value};
+use super::FormatEncodeOptions;
 use crate::ast::{
-    display_comma_separated, display_separated, DataType, Expr, Ident, ObjectName, SecretRef,
-    SetVariableValue,
+    display_comma_separated, display_separated, DataType, Expr, Ident, ObjectName, SecretRefValue,
+    SetVariableValue, Value,
 };
 use crate::tokenizer::Token;
 
@@ -824,6 +824,6 @@ impl fmt::Display for ReferentialAction {
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub struct WebhookSourceInfo {
-    pub secret_ref: SecretRef,
+    pub secret_ref: SecretRefValue,
     pub signature_expr: Expr,
 }
diff --git a/src/sqlparser/src/parser.rs b/src/sqlparser/src/parser.rs
index 505f37ca5ffaf..43c7997818a14 100644
--- a/src/sqlparser/src/parser.rs
+++ b/src/sqlparser/src/parser.rs
@@ -3585,7 +3585,7 @@ impl Parser<'_> {
         let secret_name = self.parse_object_name()?;
         let with_options = self.parse_with_properties()?;
         self.expect_keyword(Keyword::AS)?;
-        let new_credential = self.parse_value()?;
+        let new_credential = self.ensure_parse_value()?;
         let operation = AlterSecretOperation::ChangeCredential { new_credential };
         Ok(Statement::AlterSecret {
             name: secret_name,
@@ -3676,7 +3676,7 @@
                     );
                 }
                 let secret = self.parse_secret_ref()?;
-                Ok(Value::Ref(secret))
+                Ok(SqlOptionValue::SecretRef(secret))
             }
             _ => self.expected_at(checkpoint, "a concrete value"),
         },
@@ -3692,14 +3692,14 @@
         }
     }
 
-    fn parse_secret_ref(&mut self) -> PResult<SecretRef> {
+    fn parse_secret_ref(&mut self) -> PResult<SecretRefValue> {
         let secret_name = self.parse_object_name()?;
         let ref_as = if self.parse_keywords(&[Keyword::AS, Keyword::FILE]) {
             SecretRefAsType::File
         } else {
             SecretRefAsType::Text
         };
-        Ok(SecretRef {
+        Ok(SecretRefValue {
             secret_name,
             ref_as,
         })
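
For readers following the series, the client-sharing pattern the enumerator side converges on (a process-wide `SHARED_KAFKA_CLIENT` backed by `moka::future::Cache` instead of the first revision's `Mutex<HashMap>` with manual `ref_count`) can be sketched standalone. This is a minimal illustration, not RisingWave code: `SharedClient`, `connect`, and `get_or_create` are stand-ins for `Arc<BaseConsumer<RwConsumerContext>>` and the rdkafka consumer construction, the cache is keyed by broker address as in the first revision, and `moka` and `tokio` are assumed as dependencies.

use std::sync::{Arc, LazyLock};

use moka::future::Cache;

// Stand-in for `Arc<BaseConsumer<RwConsumerContext>>`.
#[derive(Clone)]
struct SharedClient(Arc<String>);

// Process-wide cache, mirroring `SHARED_KAFKA_CLIENT`.
static SHARED_CLIENTS: LazyLock<Cache<String, SharedClient>> =
    LazyLock::new(|| Cache::builder().build());

// Stand-in for the real consumer construction
// (`KafkaContextCommon::new` + `create_with_context`).
async fn connect(broker_addr: &str) -> SharedClient {
    SharedClient(Arc::new(format!("client@{broker_addr}")))
}

async fn get_or_create(broker_addr: String) -> SharedClient {
    // `get_with` runs the init future only on a cache miss and deduplicates
    // concurrent initializations for the same key, replacing the manual
    // `ref_count` bookkeeping of the first revision.
    SHARED_CLIENTS
        .get_with(broker_addr.clone(), connect(&broker_addr))
        .await
}

#[tokio::main]
async fn main() {
    let a = get_or_create("broker-1:9092".into()).await;
    let b = get_or_create("broker-1:9092".into()).await;
    // Both callers hold the same underlying client.
    assert!(Arc::ptr_eq(&a.0, &b.0));
}

The design point is that the cache, rather than each call site, owns the creation and sharing of clients: callers that go away simply drop their `Arc`, so meta no longer has to decrement per-source reference counts when a source worker is dropped.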
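
Similarly, the PATCH 46 refactor (moving object references out of `ast::Value` into a `SqlOptionValue` wrapper) can be reduced to a small sketch. This is illustrative only: `String` stands in for the real `SecretRefValue`/`ConnectionRefValue` payloads, and `ensure_value` plays the role of `ensure_parse_value` at literal-only call sites.

// `Value` holds only literals, as in the patch.
#[derive(Debug, Clone)]
enum Value {
    SingleQuotedString(String),
    Boolean(bool),
}

// Wrapper used by WITH-clause options: a literal or an object reference.
#[derive(Debug, Clone)]
enum SqlOptionValue {
    Value(Value),
    SecretRef(String),     // stand-in for SecretRefValue
    ConnectionRef(String), // stand-in for ConnectionRefValue
}

impl From<Value> for SqlOptionValue {
    fn from(value: Value) -> Self {
        SqlOptionValue::Value(value)
    }
}

// Literal-only call sites (expressions, SET, secret credentials) unwrap the
// wrapper and reject references, as `ensure_parse_value` does.
fn ensure_value(opt: SqlOptionValue) -> Result<Value, String> {
    match opt {
        SqlOptionValue::Value(v) => Ok(v),
        other => Err(format!("expected a literal, got {other:?}")),
    }
}

fn main() {
    let literal: SqlOptionValue = Value::SingleQuotedString("kafka".into()).into();
    assert!(ensure_value(literal).is_ok());
    assert!(ensure_value(SqlOptionValue::ConnectionRef("conn".into())).is_err());
}

Keeping references out of `Value` lets literal contexts reject `secret ...` and `connection ...` at the type level instead of pattern-matching them away inside every consumer of `Value`.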