diff --git a/ticdc/ticdc-changefeed-config.md b/ticdc/ticdc-changefeed-config.md index 9c5d38d05158e..a60aaa9d02852 100644 --- a/ticdc/ticdc-changefeed-config.md +++ b/ticdc/ticdc-changefeed-config.md @@ -185,9 +185,18 @@ flush-interval = 2000 # The storage URI of the redo log. # The default value is empty. storage = "" -# Specifies whether to store the redo log in a file. +# Specifies whether to store the redo log in a local file. # The default value is false. use-file-backend = false +# The number of encoding and decoding workers in the redo module. +# The default value is 16. +encoding-worker-num = 16 +# The number of flushing workers in the redo module. +# The default value is 8. +flush-worker-num = 8 +# The behavior to compress redo log files. +# Available options are "" and "lz4". The default value is "", which means no compression. +compression = "" [integrity] # Whether to enable the checksum validation for single-row data. The default value is "none", which means to disable the feature. Value options are "none" and "correctness". @@ -211,4 +220,24 @@ sasl-oauth-scopes = ["producer.kafka", "consumer.kafka"] sasl-oauth-grant-type = "client_credentials" # The audience in the Kafka SASL OAUTHBEARER authentication. The default value is empty. This parameter is optional when the OAUTHBEARER authentication is used. sasl-oauth-audience = "kafka" + +[sink.cloud-storage-config] +# The concurrency for saving data changes to the downstream cloud storage. +# The default value is 16. +worker-count = 16 +# The interval for saving data changes to the downstream cloud storage. +# The default value is "2s". +flush-interval = "2s" +# A data change file is saved to the cloud storage when the number of bytes in this file exceeds `file-size`. +# The default value is 67108864 (that is, 64 MiB). +file-size = 67108864 +# The duration to retain files, which takes effect only when `date-separator` is configured as `day`. 
Assume that `file-expiration-days = 1` and `file-cleanup-cron-spec = "0 0 0 * * *"`, then TiCDC performs daily cleanup at 00:00:00 for files saved beyond 24 hours. For example, at 00:00:00 on 2023/12/02, TiCDC cleans up files generated before 2023/12/01, while files generated on 2023/12/01 remain unaffected. +# The default value is 0, which means file cleanup is disabled. +file-expiration-days = 0 +# The running cycle of the scheduled cleanup task, compatible with the crontab configuration, with a format of `<Second> <Minute> <Hour> <Day of the month> <Month> <Day of week (Optional)>` +# The default value is "0 0 2 * * *", which means that the cleanup task is executed every day at 2 AM. +file-cleanup-cron-spec = "0 0 2 * * *" +# The concurrency for uploading a single file. +# The default value is 1, which means concurrency is disabled. +flush-concurrency = 1 ``` diff --git a/ticdc/ticdc-open-api-v2.md b/ticdc/ticdc-open-api-v2.md index 3b6ded5f7d07b..a7f3ecf1da486 100644 --- a/ticdc/ticdc-open-api-v2.md +++ b/ticdc/ticdc-open-api-v2.md @@ -288,6 +288,9 @@ The `consistent` parameters are described as follows: | `level` | `STRING` type. The consistency level of the replicated data. (Optional) | | `max_log_size` | `UINT64` type. The maximum value of redo log. (Optional) | | `storage` | `STRING` type. The destination address of the storage. (Optional) | +| `use_file_backend` | `BOOL` type. Specifies whether to store the redo log in a local file. (Optional) | +| `encoding_worker_num` | `INT` type. The number of encoding and decoding workers in the redo module. (Optional) | +| `flush_worker_num` | `INT` type. The number of flushing workers in the redo module. (Optional) | The `filter` parameters are described as follows: @@ -333,6 +336,7 @@ The `sink` parameters are described as follows: | `terminator` | `STRING` type. The terminator is used to separate two data change events. The default value is null, which means `"\r\n"` is used as the terminator. (Optional) | | `transaction_atomicity` | `STRING` type. The atomicity level of the transaction. 
(Optional) | | `only_output_updated_columns` | `BOOLEAN` type. For MQ sinks using the `canal-json` or `open-protocol` protocol, you can specify whether only output the modified columns. The default value is `false`. (Optional) | +| `cloud_storage_config` | The storage sink configuration. (Optional) | `sink.column_selectors` is an array. The parameters are described as follows: @@ -345,10 +349,11 @@ The `sink.csv` parameters are described as follows: | Parameter name | Description | |:-----------------|:---------------------------------------| -| `delimiter` | `STRING` type. The character used to separate fields in the CSV file. The value must be an ASCII character and defaults to `,`. | -| `include_commit_ts` | `BOOLEAN` type. Whether to include commit-ts in CSV rows. The default value is `false`. | -| `null` | `STRING` type. The character that is displayed when a CSV column is null. The default value is `\N`. | -| `quote` | `STRING` type. The quotation character used to surround fields in the CSV file. If the value is empty, no quotation is used. The default value is `"`. | +| `delimiter` | `STRING` type. The character used to separate fields in the CSV file. The value must be an ASCII character and defaults to `,`. | +| `include_commit_ts` | `BOOLEAN` type. Whether to include commit-ts in CSV rows. The default value is `false`. | +| `null` | `STRING` type. The character that is displayed when a CSV column is null. The default value is `\N`. | +| `quote` | `STRING` type. The quotation character used to surround fields in the CSV file. If the value is empty, no quotation is used. The default value is `"`. | +| `binary_encoding_method` | `STRING` type. The encoding method of binary data, which can be `"base64"` or `"hex"`. The default value is `"base64"`. | `sink.dispatchers`: for the sink of MQ type, you can use this parameter to configure the event dispatcher. The following dispatchers are supported: `default`, `ts`, `rowid`, and `table`. 
The dispatcher rules are as follows: @@ -365,6 +370,17 @@ The `sink.csv` parameters are described as follows: | `partition` | `STRING` type. The target partition for dispatching events. | | `topic` | `STRING` type. The target topic for dispatching events. | +`sink.cloud_storage_config` parameters are described as follows: + +| Parameter name | Description | +|:-----------------|:---------------------------------------| +| `worker_count` | `INT` type. The concurrency for saving data changes to the downstream cloud storage. | +| `flush_interval` | `STRING` type. The interval for saving data changes to the downstream cloud storage. | +| `file_size` | `INT` type. A data change file is saved to the cloud storage when the number of bytes in this file exceeds the value of this parameter. | +| `file_expiration_days` | `INT` type. The duration to retain files, which takes effect only when `date-separator` is configured as `day`. | +| `file_cleanup_cron_spec` | `STRING` type. The running cycle of the scheduled cleanup task, compatible with the crontab configuration, with a format of `<Second> <Minute> <Hour> <Day of the month> <Month> <Day of week (Optional)>`. | +| `flush_concurrency` | `INT` type. The concurrency for uploading a single file. | + ### Example The following request creates a replication task with an ID of `test5` and `sink_uri` of `blackhome://`. diff --git a/ticdc/ticdc-server-config.md b/ticdc/ticdc-server-config.md index 4425cb7b5ace4..9bed11e4f75b5 100644 --- a/ticdc/ticdc-server-config.md +++ b/ticdc/ticdc-server-config.md @@ -40,6 +40,8 @@ data-dir = "" gc-ttl = 86400 # 24 h tz = "System" cluster-id = "default" +# This parameter specifies the maximum memory threshold (in bytes) for tuning GOGC. Setting a smaller threshold increases the GC frequency. Setting a larger threshold reduces GC frequency and consumes more memory resources for the TiCDC process. Once the memory usage exceeds this threshold, GOGC Tuner stops working. The default value is 0, indicating that GOGC Tuner is disabled. 
+gc-tuner-memory-threshold = 0 [security] ca-path = ""