diff --git a/ticdc/ticdc-changefeed-config.md b/ticdc/ticdc-changefeed-config.md index f5023ddc2f885..de16c8375c009 100644 --- a/ticdc/ticdc-changefeed-config.md +++ b/ticdc/ticdc-changefeed-config.md @@ -204,6 +204,8 @@ enable-partition-separator = true # output-handle-key = false # Whether to output the value before the row data changes. The default value is false. # When it is enabled, the UPDATE event will output two rows of data: the first row is a DELETE event that outputs the data before the change; the second row is an INSERT event that outputs the changed data. +# When it is enabled (set to true), the "is-update" column is added before the columns with data changes. This added column identifies whether the data change of the current row comes from an UPDATE event or from an original INSERT/DELETE event. +# If the data change of the current row comes from an UPDATE event, the value of the "is-update" column is true. Otherwise, it is false. # output-old-value = false # Starting from v8.0.0, TiCDC supports the Simple message encoding protocol. The following are the configuration parameters for the Simple protocol. diff --git a/ticdc/ticdc-csv.md b/ticdc/ticdc-csv.md index a55c78b89e693..12278eec59ca0 100644 --- a/ticdc/ticdc-csv.md +++ b/ticdc/ticdc-csv.md @@ -27,6 +27,7 @@ delimiter = ',' # Before v7.6.0, you can only set the delimiter to a single char quote = '"' null = '\N' include-commit-ts = true +output-old-value = false ``` ## Transactional constraints @@ -47,7 +48,8 @@ In the CSV file, each column is defined as follows: - Column 2: Table name. - Column 3: Schema name. - Column 4: The `commit-ts` of the source transaction. This column is optional. -- Column 5 to the last column: One or more columns that represent data to be changed. 
+- Column 5: The `is-update` column. This column exists only when the value of `output-old-value` is true. It identifies whether the row data change comes from an UPDATE event (the value of the column is true) or from an INSERT/DELETE event (the value is false). +- Column 6 to the last column: One or more columns with data changes. Assume that table `hr.employee` is defined as follows: @@ -61,7 +63,7 @@ CREATE TABLE `employee` ( ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; ``` -The DML events of this table are stored in the CSV format as follows: +When `include-commit-ts = true` and `output-old-value = false`, the DML events of this table are stored in the CSV format as follows: ```shell "I","employee","hr",433305438660591626,101,"Smith","Bob","2014-06-04","New York" @@ -71,6 +73,18 @@ The DML events of this table are stored in the CSV format as follows: "U","employee","hr",433305438660591630,102,"Alex","Alice","2018-06-15","Beijing" ``` +When `include-commit-ts = true` and `output-old-value = true`, the DML events of this table are stored in the CSV format as follows: + +```shell +"I","employee","hr",433305438660591626,false,101,"Smith","Bob","2014-06-04","New York" +"D","employee","hr",433305438660591627,true,101,"Smith","Bob","2015-10-08","Shanghai" +"I","employee","hr",433305438660591627,true,101,"Smith","Bob","2015-10-08","Los Angeles" +"D","employee","hr",433305438660591629,false,101,"Smith","Bob","2017-03-13","Dallas" +"I","employee","hr",433305438660591630,false,102,"Alex","Alice","2017-03-14","Shanghai" +"D","employee","hr",433305438660591630,true,102,"Alex","Alice","2017-03-14","Beijing" +"I","employee","hr",433305438660591630,true,102,"Alex","Alice","2018-06-15","Beijing" +``` + ## Data type mapping | MySQL type | CSV type | Example | Description |