[#11052][#23476] YCQL: Fix timestamp precision issues in inserts and …

…index scans Summary: Previously, there were issues related to timestamp handling during insertions, particularly when timestamps were provided with microsecond precision. This occurred either through the use of the `currenttimestamp()` function or when explicitly inserting timestamps as strings with non-zero microseconds. These timestamps were stored in the storage layer (DocDB) with full microsecond precision, but the Cassandra layer only supports precision up to milliseconds. This mismatch in precision led to several issues: 1. Query mismatch in `SELECT` statements: When data was fetched using `SELECT * FROM table`, the timestamps were returned with millisecond precision (i.e., zeroed out microseconds). However, if users then used these returned timestamps in a `WHERE` clause to query the same table, no results would be returned. This occurred because the underlying storage still held the original timestamp with microseconds, causing a mismatch in the comparison. 2. Index scan failures with TIMESTAMP primary keys: In cases where a timestamp column was part of a primary key, performing an index scan using a secondary index would fail. This is because during an index scan, the data retrieved from the index includes the primary key, which, after passing through the Cassandra layer, was reduced to millisecond precision. When this primary key was then used to fetch data from the storage layer (DocDB), the keys no longer matched due to the precision difference. This revision addresses these two issues by ensuring that timestamps provided in microseconds are properly handled. Specifically: 1. `currenttimestamp()` function: Previously, this function returned the current time in microsecond precision, leading to inconsistencies. It now converts the timestamp to millisecond precision, aligning with Cassandra’s limitations. 2. String-specified timestamps with microsecond precision: When a timestamp is explicitly provided in a string format with microseconds (e.g., `2024-08-26 09:13:38.248513+0000`), it is now rounded down to the nearest millisecond (i.e., flooring the value). These fixes ensure that YCQL behaves more consistently with Cassandra, which does not support microseconds. To enable this behavior and use the original partial support for microseconds, the gflag `cql_revert_to_partial_microsecond_support=false` must be set. This flag is set to `true` by default but can be turned off to store timestamps inserted with microseconds precision in nearest millisecond. Irrespective of the value of gflag, a warning will be logged every 300 secs, if there are any inserts of timestamp data with microseconds precision. JIRA: DB-1000, DB-12394 Test Plan: == Automated testing == ./yb_build.sh --java-test 'org.yb.cql.TestTimestampDataType#testTimestampMicrosecsInsertSelect' ./yb_build.sh --java-test 'org.yb.cql.TestTimestampDataType#testTimestampPKIndexScan' New Tests Added: **org.yb.cql.TestTimestampDataType#testTimestampMicrosecsInsertSelect** This test verifies that inserting a timestamp string with microseconds precision (e.g., `'2024-08-26 09:13:38.248513+0000'`) allows fetching the corresponding row using a `WHERE` clause with the nearest millisecond value (e.g., `WHERE col = '2024-08-26 09:13:38.248000+0000'`). **org.yb.cql.TestTimestampDataType#testTimestampPKIndexScan** This test checks the behavior for a table with a primary key on a timestamp column and a secondary index. It inserts timestamp data using the `currenttimestamp()` function and explicit strings with microseconds precision. Then, an index scan using the secondary index is performed to fetch the data. Both tests pass only when the gflag `cql_revert_to_partial_microsecond_support=false` Reviewers: skumar, stiwary, pjain Reviewed By: skumar, pjain Subscribers: svc_phabricator, yql, ybase Differential Revision: https://phorge.dev.yugabyte.com/D37531
yugabyte · Oct 15, 2024 · 749d46b · 749d46b
1 parent 5ae8309
commit 749d46b
Show file tree

Hide file tree

Showing 6 changed files with 114 additions and 2 deletions.
diff --git a/java/yb-cql/src/test/java/org/yb/cql/TestTimestampDataType.java b/java/yb-cql/src/test/java/org/yb/cql/TestTimestampDataType.java
@@ -16,6 +16,7 @@
 import com.datastax.driver.core.Row;
 import org.junit.Test;
 
+import java.text.SimpleDateFormat;
 import java.util.*;
 
 import static org.yb.AssertionWrappers.assertEquals;
@@ -395,4 +396,74 @@ public void testTimestampLogicGFlag() throws Exception {
     destroyMiniCluster();
   }
 
+  @Test
+  public void testTimestampMicrosecsInsertSelect() throws Exception {
+    destroyMiniCluster();
+    createMiniCluster(Collections.emptyMap(),
+        Collections.singletonMap("cql_revert_to_partial_microsecond_support", "false"));
+    setUpCqlClient();
+    String tableName = "test";
+    String testTimestamp = "2024-08-26 09:23:38.319213+0000";
+    String testTimestampMS = "2024-08-26 09:23:38.319000+0000";
+
+    session.execute(String.format("CREATE TABLE %s(x int primary key, b timestamp);", tableName));
+    session.execute(
+        String.format("INSERT INTO %s(x, b) VALUES(%d, '%s');", tableName, 1, testTimestamp));
+
+    session.execute(
+        String.format("INSERT INTO %s(x, b) VALUES(%d, %s);", tableName, 2, "currenttimestamp()"));
+
+    session.execute(String.format("INSERT INTO %s(x, b) VALUES(%d, %s);", tableName, 3,
+        "totimestamp(now())")); /* Previously, totimestamp(now()) worked fine */
+
+    String sel_stmt_init = "SELECT b FROM %s WHERE x=%d";
+    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSSXXX");
+    String currenttimestamp_string =
+        dateFormat.format(runSelect(sel_stmt_init, tableName, 2).next().getTimestamp(0));
+    String totimestamp_now_string =
+        dateFormat.format(runSelect(sel_stmt_init, tableName, 3).next().getTimestamp(0));
+
+    String sel_stmt = "SELECT COUNT(*) FROM %s WHERE b='%s'";
+
+    assertQuery(String.format(sel_stmt, tableName, testTimestampMS), "Row[1]");
+    assertQuery(String.format(sel_stmt, tableName, currenttimestamp_string), "Row[1]");
+    assertQuery(String.format(sel_stmt, tableName, totimestamp_now_string), "Row[1]");
+
+    destroyMiniCluster();
+  }
+
+  @Test
+  public void testTimestampPKIndexScan() throws Exception {
+    destroyMiniCluster();
+    createMiniCluster(Collections.emptyMap(),
+        Collections.singletonMap("cql_revert_to_partial_microsecond_support", "false"));
+    setUpCqlClient();
+    String tableName = "test";
+    String testTimestamp = "2024-08-26 09:23:38.319213+0000";
+
+    session.execute(String.format("CREATE TABLE %s(x INT, b TIMESTAMP, v1 INT, v2 INT, PRIMARY "
+            + "KEY(x, b)) WITH transactions = {'enabled': 'true'};",
+        tableName));
+    session.execute(String.format("CREATE INDEX idx1 ON %s(v1);", tableName));
+    waitForReadPermsOnAllIndexes(tableName);
+
+    session.execute(String.format("INSERT INTO %s(x, b, v1, v2) VALUES(%d, %s, %d, %d);", tableName,
+        1, "currenttimestamp()", 12, 13));
+    session.execute(String.format("INSERT INTO %s(x, b, v1, v2) VALUES(%d, %s, %d, %d);", tableName,
+        2, "currenttimestamp()", 13, 14));
+    session.execute(String.format("INSERT INTO %s(x, b, v1, v2) VALUES(%d, '%s', %d, %d);",
+        tableName, 3, testTimestamp, 12, 15));
+    session.execute(String.format("INSERT INTO %s(x, b, v1, v2) VALUES(%d, %s, %d, %d);", tableName,
+        4, "totimestamp(now())", 12, 16)); /* Previously, totimestamp(now()) worked fine */
+    String sel_stmt = String.format("SELECT * FROM %s  WHERE v1=12", tableName);
+    assertTrue("Should use index scan",
+        session.execute("EXPLAIN " + sel_stmt).all().toString().contains("Index Scan using"));
+
+    int count = 0;
+    for (Row row : session.execute(sel_stmt)) {
+      count++;
+    }
+    assertEquals(3, count);
+    destroyMiniCluster();
+  }
 }
diff --git a/src/yb/bfql/bfql.cc b/src/yb/bfql/bfql.cc
@@ -24,6 +24,11 @@ using std::vector;
 using std::string;
 using strings::Substitute;
 
+DEFINE_RUNTIME_bool(cql_revert_to_partial_microsecond_support, true,
+    "Use this to store timestamps with microseconds precision but cql layer will only support "
+    "milliseconds. Changing this might have unintended consequences. Please refer technical "
+    "advisory TA-23476 for more information.");
+
 namespace yb {
 namespace bfql {
 

diff --git a/src/yb/bfql/bfunc_standard.h b/src/yb/bfql/bfunc_standard.h
@@ -51,6 +51,8 @@
 #include "yb/util/uuid.h"
 #include "yb/util/yb_partition.h"
 
+DECLARE_bool(cql_revert_to_partial_microsecond_support);
+
 namespace yb {
 
 bool operator ==(const QLValuePB& lhs, const QLValuePB& rhs);
@@ -207,7 +209,19 @@ inline Result<BFRetValue> NowTime(BFFactory factory) {
 
 inline Result<BFRetValue> NowTimestamp(BFFactory factory) {
   BFRetValue result = factory();
-  result.set_timestamp_value(DateTime::TimestampNow().ToInt64());
+  int64_t value_us = DateTime::TimestampNow().ToInt64();
+  int64_t value_ms = DateTime::FloorTimestamp(
+      value_us, DateTime::kInternalPrecision, DateTime::CqlInputFormat.input_precision);
+
+  if (value_ms != value_us) {
+    YB_LOG_EVERY_N_SECS(WARNING, 300)
+        << "timestamp is inserted in YCQL with microseconds precision";
+  }
+  int64_t value = value_us;
+  if (!FLAGS_cql_revert_to_partial_microsecond_support) {
+    value = value_ms;
+  }
+  result.set_timestamp_value(value);
   return result;
 }
 

diff --git a/src/yb/util/date_time.cc b/src/yb/util/date_time.cc
@@ -397,6 +397,13 @@ int64_t DateTime::AdjustPrecision(int64_t val,
   return val;
 }
 
+// returns floor of the timestamp rounded to output_precision
+int64_t DateTime::FloorTimestamp(
+    int64_t val, size_t input_precision, const size_t output_precision) {
+  auto interim_val = AdjustPrecision(val, input_precision, output_precision);
+  return AdjustPrecision(interim_val, output_precision, input_precision);
+}
+
 namespace {
 
 std::vector<std::regex> InputFormatRegexes() {

diff --git a/src/yb/util/date_time.h b/src/yb/util/date_time.h
@@ -91,6 +91,7 @@ class DateTime {
 
   //----------------------------------------------------------------------------------------------
   static int64_t AdjustPrecision(int64_t val, size_t input_precision, size_t output_precision);
+  static int64_t FloorTimestamp(int64_t val, size_t input_precision, size_t output_precision);
   static constexpr int64_t kInternalPrecision = 6; // microseconds
   static constexpr int64_t kMillisecondPrecision = 3; // milliseconds
 };

diff --git a/src/yb/yql/cql/ql/ptree/pt_expr.cc b/src/yb/yql/cql/ql/ptree/pt_expr.cc
@@ -44,6 +44,8 @@
 #include "yb/yql/cql/ql/ptree/sem_context.h"
 #include "yb/yql/cql/ql/ptree/yb_location.h"
 
+DECLARE_bool(cql_revert_to_partial_microsecond_support);
+
 namespace yb {
 namespace ql {
 
@@ -413,7 +415,19 @@ Status PTLiteralString::ToString(string *value) const {
 }
 
 Status PTLiteralString::ToTimestamp(int64_t *value) const {
-  *value = VERIFY_RESULT(DateTime::TimestampFromString(value_->c_str())).ToInt64();
+  int64_t value_us = VERIFY_RESULT(DateTime::TimestampFromString(value_->c_str())).ToInt64();
+  int64_t value_ms = DateTime::FloorTimestamp(
+      value_us, DateTime::kInternalPrecision, DateTime::CqlInputFormat.input_precision);
+
+  if (value_ms != value_us) {
+    YB_LOG_EVERY_N_SECS(WARNING, 300)
+        << "timestamp is inserted in YCQL with microseconds precision";
+  }
+
+  *value = value_us;
+  if (!FLAGS_cql_revert_to_partial_microsecond_support) {
+    *value = value_ms;
+  }
   return Status::OK();
 }