From 8a5b9808f2ff062369d8836e8076ad71a8464fc1 Mon Sep 17 00:00:00 2001 From: Maxim Kovalev Date: Thu, 19 Sep 2024 14:38:49 +0300 Subject: [PATCH] YQL: Add seed optional argument to Digest::CityHash() call (#9409) --- .../en/core/yql/reference/yql-core/udf/list/digest.md | 2 +- .../ru/core/yql/reference/yql-core/udf/list/digest.md | 2 +- ydb/library/yql/udfs/common/digest/digest_udf.cpp | 9 +++++++-- .../test/canondata/test.test_Basic_/results.txt | 11 +++++++++++ .../yql/udfs/common/digest/test/cases/Basic.sql | 1 + 5 files changed, 21 insertions(+), 4 deletions(-) diff --git a/ydb/docs/en/core/yql/reference/yql-core/udf/list/digest.md b/ydb/docs/en/core/yql/reference/yql-core/udf/list/digest.md index d0d922c14dc5..2acd4de863a0 100644 --- a/ydb/docs/en/core/yql/reference/yql-core/udf/list/digest.md +++ b/ydb/docs/en/core/yql/reference/yql-core/udf/list/digest.md @@ -12,7 +12,7 @@ A set of commonly used hash functions. * `Digest::MurMurHash32(String{Flags:AutoMap}) -> Uint32` * `Digest::MurMurHash2A(String{Flags:AutoMap}) -> Uint64` * `Digest::MurMurHash2A32(String{Flags:AutoMap}) -> Uint32` -* `Digest::CityHash(String{Flags:AutoMap}) -> Uint64` +* `Digest::CityHash(String{Flags:AutoMap}, [Uint64?]) -> Uint64`: The optional second argument is the seed * `Digest::CityHash128(String{Flags:AutoMap}) -> Tuple` * `Digest::NumericHash(Uint64{Flags:AutoMap}) -> Uint64` * `Digest::Md5Hex(String{Flags:AutoMap}) -> String` diff --git a/ydb/docs/ru/core/yql/reference/yql-core/udf/list/digest.md b/ydb/docs/ru/core/yql/reference/yql-core/udf/list/digest.md index 3586ca4f2975..837c1c9ccf86 100644 --- a/ydb/docs/ru/core/yql/reference/yql-core/udf/list/digest.md +++ b/ydb/docs/ru/core/yql/reference/yql-core/udf/list/digest.md @@ -15,7 +15,7 @@ * `Digest::MurMurHash32(String{Flags:AutoMap}) -> Uint32` * `Digest::MurMurHash2A(String{Flags:AutoMap}) -> Uint64` * `Digest::MurMurHash2A32(String{Flags:AutoMap}) -> Uint32` -* `Digest::CityHash(String{Flags:AutoMap}) -> Uint64` +* 
`Digest::CityHash(String{Flags:AutoMap}, [Uint64?]) -> Uint64`: Второй опциональный аргумент задает seed * `Digest::CityHash128(String{Flags:AutoMap}) -> Tuple` CityHash функция для байтовой строки с результатом типа uint128. Результат представлен как пара из двух uint64 чисел `` diff --git a/ydb/library/yql/udfs/common/digest/digest_udf.cpp b/ydb/library/yql/udfs/common/digest/digest_udf.cpp index 7715a19f3163..e279a83c6675 100644 --- a/ydb/library/yql/udfs/common/digest/digest_udf.cpp +++ b/ydb/library/yql/udfs/common/digest/digest_udf.cpp @@ -82,10 +82,15 @@ namespace { return TUnboxedValuePod(hash); } - SIMPLE_STRICT_UDF(TCityHash, ui64(TAutoMap)) { + SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TCityHash, ui64(TAutoMap, TOptional), 1) { Y_UNUSED(valueBuilder); const auto& inputRef = args[0].AsStringRef(); - ui64 hash = CityHash64(inputRef.Data(), inputRef.Size()); + ui64 hash; + if (args[1]) { + hash = CityHash64WithSeed(inputRef.Data(), inputRef.Size(), args[1].Get()); + } else { + hash = CityHash64(inputRef.Data(), inputRef.Size()); + } return TUnboxedValuePod(hash); } diff --git a/ydb/library/yql/udfs/common/digest/test/canondata/test.test_Basic_/results.txt b/ydb/library/yql/udfs/common/digest/test/canondata/test.test_Basic_/results.txt index 0ffb24b87bb9..f5b7b0fe7855 100644 --- a/ydb/library/yql/udfs/common/digest/test/canondata/test.test_Basic_/results.txt +++ b/ydb/library/yql/udfs/common/digest/test/canondata/test.test_Basic_/results.txt @@ -70,6 +70,13 @@ "Uint64" ] ]; + [ + "cityWithSeed"; + [ + "DataType"; + "Uint64" + ] + ]; [ "city128"; [ @@ -268,6 +275,7 @@ "5654386555365545660"; "1466639702"; "11413460447292444913"; + "684814019408231284"; [ "125830901799957853"; "7569582475522398857" @@ -326,6 +334,7 @@ "16472888669357673283"; "2351653828"; "17472595041006102391"; + "8016373356242392939"; [ "13426016195983081906"; "17051066397148972982" @@ -384,6 +393,7 @@ "6734453432295282525"; "2128480519"; "11275350073939794026"; + "1669883546352889947"; [ 
"15168680716710346397"; "13490672353767795293" @@ -442,6 +452,7 @@ "0"; "0"; "11160318154034397263"; + "12607432989128692740"; [ "18085479540095642321"; "11079402499652051579" diff --git a/ydb/library/yql/udfs/common/digest/test/cases/Basic.sql b/ydb/library/yql/udfs/common/digest/test/cases/Basic.sql index ce850bc44fda..fbf6f218fba1 100644 --- a/ydb/library/yql/udfs/common/digest/test/cases/Basic.sql +++ b/ydb/library/yql/udfs/common/digest/test/cases/Basic.sql @@ -9,6 +9,7 @@ SELECT Digest::MurMurHash2A(key) AS murmur2a, Digest::MurMurHash2A32(key) AS murmur2a32, Digest::CityHash(key) AS city, + Digest::CityHash(key, 111) AS cityWithSeed, Digest::CityHash128(key) AS city128, Digest::NumericHash(COALESCE(CAST(key AS Uint64), 0)) AS numeric, Digest::Md5Hex(key) AS md5hex,