diff --git a/velox/common/base/SimdUtil-inl.h b/velox/common/base/SimdUtil-inl.h
index 87ff71f8b181..6a5bc78c2039 100644
--- a/velox/common/base/SimdUtil-inl.h
+++ b/velox/common/base/SimdUtil-inl.h
@@ -1436,4 +1436,175 @@ inline bool memEqualUnsafe(const void* x, const void* y, int32_t size) {
   return true;
 }
 
+namespace detail {
+
+/// Byte vector type used by the substring search below.
+/// NOTE: SSE4.2 loses a lot of performance in the no-match case.
+#if XSIMD_WITH_AVX2
+using CharVector = xsimd::batch<uint8_t, xsimd::avx2>;
+#elif XSIMD_WITH_NEON
+using CharVector = xsimd::batch<uint8_t, xsimd::neon>;
+#endif
+
+/// Memory page size as reported by sysconf(). Declared 'inline' so that all
+/// translation units including this header share a single definition.
+inline const int kPageSize = sysconf(_SC_PAGESIZE);
+
+/// Returns true if the 'CharVector::size + length' bytes starting at 'ptr' end
+/// on or before the end of the memory page that contains 'ptr'.
+FOLLY_ALWAYS_INLINE bool pageSafe(const void* const ptr, size_t length) {
+  // sysconf() returns -1 on failure; without a page size nothing is safe.
+  if (kPageSize <= 0) {
+    return false;
+  }
+  const auto pageSize = static_cast<size_t>(kPageSize);
+  // The bound below is unsigned. A request larger than a page would wrap it
+  // around to a huge value and make every pointer look safe, so reject it.
+  if (CharVector::size > pageSize || length > pageSize - CharVector::size) {
+    return false;
+  }
+  const size_t offsetInPage =
+      (pageSize - 1) & reinterpret_cast<std::uintptr_t>(ptr);
+  return offsetInPage <= pageSize - CharVector::size - length;
+}
+
+/// Returns the offset of the first occurrence of 'needle' in the 'n' bytes at
+/// 's', or std::string::npos if the loops below find none. With 'compiled' set
+/// the needle length is the compile-time constant 'compiledNeedleSize',
+/// otherwise it is the runtime 'needleSize'. The vectorized path can return an
+/// offset greater than 'n - needleSize' (a candidate seen in over-read bytes);
+/// the caller discards those. The caller must pass 'n' >= 'needleSize'.
+template <bool compiled, size_t compiledNeedleSize>
+size_t FOLLY_ALWAYS_INLINE smidStrstrMemcmp(
+    const char* s,
+    size_t n,
+    const char* needle,
+    size_t needleSize) {
+  static_assert(compiledNeedleSize >= 2);
+  VELOX_CHECK_GT(needleSize, 1);
+  VELOX_CHECK_GT(n, 0);
+  auto first = CharVector::broadcast(needle[0]);
+  auto last = CharVector::broadcast(needle[needleSize - 1]);
+  size_t i = 0;
+  // Fast path for page-safe data.
+  // It's safe to over-read a CharVector if all the data is in the same page.
+  // see: https://mudongliang.github.io/x86/html/file_module_x86_id_208.html
+  // While executing in 16-bit addressing mode, a linear address for a 128-bit
+  // data access that overlaps the end of a 16-bit segment is not allowed and is
+  // defined as reserved behavior. A specific processor implementation may or
+  // may not generate a general-protection exception (#GP) in this situation,
+  // and the address that spans the end of the segment may or may not wrap
+  // around to the beginning of the segment.
+  if (pageSafe(s + n, needleSize)) {
+    for (; i <= n - needleSize; i += CharVector::size) {
+      auto blockFirst = CharVector::load_unaligned(s + i);
+      const auto eqFirst = (first == blockFirst);
+      // Skip the second load and compare when no byte of this block equals
+      // the first needle byte.
+      if (eqFirst.mask() == 0) {
+        continue;
+      }
+      auto blockLast = CharVector::load_unaligned(s + i + needleSize - 1);
+      const auto eqLast = (last == blockLast);
+      auto mask = (eqFirst && eqLast).mask();
+      while (mask != 0) {
+        const auto bitpos = __builtin_ctz(mask);
+        if constexpr (compiled) {
+          if constexpr (compiledNeedleSize == 2) {
+            return i + bitpos;
+          }
+          if (memcmp(s + i + bitpos + 1, needle + 1, compiledNeedleSize - 2) ==
+              0) {
+            return i + bitpos;
+          }
+        } else {
+          if (memcmp(s + i + bitpos + 1, needle + 1, needleSize - 2) == 0) {
+            return i + bitpos;
+          }
+        }
+        mask = mask & (mask - 1);
+      }
+    }
+  }
+  // Scalar fallback, also used when the vectorized path was not taken.
+  for (; i <= n - needleSize; ++i) {
+    if constexpr (compiled) {
+      if (memcmp(s + i, needle, compiledNeedleSize) == 0) {
+        return i;
+      }
+    } else {
+      if (memcmp(s + i, needle, needleSize) == 0) {
+        return i;
+      }
+    }
+  }
+
+  return std::string::npos;
+}
+
+} // namespace detail
+
+/// Returns the offset of the first occurrence of the 'k' bytes at 'needle'
+/// within the 'n' bytes at 's', or std::string::npos if there is none. An empty
+/// needle matches at offset 0.
+/// A faster implementation for std::find, about 2x faster than string_view's
+/// find() in almost all cases, as shown by StringSearchBenchmark.cpp. Uses an
+/// xsimd batch to compare the first and last chars first, then a fixed-size
+/// memcmp to compare the remaining chars. Inlining in the header file is about
+/// 30% faster.
+FOLLY_ALWAYS_INLINE size_t
+simdStrstr(const char* s, size_t n, const char* needle, size_t k) {
+  size_t result = std::string::npos;
+
+  if (n < k) {
+    return result;
+  }
+
+  switch (k) {
+    case 0:
+      return 0;
+
+    case 1: {
+      // memchr() is bounded by 'n'. strchr() ignores 'n': it stops at an
+      // embedded '\0' and reads past 's + n' when the byte is absent.
+      const void* res = memchr(s, needle[0], n);
+
+      return (res != nullptr)
+          ? static_cast<size_t>(static_cast<const char*>(res) - s)
+          : std::string::npos;
+    }
+#define FIXED_MEM_STRSTR(size)                                         \
+  case size:                                                           \
+    result = detail::smidStrstrMemcmp<true, size>(s, n, needle, size); \
+    break;
+      FIXED_MEM_STRSTR(2)
+      FIXED_MEM_STRSTR(3)
+      FIXED_MEM_STRSTR(4)
+      FIXED_MEM_STRSTR(5)
+      FIXED_MEM_STRSTR(6)
+      FIXED_MEM_STRSTR(7)
+      FIXED_MEM_STRSTR(8)
+      FIXED_MEM_STRSTR(9)
+      FIXED_MEM_STRSTR(10)
+      FIXED_MEM_STRSTR(11)
+      FIXED_MEM_STRSTR(12)
+      FIXED_MEM_STRSTR(13)
+      FIXED_MEM_STRSTR(14)
+      FIXED_MEM_STRSTR(15)
+      FIXED_MEM_STRSTR(16)
+      FIXED_MEM_STRSTR(17)
+      FIXED_MEM_STRSTR(18)
+    default:
+      result = detail::smidStrstrMemcmp<false, 2>(s, n, needle, k);
+      break;
+  }
+#undef FIXED_MEM_STRSTR
+  // load_unaligned is used for better performance, so the result may be bigger
+  // than n - k.
+  if (result <= n - k) {
+    return result;
+  } else {
+    return std::string::npos;
+  }
+}
 } // namespace facebook::velox::simd
diff --git a/velox/common/base/SimdUtil.cpp b/velox/common/base/SimdUtil.cpp
index 3f7d0de91d5a..f59ad20b7a3a 100644
--- a/velox/common/base/SimdUtil.cpp
+++ b/velox/common/base/SimdUtil.cpp
@@ -111,142 +111,4 @@ bool initializeSimdUtil() {
 }
 
 static bool FB_ANONYMOUS_VARIABLE(g_simdConstants) = initializeSimdUtil();
-
-namespace detail {
-
-#if XSIMD_WITH_SSE4_2
-using CharVector = xsimd::batch;
-#elif XSIMD_WITH_NEON
-using CharVector = xsimd::batch;
-#endif
-
-const int kPageSize = sysconf(_SC_PAGESIZE);
-FOLLY_ALWAYS_INLINE bool pageSafe(const void* const ptr) {
-  return ((kPageSize - 1) & reinterpret_cast(ptr)) <=
-      kPageSize - CharVector::size;
-}
-
-template
-size_t FOLLY_ALWAYS_INLINE smidStrstrMemcmp(
-    const char* s,
-    size_t n,
-    const char* needle,
-    size_t needleSize) {
-  static_assert(compiledNeedleSize >= 2);
-  VELOX_CHECK_GT(needleSize, 1);
-  VELOX_CHECK_GT(n, 0);
-  auto first = CharVector::broadcast(needle[0]);
-  auto last = CharVector::broadcast(needle[needleSize - 1]);
-  size_t i = 0;
-  // Fast path for page-safe data.
-  // It`s safe to over-read CharVector if all-data are in same page.
- // see: https://mudongliang.github.io/x86/html/file_module_x86_id_208.html - // While executing in 16-bit addressing mode, a linear address for a 128-bit - // data access that overlaps the end of a 16-bit segment is not allowed and is - // defined as reserved behavior. A specific processor implementation may or - // may not generate a general-protection exception (#GP) in this situation, - // and the address that spans the end of the segment may or may not wrap - // around to the beginning of the segment. - for (; i <= n - needleSize && pageSafe(s + i + needleSize - 1) && - pageSafe(s + i); - i += CharVector::size) { - auto blockFirst = CharVector::load_unaligned(s + i); - auto blockLast = CharVector::load_unaligned(s + i + needleSize - 1); - - const auto eqFirst = (first == blockFirst); - const auto eqLast = (last == blockLast); - - auto mask = toBitMask(eqFirst && eqLast); - - while (mask != 0) { - const auto bitpos = __builtin_ctz(mask); - if constexpr (compiled) { - if constexpr (compiledNeedleSize == 2) { - return i + bitpos; - } - if (memcmp(s + i + bitpos + 1, needle + 1, compiledNeedleSize - 2) == - 0) { - return i + bitpos; - } - } else { - if (memcmp(s + i + bitpos + 1, needle + 1, needleSize - 2) == 0) { - return i + bitpos; - } - } - mask = mask & (mask - 1); - } - } - // Fallback path for generic path. - for (; i <= n - needleSize; ++i) { - if constexpr (compiled) { - if (memcmp(s + i, needle, compiledNeedleSize) == 0) { - return i; - } - } else { - if (memcmp(s + i, needle, needleSize) == 0) { - return i; - } - } - } - - return std::string::npos; -}; - -} // namespace detail - -/// A faster implementation for c_strstr(), about 2x faster than string_view`s -/// find(), proved by TpchLikeBenchmark. Use xsmid-batch to compare first&&last -/// char first, use fixed-memcmp to compare left chars. Inline in header file -/// will be a little faster. 
-size_t simdStrstr(const char* s, size_t n, const char* needle, size_t k) { - size_t result = std::string::npos; - - if (n < k) { - return result; - } - - switch (k) { - case 0: - return 0; - - case 1: { - const char* res = strchr(s, needle[0]); - - return (res != nullptr) ? res - s : std::string::npos; - } -#define FIXED_MEM_STRSTR(size) \ - case size: \ - result = detail::smidStrstrMemcmp(s, n, needle, size); \ - break; - FIXED_MEM_STRSTR(2) - FIXED_MEM_STRSTR(3) - FIXED_MEM_STRSTR(4) - FIXED_MEM_STRSTR(5) - FIXED_MEM_STRSTR(6) - FIXED_MEM_STRSTR(7) - FIXED_MEM_STRSTR(8) - FIXED_MEM_STRSTR(9) - FIXED_MEM_STRSTR(10) - FIXED_MEM_STRSTR(11) - FIXED_MEM_STRSTR(12) - FIXED_MEM_STRSTR(13) - FIXED_MEM_STRSTR(14) - FIXED_MEM_STRSTR(15) - FIXED_MEM_STRSTR(16) - FIXED_MEM_STRSTR(17) - FIXED_MEM_STRSTR(18) - default: - result = detail::smidStrstrMemcmp(s, n, needle, k); - break; - } -#undef FIXED_MEM_STRSTR - // load_unaligned is used for better performance, so result maybe bigger than - // n-k. 
- if (result <= n - k) { - return result; - } else { - return std::string::npos; - } -} - } // namespace facebook::velox::simd diff --git a/velox/common/base/SimdUtil.h b/velox/common/base/SimdUtil.h index ba63d3c1d237..5230abe6ff1f 100644 --- a/velox/common/base/SimdUtil.h +++ b/velox/common/base/SimdUtil.h @@ -497,7 +497,8 @@ xsimd::batch reinterpretBatch(xsimd::batch, const A& = {}); template inline bool memEqualUnsafe(const void* x, const void* y, int32_t size); -size_t simdStrstr(const char* s, size_t n, const char* needle, size_t k); +FOLLY_ALWAYS_INLINE size_t +simdStrstr(const char* s, size_t n, const char* needle, size_t k); } // namespace facebook::velox::simd diff --git a/velox/common/base/benchmarks/CMakeLists.txt b/velox/common/base/benchmarks/CMakeLists.txt index 065db83c3672..a8e956f8542a 100644 --- a/velox/common/base/benchmarks/CMakeLists.txt +++ b/velox/common/base/benchmarks/CMakeLists.txt @@ -17,3 +17,9 @@ target_link_libraries( velox_common_base_benchmarks PUBLIC ${FOLLY_BENCHMARK} PRIVATE velox_common_base Folly::folly) + +add_executable(velox_common_stringsearch_benchmarks StringSearchBenchmark.cpp) +target_link_libraries( + velox_common_stringsearch_benchmarks + PUBLIC ${FOLLY_BENCHMARK} + PRIVATE velox_common_base Folly::folly) diff --git a/velox/common/base/benchmarks/StringSearchBenchmark.cpp b/velox/common/base/benchmarks/StringSearchBenchmark.cpp new file mode 100644 index 000000000000..562c1fe4c8be --- /dev/null +++ b/velox/common/base/benchmarks/StringSearchBenchmark.cpp @@ -0,0 +1,186 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include "velox/common/base/SimdUtil.h"
+
+/// Parts of this file are copied from
+/// https://github.com/facebook/folly/blob/ce5edfb9b08ead9e78cb46879e7b9499861f7cd2/folly/test/FBStringTestBenchmarks.cpp.h
+static const int seed = folly::randomNumberSeed();
+using RandomT = std::mt19937;
+static RandomT rng(seed);
+
+namespace facebook::velox {
+/// Returns a value drawn from an int distribution over [low, up] using the
+/// file-level generator 'rng'.
+template <class Integral1, class Integral2>
+Integral2 random(Integral1 low, Integral2 up) {
+  std::uniform_int_distribution<> range(low, up);
+  return range(rng);
+}
+
+/// A haystack and the needle to search for in it.
+struct TestData {
+  std::string hayStack;
+  std::string needle;
+};
+
+/// Picks a random 'hayStackSize'-char window of a fixed 1028-char text and a
+/// random 'needleSize'-char substring of that window, so the needle is always
+/// present. Expects 'needleSize' <= 'hayStackSize' <= 1028.
+TestData generateTestData(int hayStackSize, int needleSize) {
+  // Text courtesy (ahem) of
+  // http://www.psychologytoday.com/blog/career-transitions/200906/
+  // the-dreaded-writing-sample
+  // 1028chars
+  const std::string s =
+      "\
+Even if you've mastered the art of the cover letter and the resume, \
+another part of the job search process can trip up an otherwise \
+qualified candidate: the writing sample.\n\
+\n\
+Strong writing and communication skills are highly sought after by \
+most employers. Whether crafting short emails or lengthy annual \
+reports, many workers use their writing skills every day. And for an \
+employer seeking proof behind that ubiquitous candidate \
+phrase,\"excellent communication skills\", a required writing sample \
+is invaluable.\n\
+\n\
+Writing samples need the same care and attention given to cover \
+letters and resumes. Candidates with otherwise impeccable credentials \
+are routinely eliminated by a poorly chosen writing sample. Notice I \
+said \"poorly chosen\" not \"poorly written.\" Because that's the rub: \
+a writing sample not only reveals the individual's writing skills, it \
+also offers a peek into what they consider important or relevant for \
+the position. If you miss that mark with your writing sample, don't \
+expect to get a call for an interview.";
+  auto pos = random(0, s.size() - hayStackSize);
+  std::string haystack = s.substr(pos, hayStackSize);
+  auto needlePos = random(0, hayStackSize - needleSize);
+  std::string needle = haystack.substr(needlePos, needleSize);
+  return TestData{std::move(haystack), std::move(needle)};
+}
+
+/// Benchmark body for a needle that is present: runs 'iters' searches with
+/// std::string::find when 'useStd' is set and with simd::simdStrstr otherwise.
+/// The first parameter is unused.
+void findSuccessful(
+    size_t /*arg*/,
+    bool useStd,
+    size_t iters,
+    const TestData& testdata) {
+  if (useStd) {
+    FOR_EACH_RANGE (i, 0, iters) {
+      folly::doNotOptimizeAway(testdata.hayStack.find(testdata.needle));
+    }
+  } else {
+    FOR_EACH_RANGE (i, 0, iters) {
+      folly::doNotOptimizeAway(simd::simdStrstr(
+          testdata.hayStack.data(),
+          testdata.hayStack.size(),
+          testdata.needle.data(),
+          testdata.needle.size()));
+    }
+  }
+}
+
+/// Folly uses random test data for each iteration, but this cannot guarantee
+/// that the data for each test of different algorithms is the same, so we use
+/// the same random data for each comparison benchmark here.
+TestData data50to5 = generateTestData(50, 5);
+TestData data100to10 = generateTestData(100, 10);
+TestData data100to20 = generateTestData(100, 20);
+TestData data1000to10 = generateTestData(1000, 10);
+TestData data1000to100 = generateTestData(1000, 100);
+
+BENCHMARK_NAMED_PARAM(findSuccessful, opt_50_5, false, 5242880, data50to5)
+BENCHMARK_NAMED_PARAM(findSuccessful, opt_100_10, false, 5242880, data100to10)
+BENCHMARK_NAMED_PARAM(findSuccessful, opt_100_20, false, 5242880, data100to20)
+BENCHMARK_NAMED_PARAM(findSuccessful, opt_1k_10, false, 5242880, data1000to10)
+BENCHMARK_NAMED_PARAM(findSuccessful, opt_1k_100, false, 5242880, data1000to100)
+BENCHMARK_NAMED_PARAM(findSuccessful, std_50_5, true, 5242880, data50to5)
+BENCHMARK_NAMED_PARAM(findSuccessful, std_100_10, true, 5242880, data100to10)
+BENCHMARK_NAMED_PARAM(findSuccessful, std_100_20, true, 5242880, data100to20)
+BENCHMARK_NAMED_PARAM(findSuccessful, std_1k_10, true, 5242880, data1000to10)
+BENCHMARK_NAMED_PARAM(findSuccessful, std_1k_100, true, 5242880, data1000to100)
+
+/// std::find only has a fast path for a first-char mismatch. When the first
+/// char matches (which in practice is highly likely), its performance drops
+/// significantly.
+TestData prefixMatch = {
+    "luffily close dugouts wake about the pinto beans. pending, ironic dependencies",
+    "b???"};
+
+TestData prefixUnMatch = {
+    "luffily close dugouts wake about the pinto beans. pending, ironic dependencies",
+    "????"};
+
+/// Benchmark body for a needle that is absent: runs 'iters' searches with
+/// std::string_view::find when 'useStd' is set and with simd::simdStrstr
+/// otherwise. The first parameter is unused.
+void findUnsuccessful(
+    size_t /*arg*/,
+    bool useStd,
+    size_t iters,
+    const TestData& test) {
+  const char* haystack = test.hayStack.data();
+  const char* needle = test.needle.data();
+  // Keep the sizes as size_t; narrowing them to int is not needed.
+  const size_t haystackSize = test.hayStack.size();
+  const size_t needleSize = test.needle.size();
+  if (useStd) {
+    FOR_EACH_RANGE (i, 0, iters) {
+      folly::doNotOptimizeAway(std::string_view(haystack, haystackSize)
+                                   .find(std::string_view(needle, needleSize)));
+    }
+  } else {
+    FOR_EACH_RANGE (i, 0, iters) {
+      folly::doNotOptimizeAway(
+          simd::simdStrstr(haystack, haystackSize, needle, needleSize));
+    }
+  }
+}
+BENCHMARK_NAMED_PARAM(
+    findUnsuccessful,
+    std_first_char_match,
+    true,
+    52428800,
+    prefixMatch)
+BENCHMARK_NAMED_PARAM(
+    findUnsuccessful,
+    opt_first_char_match,
+    false,
+    52428800,
+    prefixMatch)
+BENCHMARK_NAMED_PARAM(
+    findUnsuccessful,
+    std_first_char_unmatch,
+    true,
+    52428800,
+    prefixUnMatch)
+BENCHMARK_NAMED_PARAM(
+    findUnsuccessful,
+    opt_first_char_unmatch,
+    false,
+    52428800,
+    prefixUnMatch)
+} // namespace facebook::velox
+int main(int argc, char** argv) {
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
+  folly::runBenchmarks();
+  return 0;
+}
diff --git a/velox/common/base/tests/SimdUtilTest.cpp b/velox/common/base/tests/SimdUtilTest.cpp
index 9dbebc060fb3..e8ea748299cb 100644
--- a/velox/common/base/tests/SimdUtilTest.cpp
+++ b/velox/common/base/tests/SimdUtilTest.cpp
@@ -109,6 +109,22 @@ class SimdUtilTest : public testing::Test {
     EXPECT_EQ(reference, target);
   }
 
+  /// Returns a value drawn from an int distribution over [low, up] using rng_.
+  template <class Integral1, class Integral2>
+  Integral2 random(Integral1 low, Integral2 up) {
+    std::uniform_int_distribution<> range(low, up);
+    return range(rng_);
+  }
+
+  /// Fills '*toFill' with a random number (<= maxSize) of chars in 'a'..'z'.
+  void randomString(std::string* toFill, unsigned int maxSize = 1000) {
+    assert(toFill);
+    toFill->resize(random(0, maxSize));
+    for (size_t i = 0; i < toFill->size(); i++) {
+      (*toFill)[i] = random('a', 'z');
+    }
+  }
+
   folly::Random::DefaultGenerator rng_;
 };
 
@@ -491,25 +507,154 @@ TEST_F(SimdUtilTest, memcpyTime) {
   LOG(INFO) << "simd=" << simd << " sys=" << sys;
 }
 
-TEST_F(SimdUtilTest, testSimdStrStr) {
-  // 48 chars.
+/// Copy from std::boyer_moore_searcher proposal:
+/// https://github.com/mclow/search-library/blob/master/basic_tests.cpp
+/// Basic sanity checking. It makes sure that all the algorithms work.
+TEST_F(SimdUtilTest, basicSimdStrStr) {
+  auto checkOne = [](const std::string& text, const std::string& needle) {
+    auto size = text.size();
+    auto k = needle.size();
+    ASSERT_EQ(
+        simd::simdStrstr(text.data(), size, needle.data(), k),
+        text.find(needle));
+  };
+  std::string haystack1("NOW AN FOWE\220ER ANNMAN THE ANPANMANEND");
+  std::string needle1("ANPANMAN");
+  std::string needle2("MAN THE");
+  std::string needle3("WE\220ER");
+  // At the beginning
+  std::string needle4("NOW ");
+  // At the end
+  std::string needle5("NEND");
+  // Nowhere
+  std::string needle6("NOT FOUND");
+  // Nowhere
+  std::string needle7("NOT FO\340ND");
+
+  std::string haystack2("ABC ABCDAB ABCDABCDABDE");
+  std::string needle11("ABCDABD");
+
+  std::string haystack3("abra abracad abracadabra");
+  std::string needle12("abracadabra");
+
+  std::string needle13("");
+  std::string haystack4("");
+
+  checkOne(haystack1, needle1);
+  checkOne(haystack1, needle2);
+  checkOne(haystack1, needle3);
+  checkOne(haystack1, needle4);
+  checkOne(haystack1, needle5);
+  checkOne(haystack1, needle6);
+  checkOne(haystack1, needle7);
+
+  // Can't find a long pattern in a short corpus
+  checkOne(needle1, haystack1);
+  // Find something in itself
+  checkOne(haystack1, haystack1);
+  // Find something in itself
+  checkOne(haystack2, haystack2);
+
+  checkOne(haystack2, needle11);
+  checkOne(haystack3, needle12);
+  // Find the empty string
+  checkOne(haystack1, needle13);
+  // Can't find in an empty haystack
+  checkOne(haystack4, needle1);
+
+  // Comment copied from the original code.
+  // Mikhail Levin found a problem, and this was the
+  // test that triggered it.
+  const std::string mikhailPattern =
+      "GATACACCTACCTTCACCAGTTACTCTATGCACTAGGTGCGCCAGGCCCATGCACAAGGGCTTGAGTGGATGGGAAGGA"
+      "TGTGCCCTAGTGATGGCAGCATAAGCTACGCAGAGAAGTTCCAGGGCAGAGTCACCATGACCAGGGACACATCCACGAG"
+      "CACAGCCTACATGGAGCTGAGCAGCCTGAGATCTGAAGACACGGCCATGTATTACTGTGGGAGAGATGTCTGGAGTGGT"
+      "TATTATTGCCCCGGTAATATTACTACTACTACTACTACATGGACGTCTGGGGCAAAGGGACCACG";
+  const std::string mikhailCorpus = std::string(8, 'a') + mikhailPattern;
+
+  checkOne(mikhailCorpus, mikhailPattern);
+}
+
+TEST_F(SimdUtilTest, variableNeedleSize) {
   std::string s1 = "aabbccddeeffgghhiijjkkllmmnnooppqqrrssttuuvvwwxxyyzz";
   std::string s2 = "aabbccddeeffgghhiijjkkllmmnnooppqqrrssttuuvvwwxxyyzz";
-  std::string s3 = "xxx";
+  std::string s3 = "01234567890123456789";
   auto test = [](char* text, size_t size, char* needle, size_t k) {
+    if (simd::simdStrstr(text, size, needle, k) !=
+        std::string_view(text, size).find(std::string_view(needle, k))) {
+      LOG(ERROR) << "text: " << std::string(text, size)
+                 << " needle :" << std::string(needle, k);
+    }
     ASSERT_EQ(
         simd::simdStrstr(text, size, needle, k),
         std::string_view(text, size).find(std::string_view(needle, k)));
   };
-  // Match cases : substrings in s2 should be a substring in s1.
+  // Match cases (prefix/middle/suffix): substrings in s2 should be a substring
+  // in s1. Choose different needle-size left from s2, testing prefix-match in
+  // s1.
+  for (size_t k = 0; k < s2.size(); k++) {
+    test(s1.data(), s1.size(), s2.data(), k);
+  }
+  // Choose different needle-size left from s2, testing middle-match in s1.
   for (int i = 0; i < 20; i++) {
     for (int k = 0; k < 28; k++) {
       char* data = s2.data() + i;
       test(s1.data(), s1.size(), data, k);
     }
   }
-  // Not match case : "xxx" not in s1.
-  test(s1.data(), s1.size(), s3.data(), s3.size());
+  // Choose different needle-size right from s2, testing suffix-match in s1.
+  for (size_t k = 0; k < s2.size(); k++) {
+    char* data = s2.data() + s2.size() - k;
+    test(s1.data(), s1.size(), data, k);
+  }
+  // Not match case : substring in s3 not in s1.
+  for (size_t k = 0; k < s3.size(); k++) {
+    test(s1.data(), s1.size(), s3.data(), k);
+  }
+
+  // FirstBlock match
+  for (size_t k = 0; k < s3.size(); k++) {
+    std::string somePrefix = "xxxxxx";
+    std::string matchString = "a" + std::string(k, 'x');
+    std::string someSuffix = "yyyyyyyy";
+    std::string text = somePrefix + matchString + someSuffix;
+    auto s = "a" + std::string(k, '9');
+    test(text.data(), text.size(), s.data(), s.size());
+  }
+  // FirstBlock and LastBlock match
+  for (size_t k = 0; k < s3.size(); k++) {
+    std::string somePrefix = "xxxxxx";
+    std::string matchString = "a" + std::string(k, 'x') + "b";
+    std::string someSuffix = "yyyyyyyy";
+    std::string text = somePrefix + matchString + someSuffix;
+    auto s = "a" + std::string(k, '9') + "b";
+    test(text.data(), text.size(), s.data(), s.size());
+  }
+}
+
+/// Copy from
+/// https://github.com/facebook/folly/blob/ce5edfb9b08ead9e78cb46879e7b9499861f7cd2/folly/test/FBStringTest.cpp#L1277
+/// clause11_21_4_7_2_a1
+TEST_F(SimdUtilTest, randomStringStrStr) {
+  std::string test;
+  const int kTestLoop = 1000;
+  auto checkOne =
+      [](const std::string& text, const std::string& needle, size_t pos) {
+        auto size = text.length() - pos;
+        auto textPtr = text.data() + pos;
+        auto k = needle.size();
+        ASSERT_EQ(
+            simd::simdStrstr(textPtr, size, needle.data(), k),
+            text.substr(pos).find(needle));
+      };
+  for (int i = 0; i < kTestLoop; i++) {
+    // clause11_21_4_7_2_a1
+    randomString(&test);
+    auto from = random(0, test.size());
+    auto length = random(0, test.size() - from);
+    std::string str = test.substr(from, length);
+    checkOne(test, str, random(0, test.size()));
+  }
 }
 
 } // namespace