From 170a143d543bd2cc828f5678c7df9b38927901d2 Mon Sep 17 00:00:00 2001 From: Hudd Date: Mon, 11 Mar 2024 21:14:29 +0400 Subject: [PATCH] unicode: add utf8::skip Moving random bits of code into awlib. I remember needing this in the past, so I'll add it here. --- utility/include/aw/utility/unicode/utf8.h | 37 +++++++++++++++-------- utility/tests/utf8toutf16.c++ | 10 ++++++ 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/utility/include/aw/utility/unicode/utf8.h b/utility/include/aw/utility/unicode/utf8.h index e5f5f3e0..c9869b0a 100644 --- a/utility/include/aw/utility/unicode/utf8.h +++ b/utility/include/aw/utility/unicode/utf8.h @@ -140,18 +140,6 @@ struct codec { return output; } - /*! - * Advance to the next sequence of code units - */ - template - static Iterator next(Iterator input, Iterator end) - { - while ((input < end) && !is_head(*input) && !is_ascii(*input)) - ++input; - - return input; - } - /*! * Decode UTF-8 sequence, if valid */ @@ -207,6 +195,31 @@ struct codec { return input; } + + /*! + * Advance to the start of the next sequence of code units: steps past code units that are neither head nor ASCII and stops at end; input is returned unchanged if it already points at a head or ASCII unit + */ + template + static Iterator next(Iterator input, Iterator end) + { + while ((input < end) && !is_head(*input) && !is_ascii(*input)) + ++input; + + return input; + } + + /*!
+ * Skip up to n code points (sequences of code units), stopping early if end is reached; returns the position just past the last skipped sequence. Sequences are not validated. + */ + template + static Iterator skip(Iterator input, Iterator end, size_t n) + { + while (input < end && n > 0) { + input = next(std::next(input), end); + --n; + } + return input; + } }; } // namespace aw::unicode::utf8 #endif//aw_utility_utf8_h diff --git a/utility/tests/utf8toutf16.c++ b/utility/tests/utf8toutf16.c++ index 008b7782..088f3d60 100644 --- a/utility/tests/utf8toutf16.c++ +++ b/utility/tests/utf8toutf16.c++ @@ -4,6 +4,16 @@ TestFile("utf8toutf16"); namespace aw { +Test(utf8skip) { + std::string s1 = "αβγ"; + std::string s2 = "こんにちは"; + + auto it1 = aw::unicode::utf8::codec::skip(begin(s1), end(s1), 2); + auto it2 = aw::unicode::utf8::codec::skip(begin(s2), end(s2), 4); + TestEqual(std::string(it1, end(s1)), "γ"); + TestEqual(std::string(it2, end(s2)), "は"); +} + Test(utf8toutf16) { const std::pair utf_strings[] = {