From 1a2a31b4dbdb507dac95e97c45b71db676e3c588 Mon Sep 17 00:00:00 2001 From: Rauli Laine Date: Thu, 8 Feb 2024 16:37:01 +0200 Subject: [PATCH] Modernize things a bit Instead of header guard macros, use `#pragma once`, use `std::array` instead of C style arrays and so on. Generally prepare for 1.0.0 release. --- .github/workflows/build.yml | 14 +- .gitignore | 1 + CMakeLists.txt | 4 +- Doxyfile | 2 +- LICENSE.md | 6 +- README.md | 102 ++++++++- include/peelo/unicode/ctype.hpp | 7 +- include/peelo/unicode/ctype/_utils.hpp | 53 +++++ include/peelo/unicode/ctype/isalnum.hpp | 28 +-- include/peelo/unicode/ctype/isalpha.hpp | 28 +-- include/peelo/unicode/ctype/isblank.hpp | 28 +-- include/peelo/unicode/ctype/iscntrl.hpp | 28 +-- include/peelo/unicode/ctype/isdigit.hpp | 28 +-- include/peelo/unicode/ctype/isgraph.hpp | 28 +-- include/peelo/unicode/ctype/islower.hpp | 28 +-- include/peelo/unicode/ctype/isprint.hpp | 28 +-- include/peelo/unicode/ctype/ispunct.hpp | 28 +-- include/peelo/unicode/ctype/isspace.hpp | 28 +-- include/peelo/unicode/ctype/isupper.hpp | 28 +-- include/peelo/unicode/ctype/isvalid.hpp | 10 +- include/peelo/unicode/ctype/isxdigit.hpp | 10 +- include/peelo/unicode/ctype/tolower.hpp | 10 +- include/peelo/unicode/ctype/toupper.hpp | 10 +- include/peelo/unicode/encoding.hpp | 17 +- include/peelo/unicode/encoding/_utils.hpp | 132 +++++++++++ include/peelo/unicode/encoding/utf16be.hpp | 214 ++++++++--------- include/peelo/unicode/encoding/utf16le.hpp | 217 ++++++++---------- include/peelo/unicode/encoding/utf32be.hpp | 183 +++++++-------- include/peelo/unicode/encoding/utf32le.hpp | 183 +++++++-------- include/peelo/unicode/encoding/utf8.hpp | 252 ++++++++++----------- test/CMakeLists.txt | 14 ++ test/test_ctype.cpp | 77 +++---- test/test_utf16be.cpp | 38 +--- test/test_utf16le.cpp | 38 +--- test/test_utf32be.cpp | 38 +--- test/test_utf32le.cpp | 46 +--- test/test_utf8.cpp | 44 +--- 37 files changed, 985 insertions(+), 1045 deletions(-) create mode 100644 include/peelo/unicode/ctype/_utils.hpp create mode 100644 include/peelo/unicode/encoding/_utils.hpp diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a7e3d10..a56e03a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -9,10 +9,10 @@ jobs: matrix: os: [ubuntu-latest, macos-latest, windows-latest] steps: - - uses: actions/checkout@v2 - - name: Build - uses: ashutoshvarma/action-cmake-build@master - with: - build-dir: ${{ runner.workspace }}/build - build-type: Release - run-test: true + - uses: actions/checkout@v4 + - name: Build + uses: ashutoshvarma/action-cmake-build@master + with: + build-dir: ${{ runner.workspace }}/build + build-type: Release + run-test: true diff --git a/.gitignore b/.gitignore index 770ac09..3030848 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +/.vscode /build /doxygen *.o diff --git a/CMakeLists.txt b/CMakeLists.txt index b6318b0..1048bb9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,8 +1,8 @@ -CMAKE_MINIMUM_REQUIRED(VERSION 3.12) +CMAKE_MINIMUM_REQUIRED(VERSION 3.6) PROJECT( PeeloUnicode - VERSION 0.2.0 + VERSION 1.0.0 DESCRIPTION "Header only C++ Unicode utilities." HOMEPAGE_URL "https://github.com/peelonet/peelo-unicode" LANGUAGES CXX diff --git a/Doxyfile b/Doxyfile index 0c4f84f..bfa5bfb 100644 --- a/Doxyfile +++ b/Doxyfile @@ -858,7 +858,7 @@ EXCLUDE_PATTERNS = # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* -EXCLUDE_SYMBOLS = *::internal +EXCLUDE_SYMBOLS = *::utils # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include diff --git a/LICENSE.md b/LICENSE.md index 6e160e7..b2fd8d5 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,13 +1,13 @@ -Copyright (c) 2018, peelo.net +Copyright (c) 2018-2024, peelo.net All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: -* Redistributions of source code must retain the above copyright notice, this +- Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above copyright notice, +- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. diff --git a/README.md b/README.md index 177ee4f..1700237 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,108 @@ ![Build](https://github.com/peelonet/peelo-unicode/workflows/Build/badge.svg) -Collection of various Unicode related utility functions for C++17. +Collection of simple to use [Unicode] utilities for C++17. [Doxygen generated API documentation.][API] +[Unicode]: https://en.wikipedia.org/wiki/Unicode [API]: https://peelonet.github.io/peelo-unicode/index.html + +## Character testing functions + +The library ships with Unicode version of [ctype.h] header, containing +following functions inside `peelo::unicode::ctype` namespace: + +- `isalnum()` +- `isalpha()` +- `isblank()` +- `iscntrl()` +- `isdigit()` +- `isgraph()` +- `islower()` +- `isprint()` +- `ispunct()` +- `isspace()` +- `isupper()` +- `isxdigit()` +- `tolower()` +- `toupper()` +- And additional `isvalid()` function which tests whether given value is valid + Unicode codepoint. + +[ctype.h]: https://en.cppreference.com/w/cpp/header/cctype + +### Example + +```cpp +#include +#include + +int +main() +{ + using namespace peelo::unicode::ctype; + + std::cout << isalnum(U'Ä') << std::endl; + std::cout << isdigit(U'൧') << std::endl; + std::cout << isgraph(U'€') << std::endl; + std::cout << ispunct(U'\u2001') << std::endl; + std::cout << std::hex; + std::cout << tolower(U'Ä') << std::endl; + std::cout << toupper(U'ä') << std::endl; +} +``` + +## Character encodings + +The library also provides functions for encoding and decoding Unicode character +encodings. Both validating and non-validating (where all encoding/decoding +errors are ignored) functions are provided. + +Supported character encodings are: + +- [UTF-8] +- [UTF-16BE][UTF-16] +- [UTF-16LE][UTF-16] +- [UTF-32BE][UTF-32] +- [UTF-32LE][UTF-32] + +[UTF-8]: https://en.wikipedia.org/wiki/UTF-8 +[UTF-16]: https://en.wikipedia.org/wiki/UTF-16 +[UTF-32]: https://en.wikipedia.org/wiki/UTF-32 + +### Example + +```cpp +#include + +int +main() +{ + using namespace peelo::unicode::encoding; + + // Decode UTF-8 input, ignoring any decoding errors. + std::u32string utf8_decoded = utf8::decode("\xe2\x82\xac"); + + // Encode it back to byte string, ignoring any encoding errors. + std::string utf8_encoded = utf8::encode(utf8_decoded); + + // Decode UTF-32BE input with validation. + std::u32string utf32be_decoded; + if (utf32be::decode_validate("\x00\x00 \xac", utf32be_decoded)) + { + // Given input is valid UTF-32BE. + } else { + // Given input is invalid UTF-32BE. + } + + // Encode it back to byte string, with validation. + std::string utf32be_encoded; + if (utf32be::encode_validate(utf32be_decoded, utf32be_encoded)) + { + // Given input contained only valid Unicode code points. + } else { + // Given input contained invalid Unicode code points. + } +} +``` diff --git a/include/peelo/unicode/ctype.hpp b/include/peelo/unicode/ctype.hpp index a681f3b..d952b62 100644 --- a/include/peelo/unicode/ctype.hpp +++ b/include/peelo/unicode/ctype.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, peelo.net + * Copyright (c) 2018-2024, peelo.net * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,8 +24,7 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef PEELO_UNICODE_CTYPE_HPP_GUARD -#define PEELO_UNICODE_CTYPE_HPP_GUARD +#pragma once #include #include @@ -42,5 +41,3 @@ #include #include #include - -#endif /* !PEELO_UNICODE_CTYPE_HPP_GUARD */ diff --git a/include/peelo/unicode/ctype/_utils.hpp b/include/peelo/unicode/ctype/_utils.hpp new file mode 100644 index 0000000..ef732bb --- /dev/null +++ b/include/peelo/unicode/ctype/_utils.hpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2018-2024, peelo.net + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +#pragma once + +#include +#include + +namespace peelo::unicode::ctype::utils +{ + using range = std::pair; + + template + inline bool table_lookup(const std::array& table, char32_t c) + { + const auto size = table.size(); + + for (std::size_t i = 0; i < size; ++i) + { + const auto& range = table[i]; + + if (c >= range.first && c <= range.second) + { + return true; + } + } + + return false; + } +} diff --git a/include/peelo/unicode/ctype/isalnum.hpp b/include/peelo/unicode/ctype/isalnum.hpp index ed96229..5d7fe7d 100644 --- a/include/peelo/unicode/ctype/isalnum.hpp +++ b/include/peelo/unicode/ctype/isalnum.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, peelo.net + * Copyright (c) 2018-2024, peelo.net * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,18 +24,20 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef PEELO_UNICODE_CTYPE_ISALNUM_HPP_GUARD -#define PEELO_UNICODE_CTYPE_ISALNUM_HPP_GUARD +#pragma once + +#include namespace peelo::unicode::ctype { /** * Determines whether the given Unicode code point is alphanumeric. */ - inline bool isalnum(char32_t c) + inline bool + isalnum(char32_t c) { - static const char32_t alnum_table[436][2] = - { + static const std::array alnum_table = + {{ { 0x0030, 0x0039 }, { 0x0041, 0x005a }, { 0x0061, 0x007a }, { 0x00aa, 0x00aa }, { 0x00b5, 0x00b5 }, { 0x00ba, 0x00ba }, { 0x00c0, 0x00d6 }, { 0x00d8, 0x00f6 }, { 0x00f8, 0x0241 }, @@ -182,18 +184,8 @@ namespace peelo::unicode::ctype { 0x1d78a, 0x1d7a8 }, { 0x1d7aa, 0x1d7c2 }, { 0x1d7c4, 0x1d7c9 }, { 0x1d7ce, 0x1d7ff }, { 0x20000, 0x2a6d6 }, { 0x2f800, 0x2fa1d }, { 0xe0100, 0xe01ef } - }; - - for (int i = 0; i < 436; ++i) - { - if (c >= alnum_table[i][0] && c <= alnum_table[i][1]) - { - return true; - } - } + }}; - return false; + return utils::table_lookup(alnum_table, c); } } - -#endif /* !PEELO_UNICODE_CTYPE_ISALNUM_HPP_GUARD */ diff --git a/include/peelo/unicode/ctype/isalpha.hpp b/include/peelo/unicode/ctype/isalpha.hpp index 8086fe1..13b09d5 100644 --- a/include/peelo/unicode/ctype/isalpha.hpp +++ b/include/peelo/unicode/ctype/isalpha.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, peelo.net + * Copyright (c) 2018-2024, peelo.net * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,18 +24,20 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef PEELO_UNICODE_CTYPE_ISALPHA_HPP_GUARD -#define PEELO_UNICODE_CTYPE_ISALPHA_HPP_GUARD +#pragma once + +#include namespace peelo::unicode::ctype { /** * Determines whether the given Unicode code point is alphabetic. */ - inline bool isalpha(char32_t c) + inline bool + isalpha(char32_t c) { - static const char32_t alpha_table[418][2] = - { + static const std::array alpha_table = + {{ { 0x0041, 0x005a }, { 0x0061, 0x007a }, { 0x00aa, 0x00aa }, { 0x00b5, 0x00b5 }, { 0x00ba, 0x00ba }, { 0x00c0, 0x00d6 }, { 0x00d8, 0x00f6 }, { 0x00f8, 0x0241 }, { 0x0250, 0x02c1 }, @@ -176,18 +178,8 @@ namespace peelo::unicode::ctype { 0x1d770, 0x1d788 }, { 0x1d78a, 0x1d7a8 }, { 0x1d7aa, 0x1d7c2 }, { 0x1d7c4, 0x1d7c9 }, { 0x20000, 0x2a6d6 }, { 0x2f800, 0x2fa1d }, { 0xe0100, 0xe01ef } - }; - - for (int i = 0; i < 418; ++i) - { - if (c >= alpha_table[i][0] && c <= alpha_table[i][1]) - { - return true; - } - } + }}; - return false; + return utils::table_lookup(alpha_table, c); } } - -#endif /* !PEELO_UNICODE_CTYPE_ISALPHA_HPP_GUARD */ diff --git a/include/peelo/unicode/ctype/isblank.hpp b/include/peelo/unicode/ctype/isblank.hpp index 8606439..1ee7282 100644 --- a/include/peelo/unicode/ctype/isblank.hpp +++ b/include/peelo/unicode/ctype/isblank.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, peelo.net + * Copyright (c) 2018-2024, peelo.net * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,33 +24,25 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef PEELO_UNICODE_CTYPE_ISBLANK_HPP_GUARD -#define PEELO_UNICODE_CTYPE_ISBLANK_HPP_GUARD +#pragma once + +#include namespace peelo::unicode::ctype { /** * Determines whether the given Unicode code point is a blank character. */ - inline bool isblank(char32_t c) + inline bool + isblank(char32_t c) { - static const char32_t blank_table[9][2] = - { + static const std::array blank_table = + {{ { 0x0009, 0x0009 }, { 0x0020, 0x0020 }, { 0x00a0, 0x00a0 }, { 0x1680, 0x1680 }, { 0x180e, 0x180e }, { 0x2000, 0x200a }, { 0x202f, 0x202f }, { 0x205f, 0x205f }, { 0x3000, 0x3000 } - }; - - for (int i = 0; i < 9; ++i) - { - if (c >= blank_table[i][0] && c <= blank_table[i][1]) - { - return true; - } - } + }}; - return false; + return utils::table_lookup(blank_table, c); } } - -#endif /* !PEELO_UNICODE_CTYPE_ISBLANK_HPP_GUARD */ diff --git a/include/peelo/unicode/ctype/iscntrl.hpp b/include/peelo/unicode/ctype/iscntrl.hpp index f014d92..bb2a00a 100644 --- a/include/peelo/unicode/ctype/iscntrl.hpp +++ b/include/peelo/unicode/ctype/iscntrl.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, peelo.net + * Copyright (c) 2018-2024, peelo.net * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,18 +24,20 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef PEELO_UNICODE_CTYPE_ISCNTRL_HPP_GUARD -#define PEELO_UNICODE_CTYPE_ISCNTRL_HPP_GUARD +#pragma once + +#include namespace peelo::unicode::ctype { /** * Determines whether the given Unicode code point is a control character. */ - inline bool iscntrl(char32_t c) + inline bool + iscntrl(char32_t c) { - static const char32_t cntrl_table[19][2] = - { + static const std::array cntrl_table = + {{ { 0x0000, 0x001f }, { 0x007f, 0x009f }, { 0x00ad, 0x00ad }, { 0x0600, 0x0603 }, { 0x06dd, 0x06dd }, { 0x070f, 0x070f }, { 0x17b4, 0x17b5 }, { 0x200b, 0x200f }, { 0x202a, 0x202e }, @@ -43,18 +45,8 @@ namespace peelo::unicode::ctype { 0xfeff, 0xfeff }, { 0xfff9, 0xfffb }, { 0x1d173, 0x1d17a }, { 0xe0001, 0xe0001 }, { 0xe0020, 0xe007f }, { 0xf0000, 0xffffd }, { 0x100000, 0x10fffd } - }; - - for (int i = 0; i < 19; ++i) - { - if (c >= cntrl_table[i][0] && c <= cntrl_table[i][1]) - { - return true; - } - } + }}; - return false; + return utils::table_lookup(cntrl_table, c); } } - -#endif /* !PEELO_UNICODE_CTYPE_ISCNTRL_HPP_GUARD */ diff --git a/include/peelo/unicode/ctype/isdigit.hpp b/include/peelo/unicode/ctype/isdigit.hpp index 0cc9c82..8aaac9f 100644 --- a/include/peelo/unicode/ctype/isdigit.hpp +++ b/include/peelo/unicode/ctype/isdigit.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, peelo.net + * Copyright (c) 2018-2024, peelo.net * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,18 +24,20 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef PEELO_UNICODE_CTYPE_ISDIGIT_HPP_GUARD -#define PEELO_UNICODE_CTYPE_ISDIGIT_HPP_GUARD +#pragma once + +#include namespace peelo::unicode::ctype { /** * Determines whether the given Unicode code point is a digit. */ - inline bool isdigit(char32_t c) + inline bool + isdigit(char32_t c) { - static const char32_t digit_table[23][2] = - { + static const std::array digit_table = + {{ { 0x0030, 0x0039 }, { 0x0660, 0x0669 }, { 0x06f0, 0x06f9 }, { 0x0966, 0x096f }, { 0x09e6, 0x09ef }, { 0x0a66, 0x0a6f }, { 0x0ae6, 0x0aef }, { 0x0b66, 0x0b6f }, { 0x0be6, 0x0bef }, @@ -44,18 +46,8 @@ namespace peelo::unicode::ctype { 0x1040, 0x1049 }, { 0x17e0, 0x17e9 }, { 0x1810, 0x1819 }, { 0x1946, 0x194f }, { 0x19d0, 0x19d9 }, { 0xff10, 0xff19 }, { 0x104a0, 0x104a9 }, { 0x1d7ce, 0x1d7ff } - }; - - for (int i = 0; i < 23; ++i) - { - if (c >= digit_table[i][0] && c <= digit_table[i][1]) - { - return true; - } - } + }}; - return false; + return utils::table_lookup(digit_table, c); } } - -#endif /* !PEELO_UNICODE_CTYPE_ISDIGIT_HPP_GUARD */ diff --git a/include/peelo/unicode/ctype/isgraph.hpp b/include/peelo/unicode/ctype/isgraph.hpp index 19b4f45..4584520 100644 --- a/include/peelo/unicode/ctype/isgraph.hpp +++ b/include/peelo/unicode/ctype/isgraph.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, peelo.net + * Copyright (c) 2018-2024, peelo.net * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,8 +24,9 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef PEELO_UNICODE_CTYPE_ISGRAPH_HPP_GUARD -#define PEELO_UNICODE_CTYPE_ISGRAPH_HPP_GUARD +#pragma once + +#include namespace peelo::unicode::ctype { @@ -33,10 +34,11 @@ namespace peelo::unicode::ctype * Determines whether the given Unicode code point is a graphical * character. */ - inline bool isgraph(char32_t c) + inline bool + isgraph(char32_t c) { - static const char32_t graph_table[424][2] = - { + static const std::array graph_table = + {{ { 0x0021, 0x007e }, { 0x00a1, 0x0241 }, { 0x0250, 0x036f }, { 0x0374, 0x0375 }, { 0x037a, 0x037a }, { 0x037e, 0x037e }, { 0x0384, 0x038a }, { 0x038c, 0x038c }, { 0x038e, 0x03a1 }, @@ -179,18 +181,8 @@ namespace peelo::unicode::ctype { 0x20000, 0x2a6d6 }, { 0x2f800, 0x2fa1d }, { 0xe0001, 0xe0001 }, { 0xe0020, 0xe007f }, { 0xe0100, 0xe01ef }, { 0xf0000, 0xffffd }, { 0x100000, 0x10fffd }, - }; - - for (int i = 0; i < 424; ++i) - { - if (c >= graph_table[i][0] && c <= graph_table[i][1]) - { - return true; - } - } + }}; - return false; + return utils::table_lookup(graph_table, c); } } - -#endif /* !PEELO_UNICODE_CTYPE_ISGRAPH_HPP_GUARD */ diff --git a/include/peelo/unicode/ctype/islower.hpp b/include/peelo/unicode/ctype/islower.hpp index aa905f5..8860869 100644 --- a/include/peelo/unicode/ctype/islower.hpp +++ b/include/peelo/unicode/ctype/islower.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, peelo.net + * Copyright (c) 2018-2024, peelo.net * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,18 +24,20 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef PEELO_UNICODE_CTYPE_ISLOWER_HPP_GUARD -#define PEELO_UNICODE_CTYPE_ISLOWER_HPP_GUARD +#pragma once + +#include namespace peelo::unicode::ctype { /** * Determines whether the given Unicode code point is lowercase. */ - inline bool islower(char32_t c) + inline bool + islower(char32_t c) { - static const char32_t lower_table[480][2] = - { + static const std::array lower_table = + {{ { 0x0061, 0x007a }, { 0x00aa, 0x00aa }, { 0x00b5, 0x00b5 }, { 0x00ba, 0x00ba }, { 0x00df, 0x00f6 }, { 0x00f8, 0x00ff }, { 0x0101, 0x0101 }, { 0x0103, 0x0103 }, { 0x0105, 0x0105 }, @@ -196,18 +198,8 @@ namespace peelo::unicode::ctype { 0x1d6dc, 0x1d6e1 }, { 0x1d6fc, 0x1d714 }, { 0x1d716, 0x1d71b }, { 0x1d736, 0x1d74e }, { 0x1d750, 0x1d755 }, { 0x1d770, 0x1d788 }, { 0x1d78a, 0x1d78f }, { 0x1d7aa, 0x1d7c2 }, { 0x1d7c4, 0x1d7c9 } - }; - - for (int i = 0; i < 480; ++i) - { - if (c >= lower_table[i][0] && c <= lower_table[i][1]) - { - return true; - } - } + }}; - return false; + return utils::table_lookup(lower_table, c); } } - -#endif /* !PEELO_UNICODE_CTYPE_ISLOWER_HPP_GUARD */ diff --git a/include/peelo/unicode/ctype/isprint.hpp b/include/peelo/unicode/ctype/isprint.hpp index ebc331a..a77a9ab 100644 --- a/include/peelo/unicode/ctype/isprint.hpp +++ b/include/peelo/unicode/ctype/isprint.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, peelo.net + * Copyright (c) 2018-2024, peelo.net * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,18 +24,20 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef PEELO_UNICODE_CTYPE_ISPRINT_HPP_GUARD -#define PEELO_UNICODE_CTYPE_ISPRINT_HPP_GUARD +#pragma once + +#include namespace peelo::unicode::ctype { /** * Determines whether the given Unicode code point is a printing character. */ - inline bool isprint(char32_t c) + inline bool + isprint(char32_t c) { - static const char32_t print_table[423][2] = - { + static const std::array print_table = + {{ { 0x0009, 0x000d }, { 0x0020, 0x007e }, { 0x0085, 0x0085 }, { 0x00a0, 0x0241 }, { 0x0250, 0x036f }, { 0x0374, 0x0375 }, { 0x037a, 0x037a }, { 0x037e, 0x037e }, { 0x0384, 0x038a }, @@ -177,18 +179,8 @@ namespace peelo::unicode::ctype { 0x1d6a8, 0x1d7c9 }, { 0x1d7ce, 0x1d7ff }, { 0x20000, 0x2a6d6 }, { 0x2f800, 0x2fa1d }, { 0xe0001, 0xe0001 }, { 0xe0020, 0xe007f }, { 0xe0100, 0xe01ef }, { 0xf0000, 0xffffd }, { 0x100000, 0x10fffd }, - }; - - for (int i = 0; i < 423; ++i) - { - if (c >= print_table[i][0] && c <= print_table[i][1]) - { - return true; - } - } + }}; - return false; + return utils::table_lookup(print_table, c); } } - -#endif /* !PEELO_UNICODE_CTYPE_ISPRINT_HPP_GUARD */ diff --git a/include/peelo/unicode/ctype/ispunct.hpp b/include/peelo/unicode/ctype/ispunct.hpp index 7af21b9..2db201d 100644 --- a/include/peelo/unicode/ctype/ispunct.hpp +++ b/include/peelo/unicode/ctype/ispunct.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, peelo.net + * Copyright (c) 2018-2024, peelo.net * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,8 +24,9 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef PEELO_UNICODE_CTYPE_ISPUNCT_HPP_GUARD -#define PEELO_UNICODE_CTYPE_ISPUNCT_HPP_GUARD +#pragma once + +#include namespace peelo::unicode::ctype { @@ -33,10 +34,11 @@ namespace peelo::unicode::ctype * Determines whether the given Unicode code point is a punctuation * character. */ - inline bool ispunct(char32_t c) + inline bool + ispunct(char32_t c) { - static const char32_t punct_table[96][2] = - { + static const std::array punct_table = + {{ { 0x0021, 0x0023 }, { 0x0025, 0x002a }, { 0x002c, 0x002f }, { 0x003a, 0x003b }, { 0x003f, 0x0040 }, { 0x005b, 0x005d }, { 0x005f, 0x005f }, { 0x007b, 0x007b }, { 0x007d, 0x007d }, @@ -69,18 +71,8 @@ namespace peelo::unicode::ctype { 0xff1f, 0xff20 }, { 0xff3b, 0xff3d }, { 0xff3f, 0xff3f }, { 0xff5b, 0xff5b }, { 0xff5d, 0xff5d }, { 0xff5f, 0xff65 }, { 0x10100, 0x10101 }, { 0x1039f, 0x1039f }, { 0x10a50, 0x10a58 } - }; - - for (int i = 0; i < 96; ++i) - { - if (c >= punct_table[i][0] && c <= punct_table[i][1]) - { - return true; - } - } + }}; - return false; + return utils::table_lookup(punct_table, c); } } - -#endif /* !PEELO_UNICODE_CTYPE_ISPUNCT_HPP_GUARD */ diff --git a/include/peelo/unicode/ctype/isspace.hpp b/include/peelo/unicode/ctype/isspace.hpp index b1579cc..eead878 100644 --- a/include/peelo/unicode/ctype/isspace.hpp +++ b/include/peelo/unicode/ctype/isspace.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, peelo.net + * Copyright (c) 2018-2024, peelo.net * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,36 +24,28 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef PEELO_UNICODE_CTYPE_ISSPACE_HPP_GUARD -#define PEELO_UNICODE_CTYPE_ISSPACE_HPP_GUARD +#pragma once + +#include namespace peelo::unicode::ctype { /** * Determines whether the given Unicode code point is a space character. */ - inline bool isspace(char32_t c) + inline bool + isspace(char32_t c) { - static const char32_t space_table[11][2] = - { + static const std::array space_table = + {{ { 0x0009, 0x000d }, { 0x0020, 0x0020 }, { 0x0085, 0x0085 }, { 0x00a0, 0x00a0 }, { 0x1680, 0x1680 }, { 0x180e, 0x180e }, { 0x2000, 0x200a }, { 0x2028, 0x2029 }, { 0x202f, 0x202f }, { 0x205f, 0x205f }, { 0x3000, 0x3000 } - }; - - for (int i = 0; i < 11; ++i) - { - if (c >= space_table[i][0] && c <= space_table[i][1]) - { - return true; - } - } + }}; - return false; + return utils::table_lookup(space_table, c); } } - -#endif /* !PEELO_UNICODE_CTYPE_ISSPACE_HPP_GUARD */ diff --git a/include/peelo/unicode/ctype/isupper.hpp b/include/peelo/unicode/ctype/isupper.hpp index 63d7f84..9bd1e40 100644 --- a/include/peelo/unicode/ctype/isupper.hpp +++ b/include/peelo/unicode/ctype/isupper.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, peelo.net + * Copyright (c) 2018-2024, peelo.net * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,8 +24,9 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef PEELO_UNICODE_CTYPE_ISUPPER_HPP_GUARD -#define PEELO_UNICODE_CTYPE_ISUPPER_HPP_GUARD +#pragma once + +#include namespace peelo::unicode::ctype { @@ -33,10 +34,11 @@ namespace peelo::unicode::ctype * Determines whether the given Unicode code point is an uppercase * character. */ - inline bool isupper(char32_t c) + inline bool + isupper(char32_t c) { - static const char32_t upper_table[476][2] = - { + static const std::array upper_table = + {{ { 0x0041, 0x005a }, { 0x00c0, 0x00d6 }, { 0x00d8, 0x00de }, { 0x0100, 0x0100 }, { 0x0102, 0x0102 }, { 0x0104, 0x0104 }, { 0x0106, 0x0106 }, { 0x0108, 0x0108 }, { 0x010a, 0x010a }, @@ -196,18 +198,8 @@ namespace peelo::unicode::ctype { 0x1d608, 0x1d621 }, { 0x1d63c, 0x1d655 }, { 0x1d670, 0x1d689 }, { 0x1d6a8, 0x1d6c0 }, { 0x1d6e2, 0x1d6fa }, { 0x1d71c, 0x1d734 }, { 0x1d756, 0x1d76e }, { 0x1d790, 0x1d7a8 } - }; - - for (int i = 0; i < 476; ++i) - { - if (c >= upper_table[i][0] && c <= upper_table[i][1]) - { - return true; - } - } + }}; - return false; + return utils::table_lookup(upper_table, c); } } - -#endif /* !PEELO_UNICODE_CTYPE_ISUPPER_HPP_GUARD */ diff --git a/include/peelo/unicode/ctype/isvalid.hpp b/include/peelo/unicode/ctype/isvalid.hpp index cdd0fd3..d1e622e 100644 --- a/include/peelo/unicode/ctype/isvalid.hpp +++ b/include/peelo/unicode/ctype/isvalid.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, peelo.net + * Copyright (c) 2018-2024, peelo.net * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,15 +24,15 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef PEELO_UNICODE_CTYPE_ISVALID_HPP_GUARD -#define PEELO_UNICODE_CTYPE_ISVALID_HPP_GUARD +#pragma once namespace peelo::unicode::ctype { /** * Determines whether given Unicode code point is valid or not. */ - inline bool isvalid(char32_t c) + inline bool + isvalid(char32_t c) { return !(c > 0x10ffff || (c & 0xfffe) == 0xfffe @@ -40,5 +40,3 @@ namespace peelo::unicode::ctype || (c >= 0xfdd0 && c <= 0xfdef)); } } - -#endif /* !PEELO_UNICODE_CTYPE_ISVALID_HPP_GUARD */ diff --git a/include/peelo/unicode/ctype/isxdigit.hpp b/include/peelo/unicode/ctype/isxdigit.hpp index b30c04e..13f839e 100644 --- a/include/peelo/unicode/ctype/isxdigit.hpp +++ b/include/peelo/unicode/ctype/isxdigit.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, peelo.net + * Copyright (c) 2018-2024, peelo.net * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,8 +24,7 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef PEELO_UNICODE_CTYPE_ISXDIGIT_HPP_GUARD -#define PEELO_UNICODE_CTYPE_ISXDIGIT_HPP_GUARD +#pragma once namespace peelo::unicode::ctype { @@ -33,12 +32,11 @@ namespace peelo::unicode::ctype * Determines whether the given Unicode code point is a hexadecimal * character. */ - inline bool isxdigit(char32_t c) + inline bool + isxdigit(char32_t c) { return (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f') || (c >= '0' && c <= '9'); } } - -#endif /* !PEELO_UNICODE_CTYPE_ISXDIGIT_HPP_GUARD */ diff --git a/include/peelo/unicode/ctype/tolower.hpp b/include/peelo/unicode/ctype/tolower.hpp index 29c3004..d4411c0 100644 --- a/include/peelo/unicode/ctype/tolower.hpp +++ b/include/peelo/unicode/ctype/tolower.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, peelo.net + * Copyright (c) 2018-2024, peelo.net * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,15 +24,15 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef PEELO_UNICODE_CTYPE_TOLOWER_HPP_GUARD -#define PEELO_UNICODE_CTYPE_TOLOWER_HPP_GUARD +#pragma once namespace peelo::unicode::ctype { /** * Converts given Unicode code point into lowercase. */ - inline char32_t tolower(char32_t c) + inline char32_t + tolower(char32_t c) { if (c >= 'A' && c <= 'Z') { @@ -105,5 +105,3 @@ namespace peelo::unicode::ctype return c; } } - -#endif /* !PEELO_UNICODE_CTYPE_TOLOWER_HPP_GUARD */ diff --git a/include/peelo/unicode/ctype/toupper.hpp b/include/peelo/unicode/ctype/toupper.hpp index 768307f..4bd85eb 100644 --- a/include/peelo/unicode/ctype/toupper.hpp +++ b/include/peelo/unicode/ctype/toupper.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, peelo.net + * Copyright (c) 2018-2024, peelo.net * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,15 +24,15 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef PEELO_UNICODE_CTYPE_TOUPPER_HPP_GUARD -#define PEELO_UNICODE_CTYPE_TOUPPER_HPP_GUARD +#pragma once namespace peelo::unicode::ctype { /** * Converts given Unicode code point into uppercase. */ - inline char32_t toupper(char32_t c) + inline char32_t + toupper(char32_t c) { if (c >= 'a' && c <= 'z') { @@ -105,5 +105,3 @@ namespace peelo::unicode::ctype return c; } } - -#endif /* !PEELO_UNICODE_CTYPE_TOUPPER_HPP_GUARD */ diff --git a/include/peelo/unicode/encoding.hpp b/include/peelo/unicode/encoding.hpp index 966fc43..5484e73 100644 --- a/include/peelo/unicode/encoding.hpp +++ b/include/peelo/unicode/encoding.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, peelo.net + * Copyright (c) 2018-2024, peelo.net * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,13 +24,10 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef PEELO_UNICODE_ENCODING_HPP_GUARD -#define PEELO_UNICODE_ENCODING_HPP_GUARD +#pragma once -#include -#include -#include -#include -#include - -#endif /* !PEELO_UNICODE_ENCODING_HPP_GUARD */ +#include +#include +#include +#include +#include diff --git a/include/peelo/unicode/encoding/_utils.hpp b/include/peelo/unicode/encoding/_utils.hpp new file mode 100644 index 0000000..14e553d --- /dev/null +++ b/include/peelo/unicode/encoding/_utils.hpp @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2018-2024, peelo.net + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +#pragma once + +#include +#include + +#include + +namespace peelo::unicode::encoding::utils +{ + using encode_callback = void(*)( + char32_t codepoint, + std::string& output + ); + using decode_callback = bool(*)( + const char* input, + std::size_t& i, + const std::size_t length, + char32_t& result + ); + + inline std::string + encode( + const char32_t* input, + std::size_t length, + encode_callback callback + ) + { + std::string result; + + for (std::size_t i = 0; i < length; ++i) + { + const auto& c = input[i]; + + if (!ctype::isvalid(c)) + { + continue; + } + callback(c, result); + } + + return result; + } + + inline bool + encode_validate( + const char32_t* input, + std::size_t length, + std::string& output, + encode_callback callback + ) + { + for (std::size_t i = 0; i < length; ++i) + { + const auto& c = input[i]; + + if (!ctype::isvalid(c)) + { + return false; + } + callback(c, output); + } + + return true; + } + + inline std::u32string + decode(const char* input, std::size_t length, decode_callback callback) + { + std::u32string result; + + for (std::size_t i = 0; i < length;) + { + char32_t c; + + if (!callback(input, i, length, c)) + { + continue; + } + result.append(1, c); + } + + return result; + } + + inline bool + decode_validate( + const char* input, + std::size_t length, + std::u32string& output, + decode_callback callback + ) + { + for (std::size_t i = 0; i < length;) + { + char32_t c; + + if (!callback(input, i, length, c)) + { + return false; + } + output.append(1, c); + } + + return true; + } +} diff --git a/include/peelo/unicode/encoding/utf16be.hpp b/include/peelo/unicode/encoding/utf16be.hpp index fd88d2c..6d54321 100644 --- a/include/peelo/unicode/encoding/utf16be.hpp +++ b/include/peelo/unicode/encoding/utf16be.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, peelo.net + * Copyright (c) 2018-2024, peelo.net * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,30 +24,27 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ -#include -#include +#pragma once -#include +#include namespace peelo::unicode::encoding::utf16be { - namespace internal + inline void + encode_codepoint(char32_t c, std::string& output) { - inline void encode_codepoint(char32_t c, std::string& output) + if (c > 0xffff) { - if (c > 0xffff) - { - const auto high = (c >> 10) + 0xd7c0; - const auto low = (c & 0x3ff) + 0xdc00; - - output.append(1, static_cast((high >> 8) & 0xff)); - output.append(1, static_cast(high & 0xff)); - output.append(1, static_cast((low >> 8) & 0xff)); - output.append(1, static_cast(low & 0xff)); - } else { - output.append(1, static_cast((c & 0xff00) >> 8)); - output.append(1, static_cast(c & 0xff)); - } + const auto high = (c >> 10) + 0xd7c0; + const auto low = (c & 0x3ff) + 0xdc00; + + output.append(1, static_cast((high >> 8) & 0xff)); + output.append(1, static_cast(high & 0xff)); + output.append(1, static_cast((low >> 8) & 0xff)); + output.append(1, static_cast(low & 0xff)); + } else { + output.append(1, static_cast((c & 0xff00) >> 8)); + output.append(1, static_cast(c & 0xff)); } } @@ -55,106 +52,86 @@ namespace peelo::unicode::encoding::utf16be * Encodes given Unicode character sequence into a byte string using UTF-16BE * character encoding. Encoding errors are ignored. */ - inline std::string encode(const char32_t* input, const std::size_t length) + inline std::string + encode(const char32_t* input, const std::size_t length) { - std::string output; - - for (std::u32string::size_type i = 0; i < length; ++i) - { - const auto& c = input[i]; - - if (!ctype::isvalid(c)) - { - continue; - } - internal::encode_codepoint(c, output); - } - - return output; + return utils::encode(input, length, encode_codepoint); } /** * Encodes given Unicode string into a byte string using UTF-16BE character * encoding. Encoding errors are ignored. */ - inline std::string encode(const std::u32string& input) + inline + std::string encode(const std::u32string& input) { return encode(input.c_str(), input.length()); } - namespace internal + inline bool + decode_advance( + const char* input, + std::string::size_type& i, + const std::string::size_type length, + char32_t& output + ) { - inline bool decode_advance( - const char* input, - std::string::size_type& i, - const std::string::size_type length, - char32_t& output - ) - { - unsigned char p0; - unsigned char p1; - unsigned char p2; - unsigned char p3; + unsigned char p0; + unsigned char p1; + unsigned char p2; + unsigned char p3; - if ((input[i] & 0xfc) == 0xd8) + if ((input[i] & 0xfc) == 0xd8) + { + if (i + 3 >= length) { - if (i + 3 >= length) - { - return false; - } - p0 = static_cast(input[i]); - p1 = static_cast(input[i + 1]); - p2 = static_cast(input[i + 2]); - p3 = static_cast(input[i + 3]); - output = static_cast( - ((((p0 << 8) + p1) & 0x03ff) << 10) - + (((p2 << 8) + p3) & 0x03ff) + 0x10000 - ); - i += 4; - } else { - if (i + 1 >= length) - { - return false; - } - p0 = static_cast(input[i]); - p1 = static_cast(input[i + 1]); - output = static_cast(p0 * 256 + p1); - i += 2; + return false; } - - return true; + p0 = static_cast(input[i]); + p1 = static_cast(input[i + 1]); + p2 = static_cast(input[i + 2]); + p3 = static_cast(input[i + 3]); + output = static_cast( + ((((p0 << 8) + p1) & 0x03ff) << 10) + + (((p2 << 8) + p3) & 0x03ff) + 0x10000 + ); + i += 4; + } else { + if (i + 1 >= length) + { + return false; + } + p0 = static_cast(input[i]); + p1 = static_cast(input[i + 1]); + output = static_cast(p0 * 256 + p1); + i += 2; } + + return true; } /** * Decodes given byte sequence into Unicode string using UTF-16BE character * encoding. Decoding errors are ignored. */ - inline std::u32string decode(const char* input, const std::size_t length) + inline std::u32string + decode(const char* input, const std::size_t length) { - std::u32string output; - - for (std::string::size_type i = 0; i < length;) - { - char32_t c; - - if (!internal::decode_advance(input, i, length, c)) - { - break; - } - output.append(1, c); - } - - return output; + return utils::decode(input, length, decode_advance); } /** * Decodes given byte string into Unicode string using UTF-16BE character * encoding. Decoding errors are ignored. */ - inline std::u32string decode(const std::string& input) + inline std::u32string + decode(const std::string& input) { - return decode(input.c_str(), input.length()); + return utils::decode( + input.c_str(), + input.length(), + decode_advance + ); } /** @@ -163,24 +140,19 @@ namespace peelo::unicode::encoding::utf16be * encoding error is encountered, this function will return false, otherwise * it returns true. */ - inline bool encode_validate( + inline bool + encode_validate( const char32_t* input, const std::size_t length, std::string& output ) { - for (std::size_t i = 0; i < length; ++i) - { - const auto& c = input[i]; - - if (!ctype::isvalid(c)) - { - return false; - } - internal::encode_codepoint(c, output); - } - - return true; + return utils::encode_validate( + input, + length, + output, + encode_codepoint + ); } /** @@ -194,7 +166,12 @@ namespace peelo::unicode::encoding::utf16be std::string& output ) { - return encode_validate(input.c_str(), input.length(), output); + return utils::encode_validate( + input.c_str(), + input.length(), + output, + encode_codepoint + ); } /** @@ -203,24 +180,19 @@ namespace peelo::unicode::encoding::utf16be * error is encountered, this function will return false, otherwise it * returns true. */ - inline bool decode_validate( + inline bool + decode_validate( const char* input, const std::size_t length, std::u32string& output ) { - for (std::size_t i = 0; i < length;) - { - char32_t c; - - if (!internal::decode_advance(input, i, length, c)) - { - return false; - } - output.append(1, c); - } - - return true; + return utils::decode_validate( + input, + length, + output, + decode_advance + ); } /** @@ -229,11 +201,17 @@ namespace peelo::unicode::encoding::utf16be * error is encountered, this function will return false, otherwise it * returns true. */ - inline bool decode_validate( + inline bool + decode_validate( const std::string& input, std::u32string& output ) { - return decode_validate(input.c_str(), input.length(), output); + return utils::decode_validate( + input.c_str(), + input.length(), + output, + decode_advance + ); } } diff --git a/include/peelo/unicode/encoding/utf16le.hpp b/include/peelo/unicode/encoding/utf16le.hpp index 427bf9e..4d44e1b 100644 --- a/include/peelo/unicode/encoding/utf16le.hpp +++ b/include/peelo/unicode/encoding/utf16le.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, peelo.net + * Copyright (c) 2018-2024, peelo.net * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,30 +24,27 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ -#include -#include +#pragma once -#include +#include namespace peelo::unicode::encoding::utf16le { - namespace internal + inline void + encode_codepoint(char32_t c, std::string& output) { - inline void encode_codepoint(char32_t c, std::string& output) + if (c > 0xffff) { - if (c > 0xffff) - { - const auto high = (c >> 10) + 0xd7c0; - const auto low = (c & 0x3ff) + 0xdc00; - - output.append(1, static_cast(high & 0xff)); - output.append(1, static_cast((high >> 8) & 0xff)); - output.append(1, static_cast(low & 0xff)); - output.append(1, static_cast((low >> 8) & 0xff)); - } else { - output.append(1, static_cast(c & 0xff)); - output.append(1, static_cast((c & 0xff00) >> 8)); - } + const auto high = (c >> 10) + 0xd7c0; + const auto low = (c & 0x3ff) + 0xdc00; + + output.append(1, static_cast(high & 0xff)); + output.append(1, static_cast((high >> 8) & 0xff)); + output.append(1, static_cast(low & 0xff)); + output.append(1, static_cast((low >> 8) & 0xff)); + } else { + output.append(1, static_cast(c & 0xff)); + output.append(1, static_cast((c & 0xff00) >> 8)); } } @@ -55,106 +52,86 @@ namespace peelo::unicode::encoding::utf16le * Encodes given Unicode character sequence into a byte string using UTF-16LE * character encoding. Encoding errors are ignored. */ - inline std::string encode(const char32_t* input, const std::size_t length) + inline std::string + encode(const char32_t* input, const std::size_t length) { - std::string output; - - for (std::u32string::size_type i = 0; i < length; ++i) - { - const auto& c = input[i]; - - if (!ctype::isvalid(c)) - { - continue; - } - internal::encode_codepoint(c, output); - } - - return output; + return utils::encode(input, length, encode_codepoint); } /** * Encodes given Unicode string into a byte string using UTF-16LE character * encoding. Encoding errors are ignored. */ - inline std::string encode(const std::u32string& input) + inline std::string + encode(const std::u32string& input) { return encode(input.c_str(), input.length()); } - namespace internal + inline bool + decode_advance( + const char* input, + std::string::size_type& i, + const std::string::size_type length, + char32_t& output + ) { - inline bool decode_advance( - const char* input, - std::string::size_type& i, - const std::string::size_type length, - char32_t& output - ) - { - unsigned char p0; - unsigned char p1; - unsigned char p2; - unsigned char p3; + unsigned char p0; + unsigned char p1; + unsigned char p2; + unsigned char p3; - if (i + 1 < length && (input[i + 1] & 0xfc) == 0xd8) + if (i + 1 < length && (input[i + 1] & 0xfc) == 0xd8) + { + if (i + 3 >= length) { - if (i + 3 >= length) - { - return false; - } - p0 = static_cast(input[i]); - p1 = static_cast(input[i + 1]); - p2 = static_cast(input[i + 2]); - p3 = static_cast(input[i + 3]); - output = static_cast( - ((((p1 << 8) + p0) & 0x03ff) << 10) - + (((p3 << 8) + p2) & 0x03ff) + 0x10000 - ); - i += 4; - } else { - if (i + 1 >= length) - { - return false; - } - p0 = static_cast(input[i]); - p1 = static_cast(input[i + 1]); - output = static_cast(p1 * 256 + p0); - i += 2; + return false; } - - return true; + p0 = static_cast(input[i]); + p1 = static_cast(input[i + 1]); + p2 = static_cast(input[i + 2]); + p3 = static_cast(input[i + 3]); + output = static_cast( + ((((p1 << 8) + p0) & 0x03ff) << 10) + + (((p3 << 8) + p2) & 0x03ff) + 0x10000 + ); + i += 4; + } else { + if (i + 1 >= length) + { + return false; + } + p0 = static_cast(input[i]); + p1 = static_cast(input[i + 1]); + output = static_cast(p1 * 256 + p0); + i += 2; } + + return true; } /** * Decodes given byte sequence into Unicode string using UTF-16LE character * encoding. Decoding errors are ignored. */ - inline std::u32string decode(const char* input, const std::size_t length) + inline std::u32string + decode(const char* input, const std::size_t length) { - std::u32string output; - - for (std::string::size_type i = 0; i < length;) - { - char32_t c; - - if (!internal::decode_advance(input, i, length, c)) - { - break; - } - output.append(1, c); - } - - return output; + return utils::decode(input, length, decode_advance); } /** * Decodes given byte string into Unicode string using UTF-16LE character * encoding. Decoding errors are ignored. */ - inline std::u32string decode(const std::string& input) + inline std::u32string + decode(const std::string& input) { - return decode(input.c_str(), input.length()); + return utils::decode( + input.c_str(), + input.length(), + decode_advance + ); } /** @@ -163,24 +140,19 @@ namespace peelo::unicode::encoding::utf16le * encoding error is encountered, this function will return false, otherwise * it returns true. */ - inline bool encode_validate( + inline bool + encode_validate( const char32_t* input, const std::size_t length, std::string& output ) { - for (std::size_t i = 0; i < length; ++i) - { - const auto& c = input[i]; - - if (!ctype::isvalid(c)) - { - return false; - } - internal::encode_codepoint(c, output); - } - - return true; + return utils::encode_validate( + input, + length, + output, + encode_codepoint + ); } /** @@ -189,12 +161,18 @@ namespace peelo::unicode::encoding::utf16le * is encountered, this function will return false, otherwise it returns * true. */ - inline bool encode_validate( + inline bool + encode_validate( const std::u32string& input, std::string& output ) { - return encode_validate(input.c_str(), input.length(), output); + return utils::encode_validate( + input.c_str(), + input.length(), + output, + encode_codepoint + ); } /** @@ -203,24 +181,19 @@ namespace peelo::unicode::encoding::utf16le * error is encountered, this function will return false, otherwise it * returns true. */ - inline bool decode_validate( + inline bool + decode_validate( const char* input, const std::size_t length, std::u32string& output ) { - for (std::size_t i = 0; i < length;) - { - char32_t c; - - if (!internal::decode_advance(input, i, length, c)) - { - return false; - } - output.append(1, c); - } - - return true; + return utils::decode_validate( + input, + length, + output, + decode_advance + ); } /** @@ -229,11 +202,17 @@ namespace peelo::unicode::encoding::utf16le * error is encountered, this function will return false, otherwise it * returns true. */ - inline bool decode_validate( + inline bool + decode_validate( const std::string& input, std::u32string& output ) { - return decode_validate(input.c_str(), input.length(), output); + return utils::decode_validate( + input.c_str(), + input.length(), + output, + decode_advance + ); } } diff --git a/include/peelo/unicode/encoding/utf32be.hpp b/include/peelo/unicode/encoding/utf32be.hpp index 507db2d..16ab354 100644 --- a/include/peelo/unicode/encoding/utf32be.hpp +++ b/include/peelo/unicode/encoding/utf32be.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, peelo.net + * Copyright (c) 2018-2024, peelo.net * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,117 +24,90 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef PEELO_UNICODE_ENCODING_UTF32BE_HPP_GUARD -#define PEELO_UNICODE_ENCODING_UTF32BE_HPP_GUARD +#pragma once -#include -#include - -#include +#include namespace peelo::unicode::encoding::utf32be { - namespace internal + inline void + encode_codepoint(char32_t c, std::string& output) { - inline void encode_codepoint(char32_t c, std::string& output) - { - output.append(1, static_cast((c & 0xff000000) >> 24)); - output.append(1, static_cast((c & 0xff0000) >> 16)); - output.append(1, static_cast((c & 0xff00) >> 8)); - output.append(1, static_cast(c & 0xff)); - } + output.append(1, static_cast((c & 0xff000000) >> 24)); + output.append(1, static_cast((c & 0xff0000) >> 16)); + output.append(1, static_cast((c & 0xff00) >> 8)); + output.append(1, static_cast(c & 0xff)); } /** * Encodes given Unicode character sequence into a byte string using UTF-32BE * character encoding. Encoding errors are ignored. */ - inline std::string encode(const char32_t* input, const std::size_t length) + inline std::string + encode(const char32_t* input, const std::size_t length) { - std::string output; - - output.reserve(length * 4); - for (std::u32string::size_type i = 0; i < length; ++i) - { - const auto& c = input[i]; - - if (!ctype::isvalid(c)) - { - continue; - } - internal::encode_codepoint(c, output); - } - - return output; + return utils::encode(input, length, encode_codepoint); } /** * Encodes given Unicode string into a byte string using UTF-32BE character * encoding. Encoding errors are ignored. */ - inline std::string encode(const std::u32string& input) + inline std::string + encode(const std::u32string& input) { return encode(input.c_str(), input.length()); } - namespace internal + inline bool + decode_advance( + const char* input, + std::string::size_type& i, + const std::string::size_type length, + char32_t& output + ) { - inline bool decode_advance( - const char* input, - std::string::size_type& i, - const std::string::size_type length, - char32_t& output - ) - { - unsigned char p0; - unsigned char p1; - unsigned char p2; - unsigned char p3; - - if (i + 3 >= length) - { - return false; - } - p0 = static_cast(input[i]); - p1 = static_cast(input[i + 1]); - p2 = static_cast(input[i + 2]); - p3 = static_cast(input[i + 3]); - output = static_cast(((p0 * 256 + p1) * 256 + p2) * 256 + p3); - i += 4; + unsigned char p0; + unsigned char p1; + unsigned char p2; + unsigned char p3; - return true; + if (i + 3 >= length) + { + return false; } + p0 = static_cast(input[i]); + p1 = static_cast(input[i + 1]); + p2 = static_cast(input[i + 2]); + p3 = static_cast(input[i + 3]); + output = static_cast(((p0 * 256 + p1) * 256 + p2) * 256 + p3); + i += 4; + + return true; } /** * Decodes given byte sequence into Unicode string using UTF-32BE character * encoding. Decoding errors are ignored. */ - inline std::u32string decode(const char* input, const std::size_t length) + inline std::u32string + decode(const char* input, const std::size_t length) { - std::u32string output; - - for (std::string::size_type i = 0; i < length;) - { - char32_t c; - - if (!internal::decode_advance(input, i, length, c)) - { - break; - } - output.append(1, c); - } - - return output; + return utils::decode(input, length, decode_advance); } /** * Decodes given byte string into Unicode string using UTF-32BE character * encoding. Decoding errors are ignored. */ - inline std::u32string decode(const std::string& input) + inline std::u32string + decode(const std::string& input) { - return decode(input.c_str(), input.length()); + return utils::decode( + input.c_str(), + input.length(), + decode_advance + ); } /** @@ -143,24 +116,19 @@ namespace peelo::unicode::encoding::utf32be * encoding error is encountered, this function will return false, otherwise * it returns true. */ - inline bool encode_validate( + inline bool + encode_validate( const char32_t* input, const std::size_t length, std::string& output ) { - for (std::size_t i = 0; i < length; ++i) - { - const auto& c = input[i]; - - if (!ctype::isvalid(c)) - { - return false; - } - internal::encode_codepoint(c, output); - } - - return true; + return utils::encode_validate( + input, + length, + output, + encode_codepoint + ); } /** @@ -169,12 +137,18 @@ namespace peelo::unicode::encoding::utf32be * is encountered, this function will return false, otherwise it returns * true. */ - inline bool encode_validate( + inline bool + encode_validate( const std::u32string& input, std::string& output ) { - return encode_validate(input.c_str(), input.length(), output); + return utils::encode_validate( + input.c_str(), + input.length(), + output, + encode_codepoint + ); } /** @@ -183,24 +157,19 @@ namespace peelo::unicode::encoding::utf32be * error is encountered, this function will return false, otherwise it * returns true. */ - inline bool decode_validate( + inline bool + decode_validate( const char* input, const std::size_t length, std::u32string& output ) { - for (std::size_t i = 0; i < length;) - { - char32_t c; - - if (!internal::decode_advance(input, i, length, c)) - { - return false; - } - output.append(1, c); - } - - return true; + return utils::decode_validate( + input, + length, + output, + decode_advance + ); } /** @@ -209,13 +178,17 @@ namespace peelo::unicode::encoding::utf32be * error is encountered, this function will return false, otherwise it * returns true. */ - inline bool decode_validate( + inline bool + decode_validate( const std::string& input, std::u32string& output ) { - return decode_validate(input.c_str(), input.length(), output); + return utils::decode_validate( + input.c_str(), + input.length(), + output, + decode_advance + ); } } - -#endif /* !PEELO_UNICODE_ENCODING_UTF32BE_HPP_GUARD */ diff --git a/include/peelo/unicode/encoding/utf32le.hpp b/include/peelo/unicode/encoding/utf32le.hpp index 6b5ed1a..a24e50e 100644 --- a/include/peelo/unicode/encoding/utf32le.hpp +++ b/include/peelo/unicode/encoding/utf32le.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, peelo.net + * Copyright (c) 2018-2024, peelo.net * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,117 +24,90 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef PEELO_UNICODE_ENCODING_UTF32BE_HPP_GUARD -#define PEELO_UNICODE_ENCODING_UTF32BE_HPP_GUARD +#pragma once -#include -#include - -#include +#include namespace peelo::unicode::encoding::utf32le { - namespace internal + inline void + encode_codepoint(char32_t c, std::string& output) { - inline void encode_codepoint(char32_t c, std::string& output) - { - output.append(1, static_cast(c & 0xff)); - output.append(1, static_cast((c & 0xff00) >> 8)); - output.append(1, static_cast((c & 0xff0000) >> 16)); - output.append(1, static_cast((c & 0xff000000) >> 24)); - } + output.append(1, static_cast(c & 0xff)); + output.append(1, static_cast((c & 0xff00) >> 8)); + output.append(1, static_cast((c & 0xff0000) >> 16)); + output.append(1, static_cast((c & 0xff000000) >> 24)); } /** * Encodes given Unicode character sequence into a byte string using UTF-32BE * character encoding. Encoding errors are ignored. */ - inline std::string encode(const char32_t* input, const std::size_t length) + inline std::string + encode(const char32_t* input, const std::size_t length) { - std::string output; - - output.reserve(length * 4); - for (std::u32string::size_type i = 0; i < length; ++i) - { - const auto& c = input[i]; - - if (!ctype::isvalid(c)) - { - continue; - } - internal::encode_codepoint(c, output); - } - - return output; + return utils::encode(input, length, encode_codepoint); } /** * Encodes given Unicode string into a byte string using UTF-32BE character * encoding. Encoding errors are ignored. */ - inline std::string encode(const std::u32string& input) + inline std::string + encode(const std::u32string& input) { return encode(input.c_str(), input.length()); } - namespace internal + inline bool + decode_advance( + const char* input, + std::string::size_type& i, + const std::string::size_type length, + char32_t& output + ) { - inline bool decode_advance( - const char* input, - std::string::size_type& i, - const std::string::size_type length, - char32_t& output - ) - { - unsigned char p0; - unsigned char p1; - unsigned char p2; - unsigned char p3; - - if (i + 3 >= length) - { - return false; - } - p0 = static_cast(input[i]); - p1 = static_cast(input[i + 1]); - p2 = static_cast(input[i + 2]); - p3 = static_cast(input[i + 3]); - output = static_cast(((p3 * 256 + p2) * 256 + p1) * 256 + p0); - i += 4; + unsigned char p0; + unsigned char p1; + unsigned char p2; + unsigned char p3; - return true; + if (i + 3 >= length) + { + return false; } + p0 = static_cast(input[i]); + p1 = static_cast(input[i + 1]); + p2 = static_cast(input[i + 2]); + p3 = static_cast(input[i + 3]); + output = static_cast(((p3 * 256 + p2) * 256 + p1) * 256 + p0); + i += 4; + + return true; } /** * Decodes given byte sequence into Unicode string using UTF-32BE character * encoding. Decoding errors are ignored. */ - inline std::u32string decode(const char* input, const std::size_t length) + inline std::u32string + decode(const char* input, const std::size_t length) { - std::u32string output; - - for (std::string::size_type i = 0; i < length;) - { - char32_t c; - - if (!internal::decode_advance(input, i, length, c)) - { - break; - } - output.append(1, c); - } - - return output; + return utils::decode(input, length, decode_advance); } /** * Decodes given byte string into Unicode string using UTF-32BE character * encoding. Decoding errors are ignored. */ - inline std::u32string decode(const std::string& input) + inline std::u32string + decode(const std::string& input) { - return decode(input.c_str(), input.length()); + return utils::decode( + input.c_str(), + input.length(), + decode_advance + ); } /** @@ -143,24 +116,19 @@ namespace peelo::unicode::encoding::utf32le * encoding error is encountered, this function will return false, otherwise * it returns true. */ - inline bool encode_validate( + inline bool + encode_validate( const char32_t* input, const std::size_t length, std::string& output ) { - for (std::size_t i = 0; i < length; ++i) - { - const auto& c = input[i]; - - if (!ctype::isvalid(c)) - { - return false; - } - internal::encode_codepoint(c, output); - } - - return true; + return utils::encode_validate( + input, + length, + output, + encode_codepoint + ); } /** @@ -169,12 +137,18 @@ namespace peelo::unicode::encoding::utf32le * is encountered, this function will return false, otherwise it returns * true. */ - inline bool encode_validate( + inline bool + encode_validate( const std::u32string& input, std::string& output ) { - return encode_validate(input.c_str(), input.length(), output); + return utils::encode_validate( + input.c_str(), + input.length(), + output, + encode_codepoint + ); } /** @@ -183,24 +157,19 @@ namespace peelo::unicode::encoding::utf32le * error is encountered, this function will return false, otherwise it * returns true. */ - inline bool decode_validate( + inline bool + decode_validate( const char* input, const std::size_t length, std::u32string& output ) { - for (std::size_t i = 0; i < length;) - { - char32_t c; - - if (!internal::decode_advance(input, i, length, c)) - { - return false; - } - output.append(1, c); - } - - return true; + return utils::decode_validate( + input, + length, + output, + decode_advance + ); } /** @@ -209,13 +178,17 @@ namespace peelo::unicode::encoding::utf32le * error is encountered, this function will return false, otherwise it * returns true. */ - inline bool decode_validate( + inline bool + decode_validate( const std::string& input, std::u32string& output ) { - return decode_validate(input.c_str(), input.length(), output); + return utils::decode_validate( + input.c_str(), + input.length(), + output, + decode_advance + ); } } - -#endif /* !PEELO_UNICODE_ENCODING_UTF32BE_HPP_GUARD */ diff --git a/include/peelo/unicode/encoding/utf8.hpp b/include/peelo/unicode/encoding/utf8.hpp index 3379cbf..73cf7f7 100644 --- a/include/peelo/unicode/encoding/utf8.hpp +++ b/include/peelo/unicode/encoding/utf8.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, peelo.net + * Copyright (c) 2018-2024, peelo.net * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,13 +24,9 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef PEELO_UNICODE_ENCODING_UTF8_HPP_GUARD -#define PEELO_UNICODE_ENCODING_UTF8_HPP_GUARD +#pragma once -#include -#include - -#include +#include namespace peelo::unicode::encoding::utf8 { @@ -40,7 +36,8 @@ namespace peelo::unicode::encoding::utf8 * beginning of sequence is invalid according to the UTF-8 * specification), 0 will be returned instead. */ - inline std::size_t sequence_length(unsigned char byte) + inline std::size_t + sequence_length(unsigned char byte) { if ((byte & 0x80) == 0x00) { @@ -71,7 +68,8 @@ namespace peelo::unicode::encoding::utf8 * given Unicode code point with UTF-8 character encoding. If the given * code point cannot be encoded, 0 will be returned instead. */ - inline std::size_t codepoint_length(char32_t c) + inline std::size_t + codepoint_length(char32_t c) { if (c < 0x007f) { @@ -93,30 +91,28 @@ namespace peelo::unicode::encoding::utf8 } } - namespace internal + inline void + encode_codepoint(char32_t c, std::string& output) { - inline void encode_codepoint(char32_t c, std::string& output) + if (c <= 0x7f) { - if (c <= 0x7f) - { - output.append(1, static_cast(c)); - } - else if (c <= 0x07ff) - { - output.append(1, static_cast(0xc0 | ((c & 0x7c0) >> 6))); - output.append(1, static_cast(0x80 | (c & 0x3f))); - } - else if (c <= 0xffff) - { - output.append(1, static_cast(0xe0 | ((c & 0xf000)) >> 12)); - output.append(1, static_cast(0x80 | ((c & 0xfc0)) >> 6)); - output.append(1, static_cast(0x80 | (c & 0x3f))); - } else { - output.append(1, static_cast(0xf0 | ((c & 0x1c0000) >> 18))); - output.append(1, static_cast(0x80 | ((c & 0x3f000) >> 12))); - output.append(1, static_cast(0x80 | ((c & 0xfc0) >> 6))); - output.append(1, static_cast(0x80 | (c & 0x3f))); - } + output.append(1, static_cast(c)); + } + else if (c <= 0x07ff) + { + output.append(1, static_cast(0xc0 | ((c & 0x7c0) >> 6))); + output.append(1, static_cast(0x80 | (c & 0x3f))); + } + else if (c <= 0xffff) + { + output.append(1, static_cast(0xe0 | ((c & 0xf000)) >> 12)); + output.append(1, static_cast(0x80 | ((c & 0xfc0)) >> 6)); + output.append(1, static_cast(0x80 | (c & 0x3f))); + } else { + output.append(1, static_cast(0xf0 | ((c & 0x1c0000) >> 18))); + output.append(1, static_cast(0x80 | ((c & 0x3f000) >> 12))); + output.append(1, static_cast(0x80 | ((c & 0xfc0) >> 6))); + output.append(1, static_cast(0x80 | (c & 0x3f))); } } @@ -124,115 +120,99 @@ namespace peelo::unicode::encoding::utf8 * Encodes given Unicode character sequence into a byte string using UTF-8 * character encoding. Encoding errors are ignored. */ - inline std::string encode(const char32_t* input, const std::size_t length) + inline std::string + encode(const char32_t* input, std::size_t length) { - std::string output; - - for (std::size_t i = 0; i < length; ++i) - { - const auto& c = input[i]; - - if (!ctype::isvalid(c)) - { - continue; - } - internal::encode_codepoint(c, output); - } - - return output; + return utils::encode(input, length, encode_codepoint); } /** * Encodes given Unicode string into a byte string using UTF-8 character * encoding. Encoding errors are ignored. */ - inline std::string encode(const std::u32string& input) + inline std::string + encode(const std::u32string& input) { - return encode(input.c_str(), input.length()); + return utils::encode( + input.c_str(), + input.length(), + encode_codepoint + ); } - namespace internal + inline bool + decode_advance( + const char* input, + std::size_t& i, + const std::size_t length, + char32_t& result + ) { - inline bool decode_advance( - const char* input, - std::size_t& i, - const std::size_t length, - char32_t& result - ) - { - const auto seq_length = sequence_length(input[i]); + const auto seq_length = sequence_length(input[i]); - if (!seq_length || i + (seq_length - 1) >= length) - { - return false; - } + if (!seq_length || i + (seq_length - 1) >= length) + { + return false; + } - switch (seq_length) - { - case 1: - result = static_cast(input[i]); - break; + switch (seq_length) + { + case 1: + result = static_cast(input[i]); + break; - case 2: - result = static_cast(input[i] & 0x1f); - break; + case 2: + result = static_cast(input[i] & 0x1f); + break; - case 3: - result = static_cast(input[i] & 0x0f); - break; + case 3: + result = static_cast(input[i] & 0x0f); + break; - case 4: - result = static_cast(input[i] & 0x07); - break; + case 4: + result = static_cast(input[i] & 0x07); + break; - default: - return false; - } + default: + return false; + } - for (std::size_t j = 1; j < seq_length; ++j) + for (std::size_t j = 1; j < seq_length; ++j) + { + if ((input[i + j] & 0xc0) != 0x80) { - if ((input[i + j] & 0xc0) != 0x80) - { - return false; - } - result = (result << 6) | (input[i + j] & 0x3f); + return false; } + result = (result << 6) | (input[i + j] & 0x3f); + } - i += seq_length; + i += seq_length; - return true; - } + return true; } /** * Decodes given byte sequence into Unicode string using UTF-8 character * encoding. Decoding errors are ignored. */ - inline std::u32string decode(const char* input, const std::size_t length) + inline std::u32string + decode(const char* input, const std::size_t length) { - std::u32string output; - - for (std::size_t i = 0; i < length;) - { - char32_t c; - - if (!internal::decode_advance(input, i, length, c)) - { - break; - } - output.append(1, c); - } - - return output; + return utils::decode(input, length, decode_advance); } /** * Decodes given byte string into Unicode string using UTF-8 character * encoding. Decoding errors are ignored. */ - inline std::u32string decode(const std::string& input) + inline std::u32string + decode(const std::string& input) { - return decode(input.c_str(), input.length()); + return utils::decode( + input.c_str(), + input.length(), + decode_advance + ); } /** @@ -241,24 +221,19 @@ namespace peelo::unicode::encoding::utf8 * encoding error is encountered, this function will return false, otherwise * it returns true. */ - inline bool encode_validate( + inline bool + encode_validate( const char32_t* input, const std::size_t length, std::string& output ) { - for (std::size_t i = 0; i < length; ++i) - { - const auto& c = input[i]; - - if (!ctype::isvalid(c)) - { - return false; - } - internal::encode_codepoint(c, output); - } - - return true; + return utils::encode_validate( + input, + length, + output, + encode_codepoint + ); } /** @@ -267,9 +242,15 @@ namespace peelo::unicode::encoding::utf8 * error is encountered, this function will return false, otherwise it * returns true. */ - inline bool encode_validate(const std::u32string& input, std::string& output) + inline bool + encode_validate(const std::u32string& input, std::string& output) { - return encode_validate(input.c_str(), input.length(), output); + return utils::encode_validate( + input.c_str(), + input.length(), + output, + encode_codepoint + ); } /** @@ -278,24 +259,19 @@ namespace peelo::unicode::encoding::utf8 * error is encountered, this function will return false, otherwise it * returns true. */ - inline bool decode_validate( + inline bool + decode_validate( const char* input, const std::size_t length, std::u32string& output ) { - for (std::size_t i = 0; i < length;) - { - char32_t c; - - if (!internal::decode_advance(input, i, length, c)) - { - return false; - } - output.append(1, c); - } - - return true; + return utils::decode_validate( + input, + length, + output, + decode_advance + ); } /** @@ -304,10 +280,14 @@ namespace peelo::unicode::encoding::utf8 * error is encountered, this function will return false, otherwise it * returns true. */ - inline bool decode_validate(const std::string& input, std::u32string& output) + inline bool + decode_validate(const std::string& input, std::u32string& output) { - return decode_validate(input.c_str(), input.length(), output); + return utils::decode_validate( + input.c_str(), + input.length(), + output, + decode_advance + ); } } - -#endif /* !PEELO_UNICODE_ENCODING_UTF8_HPP_GUARD */ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ad04bbc..3d89621 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -17,6 +17,20 @@ FOREACH(TEST_FILENAME ${TEST_SOURCES}) cxx_std_17 ) + IF(MSVC) + TARGET_COMPILE_OPTIONS( + ${TEST_NAME} + PRIVATE + /W4 /WX + ) + ELSE() + TARGET_COMPILE_OPTIONS( + ${TEST_NAME} + PRIVATE + -Wall -Werror + ) + ENDIF() + TARGET_LINK_LIBRARIES( ${TEST_NAME} PeeloUnicode diff --git a/test/test_ctype.cpp b/test/test_ctype.cpp index ccb2eed..427e507 100644 --- a/test/test_ctype.cpp +++ b/test/test_ctype.cpp @@ -1,35 +1,9 @@ -/* - * Copyright (c) 2018-2020, peelo.net - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ #include -#include #include -static void test_isvalid() +static void +test_isvalid() { using peelo::unicode::ctype::isvalid; @@ -41,7 +15,8 @@ static void test_isvalid() assert(!isvalid(0xd801)); } -static void test_isalpha() +static void +test_isalpha() { using peelo::unicode::ctype::isalpha; @@ -55,7 +30,8 @@ static void test_isalpha() assert(!isalpha(U'-')); } -static void test_isalnum() +static void +test_isalnum() { using peelo::unicode::ctype::isalnum; @@ -70,7 +46,8 @@ static void test_isalnum() assert(!isalnum(U'€')); } -static void test_isdigit() +static void +test_isdigit() { using peelo::unicode::ctype::isdigit; @@ -92,7 +69,8 @@ static void test_isdigit() assert(isdigit(U'᠐')); } -static void test_isxdigit() +static void +test_isxdigit() { using peelo::unicode::ctype::isxdigit; @@ -114,7 +92,8 @@ static void test_isxdigit() assert(!isxdigit(U'€')); } -static void test_isblank() +static void +test_isblank() { using peelo::unicode::ctype::isblank; @@ -127,7 +106,8 @@ static void test_isblank() assert(!isblank(U'€')); } -static void test_iscntrl() +static void +test_iscntrl() { using peelo::unicode::ctype::iscntrl; @@ -138,7 +118,8 @@ static void test_iscntrl() assert(!iscntrl(U' ')); } -static void test_isgraph() +static void +test_isgraph() { using peelo::unicode::ctype::isgraph; @@ -149,7 +130,8 @@ static void test_isgraph() assert(!isgraph(U'\u093b')); } -static void test_isprint() +static void +test_isprint() { using peelo::unicode::ctype::isprint; @@ -160,7 +142,8 @@ static void test_isprint() assert(!isprint(U'\u074c')); } -static void test_ispunct() +static void +test_ispunct() { using peelo::unicode::ctype::ispunct; @@ -171,7 +154,8 @@ static void test_ispunct() assert(!ispunct(U' ')); } -static void test_isspace() +static void +test_isspace() { using peelo::unicode::ctype::isspace; @@ -183,7 +167,8 @@ static void test_isspace() assert(!isspace(U'-')); } -static void test_islower() +static void +test_islower() { using peelo::unicode::ctype::islower; @@ -195,7 +180,8 @@ static void test_islower() assert(!islower(U'5')); } -static void test_isupper() +static void +test_isupper() { using peelo::unicode::ctype::isupper; @@ -207,7 +193,8 @@ static void test_isupper() assert(!isupper(U'5')); } -static void test_tolower() +static void +test_tolower() { using peelo::unicode::ctype::tolower; @@ -216,7 +203,8 @@ static void test_tolower() assert(tolower(U'5') == U'5'); } -static void test_toupper() +static void +test_toupper() { using peelo::unicode::ctype::toupper; @@ -225,7 +213,8 @@ static void test_toupper() assert(toupper(U'5') == U'5'); } -int main() +int +main() { test_isvalid(); test_isalpha(); @@ -242,6 +231,4 @@ int main() test_isupper(); test_tolower(); test_toupper(); - - return EXIT_SUCCESS; } diff --git a/test/test_utf16be.cpp b/test/test_utf16be.cpp index 0ee1ba5..879c448 100644 --- a/test/test_utf16be.cpp +++ b/test/test_utf16be.cpp @@ -1,35 +1,9 @@ -/* - * Copyright (c) 2018-2020, peelo.net - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ #include -#include #include -static void test_encode() +static void +test_encode() { using peelo::unicode::encoding::utf16be::encode; @@ -39,7 +13,8 @@ static void test_encode() assert(!encode(U"𐍈").compare(std::string("\xd8\x00\xdfH", 4))); } -static void test_decode() +static void +test_decode() { using peelo::unicode::encoding::utf16be::decode; @@ -49,10 +24,9 @@ static void test_decode() assert(!decode(std::string("\xd8\x00\xdfH", 4)).compare(U"𐍈")); } -int main() +int +main() { test_encode(); test_decode(); - - return EXIT_SUCCESS; } diff --git a/test/test_utf16le.cpp b/test/test_utf16le.cpp index 146d480..eec1d56 100644 --- a/test/test_utf16le.cpp +++ b/test/test_utf16le.cpp @@ -1,35 +1,9 @@ -/* - * Copyright (c) 2018-2020, peelo.net - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ #include -#include #include -static void test_encode() +static void +test_encode() { using peelo::unicode::encoding::utf16le::encode; @@ -39,7 +13,8 @@ static void test_encode() assert(!encode(U"𐍈").compare(std::string("\x00\xd8H\xdf", 4))); } -static void test_decode() +static void +test_decode() { using peelo::unicode::encoding::utf16le::decode; @@ -49,10 +24,9 @@ static void test_decode() assert(!decode(std::string("\x00\xd8H\xdf", 4)).compare(U"𐍈")); } -int main() +int +main() { test_encode(); test_decode(); - - return EXIT_SUCCESS; } diff --git a/test/test_utf32be.cpp b/test/test_utf32be.cpp index 7daee50..59195e6 100644 --- a/test/test_utf32be.cpp +++ b/test/test_utf32be.cpp @@ -1,35 +1,9 @@ -/* - * Copyright (c) 2018-2020, peelo.net - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ #include -#include #include -static void test_encode() +static void +test_encode() { using peelo::unicode::encoding::utf32be::encode; @@ -39,7 +13,8 @@ static void test_encode() assert(!encode(U"𐍈").compare(std::string("\x00\x01\x03H", 4))); } -static void test_decode() +static void +test_decode() { using peelo::unicode::encoding::utf32be::decode; @@ -49,10 +24,9 @@ static void test_decode() assert(!decode(std::string("\x00\x01\x03H", 4)).compare(U"𐍈")); } -int main() +int +main() { test_encode(); test_decode(); - - return EXIT_SUCCESS; } diff --git a/test/test_utf32le.cpp b/test/test_utf32le.cpp index b98330d..5984212 100644 --- a/test/test_utf32le.cpp +++ b/test/test_utf32le.cpp @@ -1,35 +1,9 @@ -/* - * Copyright (c) 2018-2020, peelo.net - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ #include -#include #include -static void test_encode() +static void +test_encode() { using peelo::unicode::encoding::utf32le::encode; @@ -39,20 +13,20 @@ static void test_encode() assert(!encode(U"𐍈").compare(std::string("H\x03\x01\x00", 4))); } -static void test_decode() +static void +test_decode() { using peelo::unicode::encoding::utf32le::decode; - assert(!decode(std::string("$\x00\x00\x00", 4)).compare(U"$")); - assert(!decode(std::string("\xa2\x00\x00\x00", 4)).compare(U"¢")); - assert(!decode(std::string("\xac \x00\x00", 4)).compare(U"€")); - assert(!decode(std::string("H\x03\x01\x00", 4)).compare(U"𐍈")); + assert(!decode("$\x00\x00\x00", 4).compare(U"$")); + assert(!decode("\xa2\x00\x00\x00", 4).compare(U"¢")); + assert(!decode("\xac \x00\x00", 4).compare(U"€")); + assert(!decode("H\x03\x01\x00", 4).compare(U"𐍈")); } -int main() +int +main() { test_encode(); test_decode(); - - return EXIT_SUCCESS; } diff --git a/test/test_utf8.cpp b/test/test_utf8.cpp index 5026da6..f1e7052 100644 --- a/test/test_utf8.cpp +++ b/test/test_utf8.cpp @@ -1,35 +1,9 @@ -/* - * Copyright (c) 2018-2020, peelo.net - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ #include -#include #include -static void test_encode() +static void +test_encode() { using peelo::unicode::encoding::utf8::encode; @@ -39,7 +13,8 @@ static void test_encode() assert(!encode(U"𐍈").compare("\xf0\x90\x8d\x88")); } -static void test_decode() +static void +test_decode() { using peelo::unicode::encoding::utf8::decode; @@ -49,7 +24,8 @@ static void test_decode() assert(!decode("\xf0\x90\x8d\x88").compare(U"𐍈")); } -static void test_sequence_length() +static void +test_sequence_length() { using peelo::unicode::encoding::utf8::sequence_length; @@ -59,7 +35,8 @@ static void test_sequence_length() assert(sequence_length(240) == 4); } -static void test_codepoint_length() +static void +test_codepoint_length() { using peelo::unicode::encoding::utf8::codepoint_length; @@ -69,12 +46,11 @@ static void test_codepoint_length() assert(codepoint_length(0x10348) == 4); } -int main() +int +main() { test_encode(); test_decode(); test_sequence_length(); test_codepoint_length(); - - return EXIT_SUCCESS; }