From 23238fd05a79608f563db978a67c562cd4b21721 Mon Sep 17 00:00:00 2001 From: Rauli Laine Date: Wed, 16 Oct 2024 09:03:02 +0300 Subject: [PATCH] Move BOM stuff to its own namespace --- CMakeLists.txt | 2 +- README.md | 4 ++-- include/peelo/unicode/bom.hpp | 38 ++++++++++++++++---------------- test/test_bom.cpp | 41 +++++++++++++++++------------------ 4 files changed, 42 insertions(+), 43 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4b2149a..c9ef967 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ CMAKE_MINIMUM_REQUIRED(VERSION 3.6) PROJECT( PeeloUnicode - VERSION 1.1.0 + VERSION 2.0.0 DESCRIPTION "Header only C++ Unicode utilities." HOMEPAGE_URL "https://github.com/peelonet/peelo-unicode" LANGUAGES CXX diff --git a/README.md b/README.md index 2ae5d60..8d6bcce 100644 --- a/README.md +++ b/README.md @@ -159,9 +159,9 @@ main() length = f.gcount(); f.close(); - if (const auto bom = peelo::unicode::detect_bom(buffer, length)) + if (const auto bom = peelo::unicode::bom::detect(buffer, length)) { - if (*bom == peelo::unicode::bom::utf16_be) + if (*bom == peelo::unicode::bom::type::utf16_be) { std::cout << "File has UTF-16BE BOM." << std::endl; } else { diff --git a/include/peelo/unicode/bom.hpp b/include/peelo/unicode/bom.hpp index 327f703..717f54b 100644 --- a/include/peelo/unicode/bom.hpp +++ b/include/peelo/unicode/bom.hpp @@ -31,12 +31,12 @@ #include #include -namespace peelo::unicode +namespace peelo::unicode::bom { /** * Enumeration of different recognized BOM types. */ - enum class bom + enum class type { utf8, utf16_be, @@ -59,14 +59,14 @@ namespace peelo::unicode * @return Byte order mark detected in the given byte string, or null option * if the given byte string does not contain byte order mark.
*/ - inline std::optional - detect_bom(const char* input, std::size_t length) + inline std::optional + detect(const char* input, std::size_t length) { struct bom_info { const char* bytes; std::size_t length; - bom type; + enum type type; }; static constexpr std::size_t bom_array_size = 11; static const std::array bom_array = @@ -74,57 +74,57 @@ namespace peelo::unicode { "\xef\xbb\xbf", 3, - bom::utf8, + type::utf8, }, { "\0\0\xfe\xff", 4, - bom::utf32_be, + type::utf32_be, }, { "\xff\xfe\0\0", 4, - bom::utf32_le, + type::utf32_le, }, { "\xfe\xff", 2, - bom::utf16_be, + type::utf16_be, }, { "\xff\xfe", 2, - bom::utf16_le, + type::utf16_le, }, { "\x2b\x2f\x76", 3, - bom::utf7, + type::utf7, }, { "\xf7\x64\x4c", 3, - bom::utf1, + type::utf1, }, { "\xdd\x73\x66\x73", 4, - bom::utf_ebcdic + type::utf_ebcdic }, { "\x0e\xfe\xff", 3, - bom::scsu + type::scsu }, { "\xfb\xee\x28", 3, - bom::bocu_1 + type::bocu_1 }, { "\x84\x31\x95\x33", 4, - bom::gb18030 + type::gb18030 }, }}; @@ -152,9 +152,9 @@ namespace peelo::unicode * @return Byte order mark detected in the given byte string, or null option * if the given byte string does not contain byte order mark. */ - inline std::optional - detect_bom(const std::string& input) + inline std::optional + detect(const std::string& input) { - return detect_bom(input.c_str(), input.length()); + return detect(input.c_str(), input.length()); } } diff --git a/test/test_bom.cpp b/test/test_bom.cpp index b806811..603f185 100644 --- a/test/test_bom.cpp +++ b/test/test_bom.cpp @@ -6,19 +6,18 @@ # pragma warning( disable : 4100 ) #endif -using peelo::unicode::detect_bom; -using peelo::unicode::bom; +using namespace peelo::unicode::bom; static void test_recognized_bom( - bom expected_type, + type expected_type, const char* input, std::size_t length ) { // This looks weird but in GitHub CI I get warnings about unused variables if // I do this in some other way. 
- if (const auto result = detect_bom(input, length)) + if (const auto result = detect(input, length)) { assert(*result == expected_type); } else { @@ -29,83 +28,83 @@ test_recognized_bom( static void test_utf8() { - test_recognized_bom(bom::utf8, "\xef\xbb\xbf", 3); + test_recognized_bom(type::utf8, "\xef\xbb\xbf", 3); } static void test_utf16_be() { - test_recognized_bom(bom::utf16_be, "\xfe\xff", 2); + test_recognized_bom(type::utf16_be, "\xfe\xff", 2); } static void test_utf16_le() { - test_recognized_bom(bom::utf16_le, "\xff\xfe", 2); + test_recognized_bom(type::utf16_le, "\xff\xfe", 2); } static void test_utf32_be() { - test_recognized_bom(bom::utf32_be, "\0\0\xfe\xff", 4); + test_recognized_bom(type::utf32_be, "\0\0\xfe\xff", 4); } static void test_utf32_le() { - test_recognized_bom(bom::utf32_le, "\xff\xfe\0\0", 4); + test_recognized_bom(type::utf32_le, "\xff\xfe\0\0", 4); } static void test_utf7() { - test_recognized_bom(bom::utf7, "\x2b\x2f\x76", 3); + test_recognized_bom(type::utf7, "\x2b\x2f\x76", 3); } static void test_utf1() { - test_recognized_bom(bom::utf1, "\xf7\x64\x4c", 3); + test_recognized_bom(type::utf1, "\xf7\x64\x4c", 3); } static void test_utf_ebcdic() { - test_recognized_bom(bom::utf_ebcdic, "\xdd\x73\x66\x73", 4); + test_recognized_bom(type::utf_ebcdic, "\xdd\x73\x66\x73", 4); } static void test_scsu() { - test_recognized_bom(bom::scsu, "\x0e\xfe\xff", 3); + test_recognized_bom(type::scsu, "\x0e\xfe\xff", 3); } static void test_bocu_1() { - test_recognized_bom(bom::bocu_1, "\xfb\xee\x28", 3); + test_recognized_bom(type::bocu_1, "\xfb\xee\x28", 3); } static void test_gb18030() { - test_recognized_bom(bom::gb18030, "\x84\x31\x95\x33", 4); + test_recognized_bom(type::gb18030, "\x84\x31\x95\x33", 4); } static void test_unrecognized_bom() { - assert(!detect_bom("", 0)); - assert(!detect_bom("a", 1)); - assert(!detect_bom("a\xef\xbb\xbf", 4)); - assert(!detect_bom("\x00\xbb\xbf\xef\xbb\xbf", 6)); + assert(!detect("", 0)); + assert(!detect("a", 
1)); + assert(!detect("a\xef\xbb\xbf", 4)); + assert(!detect("\x00\xbb\xbf\xef\xbb\xbf", 6)); } static void test_with_string() { - assert(!!detect_bom(std::string("\xef\xbb\xbf"))); - assert(!detect_bom(std::string("a"))); + assert(!!detect(std::string("\xef\xbb\xbf"))); + assert(!detect(std::string("a"))); } int