From 738d417df8578a429c399d23fa6e170797cff634 Mon Sep 17 00:00:00 2001 From: Liu Zhangjian Date: Mon, 2 Dec 2024 10:46:16 +0800 Subject: [PATCH] feat: [editor] Add encoding detection and conversion support Add encoding detection and conversion functionality to the editor, including: - Integrate chardet/uchardet/icu libraries for encoding detection - Add GB18030-2022 encoding support with special character handling - Implement encoding conversion between different character sets - Add encoding selection UI in status bar - Support reload file with different encodings - Add encoding configuration file with supported encoding list The changes improve text file handling by: - Automatically detecting file encodings on open - Supporting manual encoding selection via UI - Preserving encoding when saving files - Handling special cases for Chinese encodings - Providing better user feedback about current encoding Log: Add encoding detection and conversion support to editor --- debian/control | 5 +- src/plugins/codeeditor/CMakeLists.txt | 9 + src/plugins/codeeditor/codeeditor.cpp | 2 +- src/plugins/codeeditor/codeeditor.qrc | 1 + src/plugins/codeeditor/encodes/detectcode.cpp | 748 ++++++++++++++++++ src/plugins/codeeditor/encodes/detectcode.h | 61 ++ src/plugins/codeeditor/encodes/encodes.ini | 34 + .../codeeditor/gui/private/texteditor_p.cpp | 25 + .../codeeditor/gui/private/texteditor_p.h | 2 + src/plugins/codeeditor/gui/texteditor.cpp | 82 +- src/plugins/codeeditor/gui/texteditor.h | 3 + .../codeeditor/gui/workspacewidget.cpp | 5 +- src/plugins/codeeditor/status/editorlabel.cpp | 24 - src/plugins/codeeditor/status/editorlabel.h | 25 - .../codeeditor/status/statusinfomanager.cpp | 96 --- .../codeeditor/statusbar/editorstatusbar.cpp | 103 +++ .../codeeditor/statusbar/editorstatusbar.h | 36 + .../codeeditor/statusbar/encodecombobox.cpp | 204 +++++ .../codeeditor/statusbar/encodecombobox.h | 44 ++ .../statusbar/statusinfomanager.cpp | 134 ++++ .../{status => statusbar}/statusinfomanager.h | 0 src/plugins/codeeditor/utils/editorutils.cpp | 27 + src/plugins/codeeditor/utils/editorutils.h | 1 + 23 files changed, 1504 insertions(+), 167 deletions(-) create mode 100644 src/plugins/codeeditor/encodes/detectcode.cpp create mode 100644 src/plugins/codeeditor/encodes/detectcode.h create mode 100644 src/plugins/codeeditor/encodes/encodes.ini delete mode 100644 src/plugins/codeeditor/status/editorlabel.cpp delete mode 100644 src/plugins/codeeditor/status/editorlabel.h delete mode 100644 src/plugins/codeeditor/status/statusinfomanager.cpp create mode 100644 src/plugins/codeeditor/statusbar/editorstatusbar.cpp create mode 100644 src/plugins/codeeditor/statusbar/editorstatusbar.h create mode 100644 src/plugins/codeeditor/statusbar/encodecombobox.cpp create mode 100644 src/plugins/codeeditor/statusbar/encodecombobox.h create mode 100644 src/plugins/codeeditor/statusbar/statusinfomanager.cpp rename src/plugins/codeeditor/{status => statusbar}/statusinfomanager.h (100%) diff --git a/debian/control b/debian/control index eb74c9e16..64a61c39b 100644 --- a/debian/control +++ b/debian/control @@ -39,7 +39,10 @@ Build-Depends: libdtkcore5-bin, libkf5syntaxhighlighting-dev, libyaml-cpp-dev, - libcmark-dev + libcmark-dev, + libchardet-dev, + libuchardet-dev (>= 0.0.6), + libicu-dev Standards-version: 3.9.8 Homepage: http://www.deepin.org diff --git a/src/plugins/codeeditor/CMakeLists.txt b/src/plugins/codeeditor/CMakeLists.txt index eb67f11f6..4b6a6fd1c 100644 --- a/src/plugins/codeeditor/CMakeLists.txt +++ b/src/plugins/codeeditor/CMakeLists.txt @@ -7,8 +7,11 @@ set(CMAKE_INCLUDE_CURRENT_DIR true) add_definitions(-DLIBRARY_INSTALL_PREFIX="${LIBRARY_INSTALL_PREFIX}") +find_package(ICU COMPONENTS i18n uc REQUIRED) find_package(PkgConfig REQUIRED) pkg_check_modules(CMARK REQUIRED libcmark) +pkg_check_modules(CHARDET REQUIRED chardet) +pkg_check_modules(UCHARDET REQUIRED uchardet) FILE(GLOB_RECURSE PROJECT_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/*.h" @@ -31,6 +34,8 @@ add_library(${PROJECT_NAME} target_include_directories(${PROJECT_NAME} PUBLIC ${CMARK_INCLUDE_DIRS} + ${CHARDET_INCLUDE_DIRS} + ${UCHARDET_INCLUDE_DIRS} ) target_link_libraries(${PROJECT_NAME} @@ -42,6 +47,10 @@ target_link_libraries(${PROJECT_NAME} ${PkgUserModules} ${DtkWidget_LIBRARIES} ${CMARK_LIBRARIES} + ${CHARDET_LIBRARIES} + ${UCHARDET_LIBRARIES} + ICU::i18n + ICU::uc ) install(TARGETS ${PROJECT_NAME} LIBRARY DESTINATION ${PLUGIN_INSTALL_PATH}) diff --git a/src/plugins/codeeditor/codeeditor.cpp b/src/plugins/codeeditor/codeeditor.cpp index 5f888dd90..2ccf468a7 100644 --- a/src/plugins/codeeditor/codeeditor.cpp +++ b/src/plugins/codeeditor/codeeditor.cpp @@ -11,7 +11,7 @@ #include "lexer/lexermanager.h" #include "utils/editorutils.h" #include "utils/resourcemanager.h" -#include "status/statusinfomanager.h" +#include "statusbar/statusinfomanager.h" #include "symbol/symbollocator.h" #include "symbol/symbolwidget.h" diff --git a/src/plugins/codeeditor/codeeditor.qrc b/src/plugins/codeeditor/codeeditor.qrc index 79ae86455..689a61782 100644 --- a/src/plugins/codeeditor/codeeditor.qrc +++ b/src/plugins/codeeditor/codeeditor.qrc @@ -52,5 +52,6 @@ icons/deepin/builtin/light/icons/arrow_14px.svg icons/deepin/builtin/light/icons/breakpoint_14px.svg icons/deepin/builtin/light/icons/disabled_breakpoint_14px.svg + encodes/encodes.ini diff --git a/src/plugins/codeeditor/encodes/detectcode.cpp b/src/plugins/codeeditor/encodes/detectcode.cpp new file mode 100644 index 000000000..68f972b36 --- /dev/null +++ b/src/plugins/codeeditor/encodes/detectcode.cpp @@ -0,0 +1,748 @@ +// SPDX-FileCopyrightText: 2024 UnionTech Software Technology Co., Ltd. +// +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "detectcode.h" + +#include +#include +#include +#include +#include + +#include + +#define DISABLE_TEXTCODEC + +QMap DetectCode::sm_LangsMap; + +// Minimum detection accuracy judgment, less than 90% requires adjustment of strategy +static const float s_kMinConfidence = 0.9f; + +// Manually add UTF BOM information +static QMap s_byteOrderMark = { { "UTF-16LE", QByteArray::fromHex("FFFE") }, + { "UTF-16BE", QByteArray::fromHex("FEFF") }, + { "UTF-32LE", QByteArray::fromHex("FFFE0000") }, + { "UTF-32BE", QByteArray::fromHex("0000FEFF") } }; + +/** + FIXME: Temporary solution, will modify iconv and other encoding conversion processing later + # GB18030-2022 - UTF-8 encoding conversion + {s_utf8MapGB18030Data} is the GB18030-UTF8 PUA region mapping table, corresponding to GB18030-2022 specification Appendix D Table D.1 + Replace the differences between 2005 and 2022 specifications, see \l{https://en.wikipedia.org/wiki/GB_18030} + */ +static QHash s_utf8MapGB18030Data { { "\uE81E", 0x37903582 }, + { "\uE826", 0x38903582 }, + { "\uE82B", 0x39903582 }, + { "\uE82C", 0x30913582 }, + { "\uE832", 0x31913582 }, + { "\uE843", 0x32913582 }, + { "\uE854", 0x33913582 }, + { "\uE864", 0x34913582 }, + { "\uE78D", 0x36823184 }, + { "\uE78F", 0x37823184 }, + { "\uE78E", 0x38823184 }, + { "\uE790", 0x39823184 }, + { "\uE791", 0x30833184 }, + { "\uE792", 0x31833184 }, + { "\uE793", 0x32833184 }, + { "\uE794", 0x33833184 }, + { "\uE795", 0x34833184 }, + { "\uE796", 0x35833184 } }; + +/** + # GB18030-2022 specification Appendix D Table D.2 processing + Special handling required when converting GB18030 -> UTF8 encoding to preserve special GB18030 encoded characters + When converting 0xFE51...0xFE91 encoding, GB18030 specification and Unicode 4.1 specification have different conversion results, taking 0xFE51 as an example: + 1. According to GB18030-2022 encoding specification, 0xFE51 converts to \uE816 instead of \u20087, so replace 0xFE51 with conversion identifier 0xFFFFFF01 before conversion; + 2. When iconv encounters 0xFFFFFF01 during conversion, it will exit with error, then convert 0xFFFFFF01 to GB18030-2022 encoding \uE816; + 3. Since according to Unicode 4.1 specification, \uE816 should not exist when converting UTF-8 to GB18030 encoding, it is also preserved and manually restored to 0xFE51. + */ +static QHash s_ReplaceFromGB18030_2005Error { + { QByteArray::fromHex("FE51"), QByteArray::fromHex("FFFFFF01") }, + { QByteArray::fromHex("FE52"), QByteArray::fromHex("FFFFFF02") }, + { QByteArray::fromHex("FE53"), QByteArray::fromHex("FFFFFF03") }, + { QByteArray::fromHex("FE6C"), QByteArray::fromHex("FFFFFF04") }, + { QByteArray::fromHex("FE76"), QByteArray::fromHex("FFFFFF05") }, + { QByteArray::fromHex("FE91"), QByteArray::fromHex("FFFFFF06") } +}; +static QHash s_ReplaceToUTF8_2005Error { { "\uE816", QByteArray::fromHex("FFFFFF01") }, + { "\uE817", QByteArray::fromHex("FFFFFF02") }, + { "\uE818", QByteArray::fromHex("FFFFFF03") }, + { "\uE831", QByteArray::fromHex("FFFFFF04") }, + { "\uE83B", QByteArray::fromHex("FFFFFF05") }, + { "\uE855", QByteArray::fromHex("FFFFFF06") } }; +static QHash s_ReplaceUtf8ToGB18030_2005Error { { "\uE816", QByteArray::fromHex("FE51") }, + { "\uE817", QByteArray::fromHex("FE52") }, + { "\uE818", QByteArray::fromHex("FE53") }, + { "\uE831", QByteArray::fromHex("FE6C") }, + { "\uE83B", QByteArray::fromHex("FE76") }, + { "\uE855", QByteArray::fromHex("FE91") } }; + +/** + Similarly, since both 0xFE51 and 0x95329031 convert to \u20087 in Unicode 4.1 specification, but in reverse conversion, \u20087 converts to 0xFE51. + This causes data to change during bidirectional conversion, so special handling is also needed. When converting UTF-8 to GB18030-2022, replace 0xF0A08287(\u20087), + And replace with 0x95329031 during actual conversion to ensure accurate GB18030-2022 encoding conversion + */ +static QHash s_ReplaceFromUtf8_2020Error { + { QByteArray::fromHex("95329031"), QByteArray::fromHex("FFFF11") }, + { QByteArray::fromHex("95329033"), QByteArray::fromHex("FFFF12") }, + { QByteArray::fromHex("95329730"), QByteArray::fromHex("FFFF13") }, + { QByteArray::fromHex("9536B937"), QByteArray::fromHex("FFFF14") }, + { QByteArray::fromHex("9630BA35"), QByteArray::fromHex("FFFF15") }, + { QByteArray::fromHex("9635B630"), QByteArray::fromHex("FFFF16") }, +}; +// 0xF0A08287 is the UTF-8 HEX encoding for \u20087 +static QHash s_ReplaceToGB18030_2020Error { + { QByteArray::fromHex("F0A08287"), QByteArray::fromHex("FFFF11") }, + { QByteArray::fromHex("F0A08289"), QByteArray::fromHex("FFFF12") }, + { QByteArray::fromHex("F0A0838C"), QByteArray::fromHex("FFFF13") }, + { QByteArray::fromHex("F0A19797"), QByteArray::fromHex("FFFF14") }, + { QByteArray::fromHex("F0A2A68F"), QByteArray::fromHex("FFFF15") }, + { QByteArray::fromHex("F0A487BE"), QByteArray::fromHex("FFFF16") }, +}; +static QHash s_ReplaceFromUtf8ToGB18030_2020Error { + { QByteArray::fromHex("95329031"), "\u20087" }, + { QByteArray::fromHex("95329033"), "\u20089" }, + { QByteArray::fromHex("95329730"), "\u200CC" }, + { QByteArray::fromHex("9536B937"), "\u215D7" }, + { QByteArray::fromHex("9630BA35"), "\u2298F" }, + { QByteArray::fromHex("9635B630"), "\u241FE" }, +}; + +// See QTextCodec::mibEnum() +enum MibEncoding { + UnknownMib = 0, + UTF_8 = 106, + GB18030 = 114, + UTF_16BE = 1013, + UTF_16LE = 1014, + UTF_16 = 1015, + UTF_32 = 1017, + UTF_32BE = 1018, + UTF_32LE = 1019, +}; + +/** + * @brief Get character encoding format of file based on file header content \a content + * @param filepath File to get character encoding + * @param content File header content + * @return File character encoding format + * + * @note For large text files, the file header content \a content may be truncated in the middle of the file, with truncated characters at the end of \a content, + * Greatly reducing character encoding recognition rate. For this reason, when the recognition rate is too low, trim the tail data and re-detect to improve text recognition rate. + */ +QByteArray DetectCode::getFileEncodingFormat(QString filepath, QByteArray content) +{ + QString charDetectedResult; + QByteArray ucharDetectRet; + QByteArrayList icuDetectRetList; + QByteArray detectRet; + float charDetConfidence = 0.0f; + + /* chardet encoding recognition */ + QString str(content); + // Match is Chinese (only in UTF-8 encoding) + bool bFlag = str.contains(QRegExp("[\\x4e00-\\x9fa5]+")); + if (bFlag) { + const QByteArray suffix = "为增加探测率保留的中文"; + QByteArray newContent = content; + // Manually add Chinese characters to avoid misjudging encoding due to too short character length + newContent += suffix; + DetectCode::chartDetDetectingTextCoding(newContent, charDetectedResult, charDetConfidence); + + // Large text data may be truncated in the middle of the document, causing unicode Chinese characters to be truncated and parsed as garbled characters, handling some cases + // According to the text interruption situation, try to parse the encoding and remove the tail characters each time until the recognition rate reaches above 90% + int tryCount = 5; + while (charDetConfidence < s_kMinConfidence && newContent.size() > suffix.size() && tryCount-- > 0) { + // Remove possible garbled tail characters + newContent.remove(newContent.size() - suffix.size(), 1); + DetectCode::chartDetDetectingTextCoding(newContent, charDetectedResult, charDetConfidence); + } + } else { + DetectCode::chartDetDetectingTextCoding(content, charDetectedResult, charDetConfidence); + + // Some non-unicode encodings are also Chinese, such as GB18030, BIG5 and other Chinese encodings, also judge the recognition rate, manually intervene multiple detections when the recognition rate is low + int tryCount = 5; + QByteArray newContent = content; + while (charDetConfidence < s_kMinConfidence && !newContent.isEmpty() && tryCount-- > 0) { + newContent.chop(1); + DetectCode::chartDetDetectingTextCoding(newContent, charDetectedResult, charDetConfidence); + } + } + ucharDetectRet = charDetectedResult.toLatin1(); + + // uchardet encoding recognition. If recognition rate is too low, consider whether it is non-single byte encoding format. + if (ucharDetectRet.contains("unknown") || ucharDetectRet.contains("ASCII") || ucharDetectRet.contains("???") || ucharDetectRet.isEmpty() || charDetConfidence < s_kMinConfidence) { + ucharDetectRet = DetectCode::uchardetCode(filepath); + } + + if (ucharDetectRet.contains("ASCII")) { + // Use configured default file encoding, default is UTF-8 + detectRet = "UTF-8"; + } else { + // icu encoding recognition + DetectCode::icuDetectTextEncoding(filepath, icuDetectRetList); + detectRet = selectCoding(ucharDetectRet, icuDetectRetList, charDetConfidence); + + if (detectRet.contains("ASCII") || detectRet.isEmpty()) { + // Use configured default file encoding, default is UTF-8 + detectRet = "UTF-8"; + } + } + + return detectRet.toUpper(); +} + +QByteArray DetectCode::uchardetCode(QString filePath) +{ + FILE *fp; + QByteArray charset; + + size_t buffer_size = 0x10000; + char *buff = new char[buffer_size]; + memset(buff, 0, buffer_size); + + /* Analyze text encoding through sample characters */ + uchardet_t handle = uchardet_new(); + + /* Open the text file to be detected and read a certain number of sample characters */ + fp = fopen(filePath.toLocal8Bit().data(), "rb"); + + if (fp) { + while (!feof(fp)) { + size_t len = fread(buff, 1, buffer_size, fp); + int retval = uchardet_handle_data(handle, buff, len); + if (retval != 0) { + continue; + } + + break; + } + fclose(fp); + + uchardet_data_end(handle); + charset = uchardet_get_charset(handle); + } + + uchardet_delete(handle); + delete[] buff; + buff = nullptr; + + if (charset == "MAC-CENTRALEUROPE") + charset = "MACCENTRALEUROPE"; + if (charset == "MAC-CYRILLIC") + charset = "MACCYRILLIC"; + if (charset.contains("WINDOWS-")) + charset = charset.replace("WINDOWS-", "CP"); + return charset; +} + +/** + * @author guoshao + * @brief ICU encoding recognition + * @param filePath: file path, listDetectRet: variable to store encoding recognition results + **/ +void DetectCode::icuDetectTextEncoding(const QString &filePath, QByteArrayList &listDetectRet) +{ + FILE *file; + file = fopen(filePath.toLocal8Bit().data(), "rb"); + if (file == nullptr) { + qInfo() << "fopen file failed."; + return; + } + + size_t iBuffSize = 4096; + char *detected = nullptr; + char *buffer = new char[iBuffSize]; + memset(buffer, 0, iBuffSize); + + int readed = 0; + while (!feof(file)) { + size_t len = fread(buffer, 1, iBuffSize, file); + readed += len; + if (readed > 1 * 1024 * 1024) { + break; + } + + if (detectTextEncoding(buffer, len, &detected, listDetectRet)) { + break; + } + } + + delete[] buffer; + buffer = nullptr; + fclose(file); +} + +/** + * @author guoshao + * @brief Inner function for ICU encoding detection + * @param data: content to detect, len: length of content to detect, detected: variable to store detected encoding, + * listDetectRet: list to store detected encodings + * @return true: detection successful, false: detection failed + **/ +bool DetectCode::detectTextEncoding(const char *data, size_t len, char **detected, QByteArrayList &listDetectRet) +{ + Q_UNUSED(detected); + + UCharsetDetector *csd; + const UCharsetMatch **csm; + int32_t matchCount = 0; + + UErrorCode status = U_ZERO_ERROR; + csd = ucsdet_open(&status); + if (status != U_ZERO_ERROR) { + return false; + } + + ucsdet_setText(csd, data, len, &status); + if (status != U_ZERO_ERROR) { + return false; + } + + csm = ucsdet_detectAll(csd, &matchCount, &status); + if (status != U_ZERO_ERROR) { + return false; + } + + int readMax = qMin(6, matchCount); + for (int i = 0; i < readMax; i++) { + auto str = ucsdet_getName(csm[i], &status); + if (status != U_ZERO_ERROR) { + return false; + } + listDetectRet << QByteArray(str); + } + + ucsdet_close(csd); + return true; +} + +/** + * @author guoshao + * @brief Filter detected encodings + * @param ucharDetectdRet: encoding result from chardet/uchardet, icuDetectRetList: list storing detected encodings + * @return Filtered encoding result + **/ +QByteArray DetectCode::selectCoding(QByteArray ucharDetectRet, QByteArrayList icuDetectRetList, float confidence) +{ + // List is not allowed to be empty + if (icuDetectRetList.isEmpty()) { + return QByteArray(); + } + + if (!ucharDetectRet.isEmpty()) { + // Chinese environment prioritizes GB18030 encoding + if (QLocale::Chinese == QLocale::system().language()) { + if (confidence < s_kMinConfidence && icuDetectRetList.contains("GB18030")) { + return QByteArray("GB18030"); + } + } + + if (ucharDetectRet.contains(icuDetectRetList[0])) { + return ucharDetectRet; + } else { + if (icuDetectRetList.contains("GB18030")) { + return QByteArray("GB18030"); + } else { + // Filter out some encoding formats with suffixes, such as UTF-16 BE and UTF-16 + if (icuDetectRetList[0].contains(ucharDetectRet)) { + return icuDetectRetList[0]; + } + + return ucharDetectRet; + } + } + } + + if (ucharDetectRet.isEmpty()) { + if (icuDetectRetList.contains("GB18030")) { + return QByteArray("GB18030"); + } else { + return icuDetectRetList[0]; + } + } + + return QByteArray(); +} + +/** + * @brief Detect encoding using libchardet1 encoding detection library + */ +int DetectCode::chartDetDetectingTextCoding(const char *str, QString &encoding, float &confidence) +{ + DetectObj *obj = detect_obj_init(); + + if (obj == nullptr) { + // qInfo() << "Memory Allocation failed\n"; + return CHARDET_MEM_ALLOCATED_FAIL; + } + +#ifndef CHARDET_BINARY_SAFE + // before 1.0.5. This API is deprecated on 1.0.5 + switch (detect(str, &obj)) +#else + // from 1.0.5 + switch (detect_r(str, strlen(str), &obj)) +#endif + { + case CHARDET_OUT_OF_MEMORY: + qInfo() << "On handle processing, occured out of memory\n"; + detect_obj_free(&obj); + return CHARDET_OUT_OF_MEMORY; + case CHARDET_NULL_OBJECT: + qInfo() << "2st argument of chardet() is must memory allocation with detect_obj_init API\n"; + return CHARDET_NULL_OBJECT; + } + +#ifndef CHARDET_BOM_CHECK + // qInfo() << "encoding:" << obj->encoding << "; confidence: " << obj->confidence; +#else + // from 1.0.6 support return whether exists BOM + qInfo() << "encoding:" << obj->encoding << "; confidence: " << obj->confidence << "; bom: " << obj->bom; +#endif + + encoding = obj->encoding; + confidence = obj->confidence; + detect_obj_free(&obj); + + return CHARDET_SUCCESS; +} + +/** + * @return Returns the number of bytes that the first character in string \a buf may occupy based on UTF-8 character encoding + * @note Examples of different UTF-8 byte divisions, except first byte being 0 + * If first bit of a byte is 0, that byte alone represents a character; if first bit is 1, number of consecutive 1s indicates how many bytes current character occupies. + * 0000 0000-0000 007F | 0xxxxxxx + * 0000 0080-0000 07FF | 110xxxxx 10xxxxxx + * 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx + * 0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + * + * @link https://zh.wikipedia.org/wiki/UTF-8 + */ +int utf8MultiByteCount(char *buf, size_t size) +{ + // UTF character types: single byte, middle byte, double bytes, three bytes, four bytes + enum UtfCharType { + Single, + Mid, + DoubleBytes, + ThreeBytes, + FourBytes, + }; + + // Get UTF-8 byte sequence value + auto LeftBitFunc = [](char data) -> int { + // Return number of leading 1s + int res = 0; + while (data & 0x80) { + res++; + data <<= 1; + } + return res; + }; + + int count = 0; + while (size > 0 && count < FourBytes) { + int leftBits = LeftBitFunc(*buf); + switch (leftBits) { + case Mid: + count++; + break; + case DoubleBytes: + case ThreeBytes: + case FourBytes: + return leftBits; + default: + // Return 1 for more than 4 bytes or single byte + return 1; + } + + buf++; + size--; + } + + return count; +} + +/** + @brief Check for PUA region anomalies when converting GB18030 to UTF-8 + + @note Differences in GB18030 PUA region encoding conversion between versions + iconv currently uses 2022 flag, encounters error in PUA region (no longer supported in 2022), here compatible with 2005 design, + When converting between GB18030 and UTF-8, convert PUA region characters according to 2005 standard + | GB18030 Original Data (HEX) | GB18030-2005 to UTF8 | GB18030-2022 toUtf8 | + |------------------------|-----------------------|-----------------------| + | 0x37903582 | \uE81E | \u9FB4 | + | 0x38903582 | \uE826 | \u9FB5 | + | 0x39903582 | \uE82B | \u9FB6 | + | 0x30913582 | \uE82C | \u9FB7 | + | 0x31913582 | \uE832 | \u9FB8 | + | 0x32913582 | \uE843 | \u9FB9 | + | 0x33913582 | \uE854 | \u9FBA | + | 0x34913582 | \uE864 | \u9FBB | + | 0x36823184 | \uE78D | \uFE10 | + | 0x37823184 | \uE78F | \uFE11 | + | 0x38823184 | \uE78E | \uFE12 | + | 0x39823184 | \uE790 | \uFE13 | + | 0x30833184 | \uE791 | \uFE14 | + | 0x31833184 | \uE792 | \uFE15 | + | 0x32833184 | \uE793 | \uFE16 | + | 0x33833184 | \uE794 | \uFE17 | + | 0x34833184 | \uE795 | \uFE18 | + | 0x35833184 | \uE796 | \uFE19 | + + The following are special characters, 2005 and 2022 converted encoding, mapping to Unicode encoding + | GB18030 Original Data (HEX) | GB18030 to UTF8 | Unicode 4.1 Mapping | + |------------------------|----------------------|------------------------| + | 0x31903295 | U+E816 | \u20087 | + | 0x33903295 | U+E817 | \u20089 | + | 0x30973295 | U+E818 | \u200CC | + | 0x37B93695 | U+E831 | \u215D7 | + | 0x35BA3096 | U+E83B | \u2298F | + | 0x30B63596 | U+E855 | \u241FE | + */ +bool checkGB18030ToUtf8Error(char *buf, size_t size, size_t &replaceLen, QByteArray &appendChar) +{ + // For GB18030-2005 encoding specification PUA region characters, iconv reports error, replace with corresponding character sequence. + static const int sc_minGB18030PUACharLen = 4; + if (size < sc_minGB18030PUACharLen) { + replaceLen = 1; + appendChar = "?"; + return false; + } + + quint32 puaChar = *reinterpret_cast(buf); + appendChar = s_utf8MapGB18030Data.key(puaChar); + if (appendChar.isEmpty()) { + // FFFFFF0X -> \uE816...\uE855 + appendChar = s_ReplaceToUTF8_2005Error.key(QByteArray(buf, sc_minGB18030PUACharLen)); + } + + if (appendChar.isEmpty()) { + replaceLen = 1; + appendChar = "?"; + return false; + } else { + replaceLen = sc_minGB18030PUACharLen; + return true; + } +} + +/** + @brief Check for PUA region anomalies when converting UTF-8 to GB18030 + */ +bool checkUTF8ToGB18030Error(char *buf, size_t size, size_t &replaceLen, QByteArray &appendChar) +{ + // The PUA characters to be converted are all 3 + static const int sc_minUTFPUACharLen = 3; + if (size < sc_minUTFPUACharLen) { + replaceLen = 1; + appendChar = "?"; + return false; + } + + QByteArray puaChar(buf, sc_minUTFPUACharLen); + quint32 gb18030char = s_utf8MapGB18030Data.value(puaChar, 0); + if (!gb18030char) { + // \uE816 -> 0xFE51 + appendChar = s_ReplaceUtf8ToGB18030_2005Error.value(puaChar); + if (appendChar.isEmpty()) { + // 0xFFFF11 -> 0x95329031 + appendChar = s_ReplaceFromUtf8_2020Error.key(puaChar); + } + + if (!appendChar.isEmpty()) { + replaceLen = sc_minUTFPUACharLen; + return true; + } + + replaceLen = 1; + appendChar = "?"; + return false; + } else { + replaceLen = sc_minUTFPUACharLen; + appendChar = QByteArray(reinterpret_cast(&gb18030char), sizeof(gb18030char)); + return true; + } +} + +/** + * @brief Convert input character sequence \a inputStr from encoding \a fromCode to encoding \a toCode, and return the converted character sequence. + * @return Whether character encoding conversion was successful + */ +bool DetectCode::changeFileEncodingFormat(QByteArray &inputStr, + QByteArray &outStr, + const QString &fromCode, + const QString &toCode) +{ + if (fromCode == toCode) { + outStr = inputStr; + return true; + } + + if (inputStr.isEmpty()) { + outStr.clear(); + return true; + } + +#ifndef DISABLE_TEXTCODEC + // Use QTextCodec to handle some encodings + static QStringList codecList { "GB18030" }; + if (codecList.contains(fromCode) || codecList.contains(toCode)) { + return convertEncodingTextCodec(inputStr, outStr, fromCode, toCode); + } +#endif + + iconv_t handle = iconv_open(toCode.toLocal8Bit().data(), fromCode.toLocal8Bit().data()); + if (handle != reinterpret_cast(-1)) { + MibEncoding fromMib = UnknownMib; + QTextCodec *fromCodec = QTextCodec::codecForName(fromCode.toUtf8()); + if (fromCodec) { + fromMib = static_cast(fromCodec->mibEnum()); + } + // When not using modified Iconv processing from upper layer, skip GB18030 conversion special handling without detecting encoding format + MibEncoding toMib = UnknownMib; + char *inbuf = inputStr.data(); + size_t inbytesleft = static_cast(inputStr.size()); + size_t outbytesleft = 4 * inbytesleft; + char *outbuf = new char[outbytesleft]; + char *bufferHeader = outbuf; + size_t maxBufferSize = outbytesleft; + + memset(outbuf, 0, outbytesleft); + + int errorNum = 0; + try { + size_t ret = 0; + do { + ret = iconv(handle, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + if (static_cast(-1) == ret) { + // Record error information + errorNum = errno; + + // For input error, error code EILSEQ (84), skip current position and add '?' + if (EILSEQ == errorNum) { + // Exit if buffer insufficient + if (outbytesleft == 0) { + break; + } + + size_t replaceLen = 1; + // Skip error character, set error character to '?' + QByteArray appendChar = "?"; + + switch (fromMib) { + case UTF_8: { + // Special handling for UTF-8 to GB18030 conversion, check for anomalies first + if (GB18030 == toMib) { + if (checkUTF8ToGB18030Error(inbuf, inbytesleft, replaceLen, appendChar)) { + break; + } + } + + // For UTF-8 source encoding, calculate number of bytes to skip + replaceLen = static_cast(utf8MultiByteCount(inbuf, inbytesleft)); + } break; + case GB18030: + // Special handling for GB18030 to UTF-8 conversion, check for anomalies first + if (UTF_8 == toMib) { + if (checkGB18030ToUtf8Error(inbuf, inbytesleft, replaceLen, appendChar)) { + break; + } + } + break; + default: + break; + } + + // Replace error character with corresponding character + size_t appendSize = static_cast(appendChar.size()); + if (outbytesleft < appendSize) { + break; + } + + outbytesleft -= appendSize; + ::memcpy(outbuf, appendChar.data(), appendSize); + outbuf += appendSize; + + if (inbytesleft <= replaceLen) { + // Move to end + inbuf += inbytesleft; + inbytesleft = 0; + break; + } + + inbuf += replaceLen; + inbytesleft -= replaceLen; + } else { + break; + } + } + } while (static_cast(-1) == ret); + + } catch (const std::exception &e) { + qWarning() << qPrintable("iconv convert encoding catching exception") << qPrintable(e.what()); + } + + if (errorNum) { + qWarning() << qPrintable("iconv() convert text encoding error, errocode:") << errorNum; + } + iconv_close(handle); + + // Manually add UTF BOM information + outStr.append(s_byteOrderMark.value(toCode)); + + // Calculate actual number of bytes converted by iconv() + size_t realConvertSize = maxBufferSize - outbytesleft; + outStr += QByteArray(bufferHeader, static_cast(realConvertSize)); + + delete[] bufferHeader; + bufferHeader = nullptr; + + return true; + + } else { + qWarning() << qPrintable("Text encoding convert error, iconv_open() failed."); + // Try using QTextCodec to load + return convertEncodingTextCodec(inputStr, outStr, fromCode, toCode); + } +} + +/** + * @brief Use QTextCodec to convert input character sequence \a inputStr from encoding \a fromCode to encoding \a toCode, and return the converted character sequence. + * @return Whether character encoding conversion was successful + */ +bool DetectCode::convertEncodingTextCodec(QByteArray &inputStr, + QByteArray &outStr, + const QString &fromCode, + const QString &toCode) +{ + QString convertData; + if (fromCode != "UTF-8") { + QTextCodec *fromCodec = QTextCodec::codecForName(fromCode.toUtf8()); + if (!fromCodec) { + return false; + } + + convertData = fromCodec->toUnicode(inputStr); + } else { + convertData = QString::fromUtf8(inputStr); + } + + if (toCode != "UTF-8") { + QTextCodec *toCodec = QTextCodec::codecForName(toCode.toUtf8()); + if (!toCodec) { + return false; + } + + outStr = toCodec->fromUnicode(convertData); + } else { + outStr = convertData.toUtf8(); + } + + // Manually add UTF BOM information + outStr.append(s_byteOrderMark.value(toCode)); + return true; +} diff --git a/src/plugins/codeeditor/encodes/detectcode.h b/src/plugins/codeeditor/encodes/detectcode.h new file mode 100644 index 000000000..e69aaac29 --- /dev/null +++ b/src/plugins/codeeditor/encodes/detectcode.h @@ -0,0 +1,61 @@ +// SPDX-FileCopyrightText: 2024 UnionTech Software Technology Co., Ltd. +// +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef DETECTCODE_H +#define DETECTCODE_H + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +/* + * + * Text encoding detection using third-party libraries chardet1 and uchardet + * If chardet fails to detect, uchardet will be used + * Using iconv library for encoding conversion + * + */ + +class QByteArray; +class QString; + +class DetectCode +{ +public: + // libchardet1 encoding detection library + static int chartDetDetectingTextCoding(const char *str, QString &encoding, float &confidence); + // Detect text encoding using uchardet + static QByteArray uchardetCode(QString filePath); + // Detect encoding using ICU library + static void icuDetectTextEncoding(const QString &filePath, QByteArrayList &listDetectRet); + // Inner function for ICU encoding detection + static bool detectTextEncoding(const char *data, size_t len, char **detected, QByteArrayList &listDetectRet); + // Filter detected encodings + static QByteArray selectCoding(QByteArray ucharDetectRet, QByteArrayList icuDetectRetList, float confidence); + // Get file encoding format + static QByteArray getFileEncodingFormat(QString filePath, QByteArray content = QByteArray("")); + // Convert text encoding format + static bool changeFileEncodingFormat(QByteArray &inputStr, + QByteArray &outStr, + const QString &fromCode, + const QString &toCode = QString("UTF-8")); + static bool convertEncodingTextCodec(QByteArray &inputStr, + QByteArray &outStr, + const QString &fromCode, + const QString &toCode = QString("UTF-8")); + +private: + static QMap sm_LangsMap; +}; + +#endif // DETECTCODE_H diff --git a/src/plugins/codeeditor/encodes/encodes.ini b/src/plugins/codeeditor/encodes/encodes.ini new file mode 100644 index 000000000..4aa3b35d9 --- /dev/null +++ b/src/plugins/codeeditor/encodes/encodes.ini @@ -0,0 +1,34 @@ +[Unicode] +encodes=UTF-8,UTF-16BE,UTF-16LE,UTF-32BE,UTF-32LE +[WesternEuropean] +encodes=ISO-8859-1,ISO-8859-15,CP1252 +[CentralEuropean] +encodes=ISO-8859-2,ISO-8859-3,CP1125,CP1250,MACCENTRALEUROPE,IBM852 +[Baltic] +encodes=ISO-8859-4,ISO-8859-10,ISO-8859-13,CP1257,IBM775 +[Cyrillic] +encodes=ISO-8859-5,CP1251,KOI8-R,KOI8-U,MACCYRILLIC,IBM855,IBM866 +[Arabic] +encodes=ISO-8859-6,CP1256 +[Greek] +encodes=ISO-8859-7,CP1253 +[Hebrew] +encodes=ISO-8859-8,CP1255 +[Turkish] +encodes=ISO-8859-9,CP1254 +[Thai] +encodes=ISO-8859-11,TIS-620 +[Celtic] +encodes=ISO-8859-14 +[SouthEasternEuropean] +encodes=ISO-8859-16 +[ChineseSimplified] +encodes=GB18030,ISO-2022-CN +[ChineseTraditional] +encodes=BIG5,EUC-TW +[Japanese] +encodes=EUC-JP,SHIFT_JIS,ISO-2022-JP +[Korean] +encodes=EUC-KR,ISO-2022-KR +[Vietnamese] +encodes=CP1258 diff --git a/src/plugins/codeeditor/gui/private/texteditor_p.cpp b/src/plugins/codeeditor/gui/private/texteditor_p.cpp index 6d1d23144..53172f34f 100644 --- a/src/plugins/codeeditor/gui/private/texteditor_p.cpp +++ b/src/plugins/codeeditor/gui/private/texteditor_p.cpp @@ -8,6 +8,7 @@ #include "utils/resourcemanager.h" #include "lexer/lexermanager.h" #include "transceiver/codeeditorreceiver.h" +#include "encodes/detectcode.h" #include "common/common.h" #include "gui/settings/editorsettings.h" #include "gui/settings/settingsdefine.h" @@ -300,6 +301,30 @@ void TextEditorPrivate::initLanguageClient() languageClient->updateTokens(); } +bool TextEditorPrivate::readFile(const QString &encode) +{ + QFile file(fileName); + if (file.open(QIODevice::ReadOnly)) { + QByteArray fileContent = file.readAll(); + QString newEncode = encode; + if (newEncode.isEmpty()) + newEncode = DetectCode::getFileEncodingFormat(fileName, fileContent.left(1024 * 1024)); + + QByteArray outData; + DetectCode::changeFileEncodingFormat(fileContent, outData, newEncode, QString("UTF-8")); + if (outData.isEmpty()) { + outData = fileContent; + newEncode = "UTF-8"; + } + q->setText(outData); + file.close(); + documentEncode = newEncode; + q->setModified(false); + return true; + } + return false; +} + int TextEditorPrivate::cursorPosition() const { return static_cast(q->SendScintilla(TextEditor::SCI_GETCURRENTPOS)); diff --git a/src/plugins/codeeditor/gui/private/texteditor_p.h b/src/plugins/codeeditor/gui/private/texteditor_p.h index d453e5935..d34baa747 100644 --- a/src/plugins/codeeditor/gui/private/texteditor_p.h +++ b/src/plugins/codeeditor/gui/private/texteditor_p.h @@ -50,6 +50,7 @@ class TextEditorPrivate : public QObject void updateColorTheme(); void loadLexer(); void initLanguageClient(); + bool readFile(const QString &encode); int cursorPosition() const; int marginsWidth(); @@ -104,6 +105,7 @@ public slots: bool postionChangedByGoto { false }; QString fontName; int fontSize { 10 }; + QString documentEncode { "UTF-8" }; using CompletionCache = QPair; CompletionCache inlineCompletionCache { -1, "" }; diff --git a/src/plugins/codeeditor/gui/texteditor.cpp b/src/plugins/codeeditor/gui/texteditor.cpp index eedd74aba..351814099 100644 --- a/src/plugins/codeeditor/gui/texteditor.cpp +++ b/src/plugins/codeeditor/gui/texteditor.cpp @@ -5,6 +5,7 @@ #include "texteditor.h" #include "private/texteditor_p.h" #include "utils/editorutils.h" +#include "encodes/detectcode.h" #include "common/common.h" #include "common/tooltip/tooltip.h" #include "settings/settingsdefine.h" @@ -53,14 +54,8 @@ void TextEditor::openFile(const QString &fileName) beginUndoAction(); d->isAutoCompletionEnabled = false; d->fileName = fileName; - QString text; - QFile file(d->fileName); - if (file.open(QFile::OpenModeFlag::ReadOnly)) { - text = file.readAll(); - file.close(); - } - - setText(text.toUtf8()); + setReadOnly(!QFileInfo(fileName).isWritable()); + d->readFile(""); setModified(false); editor.fileOpened(fileName); d->loadLexer(); @@ -89,6 +84,11 @@ QString TextEditor::getFile() const return d->fileName; } +QString TextEditor::documentEncode() const +{ + return d->documentEncode; +} + void TextEditor::save() { if (!isModified()) @@ -109,7 +109,35 @@ void TextEditor::save() return; } - file.write(text().toUtf8()); + QByteArray fileContent = text().toLocal8Bit(); + if (!fileContent.isEmpty()) { + QByteArray Outdata; + DetectCode::changeFileEncodingFormat(fileContent, Outdata, QString("UTF-8"), d->documentEncode); + if (Outdata.isEmpty()) { + qWarning() << qPrintable(QString("iconv Encode Transformat from '%1' to '%2' Fail! start QTextCodec Encode Transformat.") + .arg(QString("UTF-8"), d->documentEncode)); + // Using QTextCodec to convert + QTextCodec *codec = QTextCodec::codecForName(d->documentEncode.toUtf8()); + if (codec) { + QByteArray encodedString = codec->fromUnicode(fileContent); + if (encodedString.isEmpty()) { + qWarning() << qPrintable("Both iconv and QTextCodec Encode Transformat Fail!"); + } else { + qWarning() << qPrintable(QString("QTextCodec Encode Transformat from '%1' to '%2' Success!") + .arg(QString("UTF-8"), d->documentEncode)); + Outdata = encodedString; + } + } else { + qWarning() << qPrintable("Unsupported QTextCodec format:") << d->documentEncode; + } + } + + if (!Outdata.isEmpty()) + file.write(Outdata); + } else { + file.write(fileContent); + } + file.close(); setModified(false); editor.fileSaved(d->fileName); @@ -141,21 +169,37 @@ void TextEditor::reload() int line = 0, index = 0; getCursorPosition(&line, &index); const auto &markers = d->allMarkers(); - - QString text; - QFile file(d->fileName); - if (file.open(QFile::OpenModeFlag::ReadOnly)) { - text = file.readAll(); - file.close(); - } - setText(text.toUtf8()); - setModified(false); - + d->readFile(""); d->setMarkers(markers); setCursorPosition(line, index); emit textChanged(); } +bool TextEditor::reload(const QString &encode) +{ + if (encode == d->documentEncode) + return false; + + if (length() == 0) { + d->documentEncode = encode; + return true; + } + + if (isModified()) { + DDialog dlg(tr("Encoding changed. Do you want to save the file now?"), "", this); + dlg.setIcon(QIcon::fromTheme("ide")); + dlg.addButton(QString(tr("Cancel", "button"))); + dlg.addButton(QString(tr("Save", "button")), true, DDialog::ButtonRecommend); + int res = dlg.exec(); + if (res == 0) + return false; + + if (res == 1) + save(); + } + return d->readFile(encode); +} + void TextEditor::addBreakpoint(int line, bool enabled) { if (hasBreakpoint(line)) diff --git a/src/plugins/codeeditor/gui/texteditor.h b/src/plugins/codeeditor/gui/texteditor.h index 912e81c38..e68876b5c 100644 --- a/src/plugins/codeeditor/gui/texteditor.h +++ b/src/plugins/codeeditor/gui/texteditor.h @@ -28,11 +28,13 @@ class TextEditor : public QsciScintilla void openFile(const QString &fileName); void openFileWithDocument(const QString &fileName, const QsciDocument &doc); QString getFile() const; + QString documentEncode() const; void save(); void saveAs(); void saveAs(const QString &fileName); void reload(); + bool reload(const QString &encode); // debug void addBreakpoint(int line, bool enabled = true); @@ -138,6 +140,7 @@ public slots: void cursorRecordChanged(int pos); void requestOpenFiles(const QList &fileList); void delayCursorPositionChanged(int line, int index); + void cursorModeChanged(); private: void init(); diff --git a/src/plugins/codeeditor/gui/workspacewidget.cpp b/src/plugins/codeeditor/gui/workspacewidget.cpp index fc200dba6..3ae62a111 100644 --- a/src/plugins/codeeditor/gui/workspacewidget.cpp +++ b/src/plugins/codeeditor/gui/workspacewidget.cpp @@ -449,8 +449,11 @@ void WorkspaceWidgetPrivate::initActions() if (!tabWidget) return; - if (auto editor = tabWidget->currentEditor()) + if (auto editor = tabWidget->currentEditor()) { editor->SendScintilla(val); + if (val == QsciCommand::EditToggleOvertype) + Q_EMIT editor->cursorModeChanged(); + } }); } } diff --git a/src/plugins/codeeditor/status/editorlabel.cpp b/src/plugins/codeeditor/status/editorlabel.cpp deleted file mode 100644 index ba4018297..000000000 --- a/src/plugins/codeeditor/status/editorlabel.cpp +++ /dev/null @@ -1,24 +0,0 @@ -// SPDX-FileCopyrightText: 2024 UnionTech Software Technology Co., Ltd. -// -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "editorlabel.h" - -#include -#include - -EditorLabel::EditorLabel(QWidget *parent) - : QWidget(parent) -{ - QHBoxLayout *layout = new QHBoxLayout(this); - layout->setContentsMargins(0, 0, 0, 0); - - cursorlabel = new QLabel(this); - layout->addWidget(cursorlabel); -} - -void EditorLabel::updateCursor(int line, int column) -{ - QString format(tr("Line %1 Column %2")); - cursorlabel->setText(format.arg(line + 1).arg(column + 1)); -} diff --git a/src/plugins/codeeditor/status/editorlabel.h b/src/plugins/codeeditor/status/editorlabel.h deleted file mode 100644 index bb890e8af..000000000 --- a/src/plugins/codeeditor/status/editorlabel.h +++ /dev/null @@ -1,25 +0,0 @@ -// SPDX-FileCopyrightText: 2024 UnionTech Software Technology Co., Ltd. -// -// SPDX-License-Identifier: GPL-3.0-or-later - -#ifndef EDITORLABEL_H -#define EDITORLABEL_H - -#include - -class QLabel; - -class EditorLabel : public QWidget -{ - Q_OBJECT -public: - explicit EditorLabel(QWidget *parent = nullptr); - -public Q_SLOTS: - void updateCursor(int line, int column); - -private: - QLabel *cursorlabel { nullptr }; -}; - -#endif // EDITORLABEL_H diff --git a/src/plugins/codeeditor/status/statusinfomanager.cpp b/src/plugins/codeeditor/status/statusinfomanager.cpp deleted file mode 100644 index 6850924e7..000000000 --- a/src/plugins/codeeditor/status/statusinfomanager.cpp +++ /dev/null @@ -1,96 +0,0 @@ -// SPDX-FileCopyrightText: 2024 UnionTech Software Technology Co., Ltd. -// -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "statusinfomanager.h" -#include "editorlabel.h" -#include "gui/texteditor.h" - -#include "services/window/windowservice.h" - -#include - -using namespace dpfservice; - -class StatusInfoManagerPrivate : public QObject -{ -public: - void init(WindowService *winSrv); - void initConnect(); - - void resetEditor(TextEditor *edit); - void updateLabelInfo(); - void handleFocusChanged(QWidget *old, QWidget *now); - -public: - TextEditor *currentEditor { nullptr }; - EditorLabel *editorLabel { nullptr }; -}; - -void StatusInfoManagerPrivate::init(WindowService *winSrv) -{ - editorLabel = new EditorLabel(); - winSrv->addStatusBarItem(editorLabel); -} - -void StatusInfoManagerPrivate::initConnect() -{ - connect(qApp, &QApplication::focusChanged, this, &StatusInfoManagerPrivate::handleFocusChanged); -} - -void StatusInfoManagerPrivate::resetEditor(TextEditor *edit) -{ - if (currentEditor == edit) - return; - - if (currentEditor) { - disconnect(currentEditor, &TextEditor::destroyed, this, 0); - disconnect(currentEditor, &TextEditor::cursorPositionChanged, editorLabel, &EditorLabel::updateCursor); - } - - currentEditor = edit; - connect(edit, &TextEditor::destroyed, this, [this] { currentEditor = nullptr; }); - connect(currentEditor, &TextEditor::cursorPositionChanged, editorLabel, &EditorLabel::updateCursor); - updateLabelInfo(); -} - -void StatusInfoManagerPrivate::updateLabelInfo() -{ - int line = 0, col = 0; - currentEditor->getCursorPosition(&line, &col); - editorLabel->updateCursor(line, col); -} - -void StatusInfoManagerPrivate::handleFocusChanged(QWidget *old, QWidget *now) -{ - Q_UNUSED(old) - - auto edit = qobject_cast(now); - if (!edit) - return; - - resetEditor(edit); -} - -StatusInfoManager::StatusInfoManager(QObject *parent) - : QObject(parent), - d(new StatusInfoManagerPrivate) -{ -} - -StatusInfoManager::~StatusInfoManager() -{ - delete d; -} - -StatusInfoManager *StatusInfoManager::instance() -{ - static StatusInfoManager ins; - return &ins; -} - -void StatusInfoManager::init(WindowService *winSrv) -{ - d->init(winSrv); - d->initConnect(); -} diff --git a/src/plugins/codeeditor/statusbar/editorstatusbar.cpp b/src/plugins/codeeditor/statusbar/editorstatusbar.cpp new file mode 100644 index 000000000..cc6bf45e8 --- /dev/null +++ b/src/plugins/codeeditor/statusbar/editorstatusbar.cpp @@ -0,0 +1,103 @@ +// SPDX-FileCopyrightText: 2024 UnionTech Software Technology Co., Ltd. +// +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "editorstatusbar.h" +#include "encodecombobox.h" + +#include + +#include +#include + +DWIDGET_USE_NAMESPACE + +class EditorStatusBarPrivate : public QObject +{ +public: + explicit EditorStatusBarPrivate(EditorStatusBar *qq); + + void initUI(); + void initConnection(); + +public: + EditorStatusBar *q; + + QLabel *cursorlabel { nullptr }; + QLabel *modeLabel { nullptr }; + EncodeComboBox *encodedCB { nullptr }; +}; + +EditorStatusBarPrivate::EditorStatusBarPrivate(EditorStatusBar *qq) + : q(qq) +{ +} + +void EditorStatusBarPrivate::initUI() +{ + QHBoxLayout *layout = new QHBoxLayout(q); + layout->setContentsMargins(0, 0, 0, 0); + layout->setSpacing(20); + + auto addVLine = [this, layout] { + DVerticalLine *vLine = new DVerticalLine(q); + vLine->setFixedHeight(14); + layout->addWidget(vLine); + }; + + cursorlabel = new QLabel(q); + modeLabel = new QLabel(q); + encodedCB = new EncodeComboBox(q); + + layout->addWidget(cursorlabel); + addVLine(); + layout->addWidget(modeLabel); + addVLine(); + layout->addWidget(encodedCB); +} + +void EditorStatusBarPrivate::initConnection() +{ + connect(encodedCB, &EncodeComboBox::encodeChanged, q, &EditorStatusBar::encodeChanged); +} + +EditorStatusBar::EditorStatusBar(QWidget *parent) + : QWidget(parent), + d(new EditorStatusBarPrivate(this)) +{ + d->initUI(); + d->initConnection(); +} + +EditorStatusBar::~EditorStatusBar() +{ + delete d; +} + +void EditorStatusBar::updateCursor(int line, int column) +{ + QString format(tr("Line %1 Column %2")); + d->cursorlabel->setText(format.arg(line + 1).arg(column + 1)); +} + +void EditorStatusBar::updateCursorMode(CursorMode mode) +{ + QString text; + switch (mode) { + case Insert: + text = tr("INSERT"); + break; + case Overwrite: + text = tr("OVERWRITE"); + break; + case Readonly: + text = tr("R/O"); + break; + } + d->modeLabel->setText(text); +} + +void EditorStatusBar::updateEncodedMode(const QString &mode) +{ + d->encodedCB->setEncodeName(mode); +} diff --git a/src/plugins/codeeditor/statusbar/editorstatusbar.h b/src/plugins/codeeditor/statusbar/editorstatusbar.h new file mode 100644 index 000000000..666fb4312 --- /dev/null +++ b/src/plugins/codeeditor/statusbar/editorstatusbar.h @@ -0,0 +1,36 @@ +// SPDX-FileCopyrightText: 2024 UnionTech Software Technology Co., Ltd. +// +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef EDITORSTATUSBAR_H +#define EDITORSTATUSBAR_H + +#include + +class EditorStatusBarPrivate; +class EditorStatusBar : public QWidget +{ + Q_OBJECT +public: + enum CursorMode { + Insert, + Overwrite, + Readonly + }; + + explicit EditorStatusBar(QWidget *parent = nullptr); + ~EditorStatusBar(); + +public Q_SLOTS: + void updateCursor(int line, int column); + void updateCursorMode(CursorMode mode); + void updateEncodedMode(const QString &mode); + +Q_SIGNALS: + void encodeChanged(const QString &encode); + +private: + EditorStatusBarPrivate *const d; +}; + +#endif // EDITORSTATUSBAR_H diff --git a/src/plugins/codeeditor/statusbar/encodecombobox.cpp b/src/plugins/codeeditor/statusbar/encodecombobox.cpp new file mode 100644 index 000000000..5cbaf0fdb --- /dev/null +++ b/src/plugins/codeeditor/statusbar/encodecombobox.cpp @@ -0,0 +1,204 @@ +// SPDX-FileCopyrightText: 2024 UnionTech Software Technology Co., Ltd. +// +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "encodecombobox.h" +#include "utils/editorutils.h" + +#include +#include + +#include +#include + +DWIDGET_USE_NAMESPACE +DGUI_USE_NAMESPACE + +EncodeComboBox::EncodeComboBox(QWidget *parent) + : QFrame(parent) +{ + initUI(); + initMenuData(); + initConnection(); +} + +void EncodeComboBox::initUI() +{ + toolBtn = new DToolButton(this); + toolBtn->setFocusPolicy(Qt::NoFocus); + toolBtn->setToolButtonStyle(Qt::ToolButtonIconOnly); + toolBtn->setArrowType(Qt::NoArrow); + toolBtn->setFixedHeight(28); + toolBtn->installEventFilter(this); + + menu = new QMenu(this); + + QHBoxLayout *hLayout = new QHBoxLayout(this); + hLayout->setContentsMargins(0, 0, 0, 0); + hLayout->addWidget(toolBtn); +} + +void EncodeComboBox::initConnection() +{ + connect(menu, &QMenu::triggered, this, [this](QAction *action) { + if (curEncodeName != action->text()) + Q_EMIT encodeChanged(action->text()); + }); +} + +void EncodeComboBox::initMenuData() +{ + static QMap translationMap { + { "Unicode", tr("Unicode") }, + { "WesternEuropean", tr("WesternEuropean") }, + { "CentralEuropean", tr("CentralEuropean") }, + { "Baltic", tr("Baltic") }, + { "Cyrillic", tr("Cyrillic") }, + { "Arabic", tr("Arabic") }, + { "Greek", tr("Greek") }, + { "Hebrew", tr("Hebrew") }, + { "Turkish", tr("Turkish") }, + { "Thai", tr("Thai") }, + { "Celtic", tr("Celtic") }, + { "SouthEasternEuropean", tr("SouthEasternEuropean") }, + { "ChineseSimplified", tr("ChineseSimplified") }, + { "ChineseTraditional", tr("ChineseTraditional") }, + { "Japanese", tr("Japanese") }, + { "Korean", tr("Korean") }, + { "Vietnamese", tr("Vietnamese") } + }; + + auto groupEncodeVec = EditorUtils::supportEncoding(); + if (!groupEncodeVec.isEmpty()) { + int cnt = groupEncodeVec.size(); + for (int i = 0; i < cnt; i++) { + const auto &groupName = groupEncodeVec[i].first; + QMenu *groupMenu = new QMenu(translationMap.value(groupName, groupName)); + foreach (const QString &var, groupEncodeVec[i].second) { + groupMenu->addAction(var); + } + menu->addMenu(groupMenu); + } + } +} + +void EncodeComboBox::setEncodeName(const QString &encoding) +{ + if (curEncodeName == encoding) + return; + + curEncodeName = encoding; + toolBtn->setIcon(createIcon()); + + for (auto ac : menu->actions()) { + setCheckedExclusive(ac, encoding); + } +} + +QString EncodeComboBox::encodeName() const +{ + return curEncodeName; +} + +bool EncodeComboBox::eventFilter(QObject *obj, QEvent *e) +{ + if (obj == toolBtn) { + if (e->type() == QEvent::MouseButtonPress) { + QMouseEvent *mouseEvent = static_cast(e); + if (mouseEvent->button() == Qt::LeftButton) { + isPressed = true; + toolBtn->setIcon(createIcon()); + return true; + } else if (mouseEvent->button() == Qt::RightButton) + return true; + } else if (e->type() == QEvent::MouseButtonRelease) { + QMouseEvent *mouseEvent = static_cast(e); + if (mouseEvent->button() == Qt::LeftButton) { + isPressed = false; + toolBtn->setIcon(createIcon()); + showContextMenu(); + } + } + } + + return QFrame::eventFilter(obj, e); +} + +QIcon EncodeComboBox::createIcon() +{ + DPalette dpalette = DPaletteHelper::instance()->palette(toolBtn); + QColor textColor; + QPixmap arrowPixmap = QCommonStyle().standardPixmap(QStyle::SP_ArrowDown); + if (isPressed) { + textColor = dpalette.color(DPalette::Highlight); + QPainter arrowPainter(&arrowPixmap); + arrowPainter.setCompositionMode(QPainter::CompositionMode_SourceIn); + arrowPainter.fillRect(arrowPixmap.rect(), dpalette.color(DPalette::Highlight)); + arrowPainter.end(); + } else { + textColor = dpalette.color(DPalette::WindowText); + } + + QFontMetrics metrics(font()); + int fontWidth = metrics.width(curEncodeName) + 20; + int fontHeight = metrics.size(Qt::TextSingleLine, curEncodeName).height(); + int iconW = 8; + int iconH = 5; + + int totalWidth = fontWidth + iconW + 10; + toolBtn->setFixedSize(totalWidth, 28); + toolBtn->setIconSize(QSize(totalWidth, 28)); + + qreal rate = this->devicePixelRatioF(); + QPixmap icon(QSize(totalWidth, 28) * rate); + icon.setDevicePixelRatio(rate); + icon.fill(Qt::transparent); + + QPainter painter(&icon); + painter.setRenderHint(QPainter::Antialiasing, true); + painter.setRenderHints(QPainter::SmoothPixmapTransform); + + painter.save(); + painter.setFont(font()); + painter.setPen(textColor); + painter.drawText(QRectF(10, (28 - fontHeight) / 2, fontWidth, fontHeight), curEncodeName); + painter.restore(); + painter.drawPixmap(QRectF(fontWidth, (28 - iconH) / 2, iconW, iconH), arrowPixmap, arrowPixmap.rect()); + + painter.end(); + return icon; +} + +void EncodeComboBox::showContextMenu() +{ + QPoint center = this->mapToGlobal(this->rect().center()); + int menuHeight = menu->sizeHint().height(); + int menuWidth = menu->sizeHint().width(); + center.setY(center.y() - menuHeight - this->rect().height() / 2); + center.setX(center.x() - menuWidth / 2); + menu->move(center); + menu->exec(); + + QEvent event(QEvent::HoverLeave); + QApplication::sendEvent(toolBtn, &event); +} + +void EncodeComboBox::setCheckedExclusive(QAction *action, const QString &name) +{ + if (!action) + return; + + if (action->menu()) { + for (auto ac : action->menu()->actions()) { + setCheckedExclusive(ac, name); + } + } else { + if (action->text() != name) { + action->setCheckable(false); + action->setChecked(false); + } else { + action->setCheckable(true); + action->setChecked(true); + } + } +} diff --git a/src/plugins/codeeditor/statusbar/encodecombobox.h b/src/plugins/codeeditor/statusbar/encodecombobox.h new file mode 100644 index 000000000..c62e4b843 --- /dev/null +++ b/src/plugins/codeeditor/statusbar/encodecombobox.h @@ -0,0 +1,44 @@ +// SPDX-FileCopyrightText: 2024 UnionTech Software Technology Co., Ltd. +// +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef ENCODECOMBOBOX_H +#define ENCODECOMBOBOX_H + +#include + +#include +#include + +class EncodeComboBox : public QFrame +{ + Q_OBJECT +public: + explicit EncodeComboBox(QWidget *parent = nullptr); + + void setEncodeName(const QString &encoding); + QString encodeName() const; + +Q_SIGNALS: + void encodeChanged(const QString &encode); + +protected: + bool eventFilter(QObject *obj, QEvent *e) override; + +private: + void initUI(); + void initConnection(); + void initMenuData(); + QIcon createIcon(); + void showContextMenu(); + void setCheckedExclusive(QAction *action, const QString &name); + +private: + DTK_WIDGET_NAMESPACE::DToolButton *toolBtn { nullptr }; + QMenu *menu { nullptr }; + + QString curEncodeName; + bool isPressed { false }; +}; + +#endif // ENCODECOMBOBOX_H diff --git a/src/plugins/codeeditor/statusbar/statusinfomanager.cpp b/src/plugins/codeeditor/statusbar/statusinfomanager.cpp new file mode 100644 index 000000000..7f0d3afa7 --- /dev/null +++ b/src/plugins/codeeditor/statusbar/statusinfomanager.cpp @@ -0,0 +1,134 @@ +// SPDX-FileCopyrightText: 2024 UnionTech Software Technology Co., Ltd. +// +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "statusinfomanager.h" +#include "editorstatusbar.h" +#include "gui/texteditor.h" + +#include "services/window/windowservice.h" + +#include + +using namespace dpfservice; + +class StatusInfoManagerPrivate : public QObject +{ +public: + void init(WindowService *winSrv); + void initConnect(); + + void resetEditor(TextEditor *edit); + void updateStatusBar(); + void handleFocusChanged(QWidget *old, QWidget *now); + void handleEncodeChanged(const QString &encode); + + void updateEditorCursorMode(); + void updateEditorEncodedMode(); + +public: + TextEditor *currentEditor { nullptr }; + EditorStatusBar *statusBar { nullptr }; +}; + +void StatusInfoManagerPrivate::init(WindowService *winSrv) +{ + statusBar = new EditorStatusBar(); + statusBar->setVisible(false); + winSrv->addStatusBarItem(statusBar); +} + +void StatusInfoManagerPrivate::initConnect() +{ + connect(qApp, &QApplication::focusChanged, this, &StatusInfoManagerPrivate::handleFocusChanged); + connect(statusBar, &EditorStatusBar::encodeChanged, this, &StatusInfoManagerPrivate::handleEncodeChanged); +} + +void StatusInfoManagerPrivate::resetEditor(TextEditor *edit) +{ + if (currentEditor == edit) + return; + + if (currentEditor) { + currentEditor->disconnect(this); + currentEditor->disconnect(statusBar); + } + + currentEditor = edit; + statusBar->setVisible(true); + connect(edit, &TextEditor::destroyed, this, [this] { + statusBar->setVisible(false); + currentEditor = nullptr; + }); + connect(currentEditor, &TextEditor::cursorPositionChanged, statusBar, &EditorStatusBar::updateCursor); + connect(currentEditor, &TextEditor::cursorModeChanged, this, &StatusInfoManagerPrivate::updateEditorCursorMode); + updateStatusBar(); +} + +void StatusInfoManagerPrivate::updateStatusBar() +{ + int line = 0, col = 0; + currentEditor->getCursorPosition(&line, &col); + statusBar->updateCursor(line, col); + + updateEditorCursorMode(); + updateEditorEncodedMode(); +} + +void StatusInfoManagerPrivate::handleFocusChanged(QWidget *old, QWidget *now) +{ + Q_UNUSED(old) + + auto edit = qobject_cast(now); + if (!edit) + return; + + resetEditor(edit); +} + +void StatusInfoManagerPrivate::handleEncodeChanged(const QString &encode) +{ + if (!currentEditor) + return; + + currentEditor->reload(encode); + updateEditorEncodedMode(); +} + +void StatusInfoManagerPrivate::updateEditorCursorMode() +{ + if (currentEditor->isReadOnly()) { + statusBar->updateCursorMode(EditorStatusBar::Readonly); + } else { + bool overwrite = currentEditor->overwriteMode(); + statusBar->updateCursorMode(overwrite ? EditorStatusBar::Overwrite : EditorStatusBar::Insert); + } +} + +void StatusInfoManagerPrivate::updateEditorEncodedMode() +{ + statusBar->updateEncodedMode(currentEditor->documentEncode()); +} + +StatusInfoManager::StatusInfoManager(QObject *parent) + : QObject(parent), + d(new StatusInfoManagerPrivate) +{ +} + +StatusInfoManager::~StatusInfoManager() +{ + delete d; +} + +StatusInfoManager *StatusInfoManager::instance() +{ + static StatusInfoManager ins; + return &ins; +} + +void StatusInfoManager::init(WindowService *winSrv) +{ + d->init(winSrv); + d->initConnect(); +} diff --git a/src/plugins/codeeditor/status/statusinfomanager.h b/src/plugins/codeeditor/statusbar/statusinfomanager.h similarity index 100% rename from src/plugins/codeeditor/status/statusinfomanager.h rename to src/plugins/codeeditor/statusbar/statusinfomanager.h diff --git a/src/plugins/codeeditor/utils/editorutils.cpp b/src/plugins/codeeditor/utils/editorutils.cpp index bc27699d3..225a2c206 100644 --- a/src/plugins/codeeditor/utils/editorutils.cpp +++ b/src/plugins/codeeditor/utils/editorutils.cpp @@ -6,6 +6,9 @@ #include "common/actionmanager/actionmanager.h" +#include +#include + int EditorUtils::nbDigitsFromNbLines(long nbLines) { int nbDigits = 0; // minimum number of digit should be 4 @@ -40,3 +43,27 @@ Command *EditorUtils::registerShortcut(QAction *act, const QString &id, const QK cmd->setDefaultKeySequence(shortCut); return cmd; } + +QVector> EditorUtils::supportEncoding() +{ + static QVector> groupEncodeVec; + if (groupEncodeVec.isEmpty()) { + QFile file(":/encodes/encodes.ini"); + QString data; + if (file.open(QIODevice::ReadOnly)) { + data = QString::fromUtf8(file.readAll()); + file.close(); + } + + QTextStream readStream(&data, QIODevice::ReadOnly); + while (!readStream.atEnd()) { + QString group = readStream.readLine(); + QString key = group.mid(1, group.length() - 2); + QString encodes = readStream.readLine(); + QString value = encodes.mid(8, encodes.length() - 2); + groupEncodeVec.append(QPair(key, value.split(","))); + } + } + + return groupEncodeVec; +} diff --git a/src/plugins/codeeditor/utils/editorutils.h b/src/plugins/codeeditor/utils/editorutils.h index 009d3704e..9e9019b8f 100644 --- a/src/plugins/codeeditor/utils/editorutils.h +++ b/src/plugins/codeeditor/utils/editorutils.h @@ -14,6 +14,7 @@ class EditorUtils : public QObject public: static int nbDigitsFromNbLines(long nbLines); static Command *registerShortcut(QAction *act, const QString &id, const QKeySequence &shortCut); + static QVector> supportEncoding(); }; #endif // EDITORUTILS_H