From c6ba9ca0749cb1dda5120b2b0e2ed6272a5177e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=97=AB=E8=8C=82=E6=BA=90?= <yanmaoyuan@baijiayun.com>
Date: Sat, 1 Jun 2024 18:21:50 +0800
Subject: [PATCH] feat: clean code

---
 .../io/github/jmecn/text/EmojiIterator.java   |   2 +-
 .../java/io/github/jmecn/text/EmojiRun.java   |  62 ++++
 .../jmecn/font/shaping/GMarkParser.java       |  58 ----
 .../jmecn/font/shaping/TestCharDetect.java    | 264 ------------------
 .../github/jmecn/font/shaping/TestEmoji.java  |  67 -----
 .../jmecn/font/shaping/TestEmojiIter.java     |  48 ----
 .../github/jmecn/font/shaping/TextSpan.java   |  13 -
 .../io/github/jmecn/text/TestBidiRun.java     | 128 +++++++++
 .../github/jmecn/text/TestEmojiIterator.java  | 163 +++++++++++
 .../github/jmecn/text/TestRichTextToSpan.java | 129 +++++++++
 10 files changed, 483 insertions(+), 451 deletions(-)
 create mode 100644 lib/src/main/java/io/github/jmecn/text/EmojiRun.java
 delete mode 100644 lib/src/test/java/io/github/jmecn/font/shaping/GMarkParser.java
 delete mode 100644 lib/src/test/java/io/github/jmecn/font/shaping/TestCharDetect.java
 delete mode 100644 lib/src/test/java/io/github/jmecn/font/shaping/TestEmoji.java
 delete mode 100644 lib/src/test/java/io/github/jmecn/font/shaping/TestEmojiIter.java
 delete mode 100644 lib/src/test/java/io/github/jmecn/font/shaping/TextSpan.java
 create mode 100644 lib/src/test/java/io/github/jmecn/text/TestBidiRun.java
 create mode 100644 lib/src/test/java/io/github/jmecn/text/TestEmojiIterator.java
 create mode 100644 lib/src/test/java/io/github/jmecn/text/TestRichTextToSpan.java

diff --git a/lib/src/main/java/io/github/jmecn/text/EmojiIterator.java b/lib/src/main/java/io/github/jmecn/text/EmojiIterator.java
index 4d1fe17..50af78e 100644
--- a/lib/src/main/java/io/github/jmecn/text/EmojiIterator.java
+++ b/lib/src/main/java/io/github/jmecn/text/EmojiIterator.java
@@ -79,7 +79,7 @@ public int getTextEnd() {
     }
 
     public boolean next() {
-        if (this.end >= this.nChars - 1) {
+        if (this.end >= this.nChars) {
             return false;
         }
 
diff --git a/lib/src/main/java/io/github/jmecn/text/EmojiRun.java b/lib/src/main/java/io/github/jmecn/text/EmojiRun.java
new file mode 100644
index 0000000..f4df0ff
--- /dev/null
+++ b/lib/src/main/java/io/github/jmecn/text/EmojiRun.java
@@ -0,0 +1,62 @@
+package io.github.jmecn.text;
+
+import java.util.Objects;
+
+/**
+ * Immutable value holding an emoji flag plus half-open start/end indices, in unicode and text units, for one run.
+ *
+ * @author yanmaoyuan
+ */
+public class EmojiRun {
+
+    private final boolean isEmoji; // whether this run is flagged as emoji
+    private final int unicodeStart; // start of the run, counted in unicode characters
+    private final int unicodeEnd; // end of the run in unicode characters (tests treat it as exclusive)
+    private final int textStart; // start of the run as a char offset into the text
+    private final int textEnd; // end of the run as a char offset (tests pass it to String.substring as the end)
+
+    EmojiRun(boolean isEmoji, int unicodeStart, int unicodeEnd, int textStart, int textEnd) { // package-private; values are stored as given, no validation
+        this.isEmoji = isEmoji;
+        this.unicodeStart = unicodeStart;
+        this.unicodeEnd = unicodeEnd;
+        this.textStart = textStart;
+        this.textEnd = textEnd;
+    }
+
+    public boolean isEmoji() {
+        return isEmoji;
+    }
+
+    public int getUnicodeStart() {
+        return unicodeStart;
+    }
+
+    public int getUnicodeEnd() {
+        return unicodeEnd;
+    }
+
+    public int getTextStart() {
+        return textStart;
+    }
+
+    public int getTextEnd() {
+        return textEnd;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (!(o instanceof EmojiRun)) {
+            return false;
+        }
+        EmojiRun that = (EmojiRun) o;
+        return isEmoji == that.isEmoji && unicodeStart == that.unicodeStart && unicodeEnd == that.unicodeEnd && textStart == that.textStart && textEnd == that.textEnd; // all five fields participate
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(isEmoji, unicodeStart, unicodeEnd, textStart, textEnd); // same five fields as equals
+    }
+}
diff --git a/lib/src/test/java/io/github/jmecn/font/shaping/GMarkParser.java b/lib/src/test/java/io/github/jmecn/font/shaping/GMarkParser.java
deleted file mode 100644
index 080ac6b..0000000
--- a/lib/src/test/java/io/github/jmecn/font/shaping/GMarkParser.java
+++ /dev/null
@@ -1,58 +0,0 @@
-package io.github.jmecn.font.shaping;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-class GMarkTag {
-    private String name;
-    private String attributes;
-    private String content;
-
-    public GMarkTag(String name, String attributes, String content) {
-        this.name = name;
-        this.attributes = attributes;
-        this.content = content;
-    }
-
-    public String getName() {
-        return name;
-    }
-
-    public String getAttributes() {
-        return attributes;
-    }
-
-    public String getContent() {
-        return content;
-    }
-}
-
-public class GMarkParser {
-    private static final Pattern tagPattern = Pattern.compile("<(\\w+)(.*?)>(.*?)</\\1>");
-
-    public static List<GMarkTag> extractTags(String gmarkText) {
-        List<GMarkTag> tags = new ArrayList<>();
-        Matcher matcher = tagPattern.matcher(gmarkText);
-        while (matcher.find()) {
-            String tagName = matcher.group(1);
-            String attributes = matcher.group(2);
-            String content = matcher.group(3);
-            GMarkTag tag = new GMarkTag(tagName, attributes, content);
-            tags.add(tag);
-        }
-        return tags;
-    }
-
-    public static void main(String[] args) {
-        String gmarkText = "这是一个美丽的新世界。<i><b>Hello world</b></i>我希望大家<font>永远开心</font>";
-        List<GMarkTag> tags = extractTags(gmarkText);
-        for (GMarkTag tag : tags) {
-            System.out.println("Tag: " + tag.getName());
-            System.out.println("Attributes: " + tag.getAttributes());
-            System.out.println("Content: " + tag.getContent());
-            System.out.println();
-        }
-    }
-}
\ No newline at end of file
diff --git a/lib/src/test/java/io/github/jmecn/font/shaping/TestCharDetect.java b/lib/src/test/java/io/github/jmecn/font/shaping/TestCharDetect.java
deleted file mode 100644
index 5e5bd73..0000000
--- a/lib/src/test/java/io/github/jmecn/font/shaping/TestCharDetect.java
+++ /dev/null
@@ -1,264 +0,0 @@
-package io.github.jmecn.font.shaping;
-
-import com.ibm.icu.lang.UScript;
-import com.ibm.icu.lang.UScriptRun;
-import org.junit.jupiter.api.Test;
-
-import java.text.Bidi;
-import java.text.BreakIterator;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/**
- * desc:
- *
- * @author yanmaoyuan
- * @date 2024/5/21
- */
-public class TestCharDetect {
-    static final String TEXT = "Love and peace." +// latin
-            "爱与和平。世界是我们的，也是你们的。" +// Han
-            "الحب 123والسلام" + // Arabic
-            "사랑과 평화" + // Hangul
-            "👋🤔️" // emoji
-            ;
-
-    @Test
-    void testPropertyDetect() {
-        for (int i = 0; i < TEXT.length(); i++) {
-            int codepoint = Character.codePointAt(TEXT, i);
-            byte dir = Character.getDirectionality(codepoint);
-            Character.UnicodeScript script = Character.UnicodeScript.of(codepoint);
-
-            System.out.printf("[U+%04X] %s %s %s, %s\n", codepoint, Character.getName(codepoint), Character.getType(codepoint), dir, script);
-        }
-    }
-
-    @Test void testLineBreak() {
-        BreakIterator iterator = BreakIterator.getLineInstance();
-        iterator.setText(TEXT);
-        // 迭代并分割文本
-        int start = iterator.first();
-        int end;
-        while ((end = iterator.next()) != BreakIterator.DONE) {
-            String line = TEXT.substring(start, end);
-            System.out.println(line);
-            start = end;
-        }
-    }
-    @Test void testSimpleBidi() {
-        List<BidiRun> bidiRuns = extractBidiRuns(TEXT);
-
-        // 输出每个 BidiRun 的文本和方向性
-        for (BidiRun bidiRun : bidiRuns) {
-            System.out.printf("Directionality: %d %s\n", bidiRun.getDirectionality(), bidiRun.getText());
-        }
-    }
-
-    @Test void testUScriptRun() {
-        UScriptRun run = new UScriptRun(TEXT);
-        while (run.next()) {
-            int start = run.getScriptStart();
-            int limit = run.getScriptLimit();
-            int script = run.getScriptCode();
-            System.out.printf("Script %s from %d to %d\n", UScript.getName(script), start, limit);
-        }
-    }
-
-    @Test void testBidi() {
-        Bidi bidi = new Bidi(TEXT, Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT);
-        System.out.printf("isMixed:%b, runCount:%d\n", bidi.isMixed(), bidi.getRunCount());
-
-        for (int i = 0; i < bidi.getRunCount(); i++) {
-            int start = bidi.getRunStart(i);
-            int limit = bidi.getRunLimit(i);
-            System.out.printf("start=%d, limit=%d, level=%d, %s\n", start, limit, bidi.getRunLevel(i), TEXT.substring(start, limit));// 0-left_to_right, 1-right_to_left
-        }
-    }
-
-    // 将字符串分解为多个 BidiRun
-    private static List<BidiRun> extractBidiRuns(String text) {
-        List<BidiRun> bidiRuns = new ArrayList<>();
-        StringBuilder runText = new StringBuilder();
-        byte currentDirectionality = -1; // 初始方向性为 -1，表示未知
-
-        // 遍历字符串中的每个字符
-        for (int i = 0; i < text.length(); i++) {
-            char c = text.charAt(i);
-            byte directionality = Character.getDirectionality(c);
-
-            // 如果当前字符的方向性与前一个字符不同，或者当前字符是控制字符，则结束当前 Run，并添加到列表中
-            if (directionality != currentDirectionality || Character.isMirrored(c)) {
-                if (runText.length() > 0) {
-                    bidiRuns.add(new BidiRun(runText.toString(), currentDirectionality));
-                    runText.setLength(0);
-                }
-                currentDirectionality = directionality;
-            }
-
-            // 将当前字符添加到当前 Run 中
-            runText.append(c);
-        }
-
-        // 添加最后一个 Run
-        if (runText.length() > 0) {
-            bidiRuns.add(new BidiRun(runText.toString(), currentDirectionality));
-        }
-
-        return bidiRuns;
-    }
-
-    // 表示一个 Bidi Run 的类
-    static class BidiRun {
-        private final String text;
-        private final byte directionality;
-
-        public BidiRun(String text, byte directionality) {
-            this.text = text;
-            this.directionality = directionality;
-        }
-
-        public String getText() {
-            return text;
-        }
-
-        public byte getDirectionality() {
-            return directionality;
-        }
-    }
-
-    private static final Pattern tagPattern = Pattern.compile("<(b|i|u|color|span|style)(.*?)>(.*?)</\\1>");
-
-    public static void parse(String gmarkText) {
-        Matcher matcher = tagPattern.matcher(gmarkText);
-        while (matcher.find()) {
-            String tag = matcher.group(1);
-            String attributes = matcher.group(2);
-            String content = matcher.group(3);
-            System.out.println("Tag: " + tag);
-            if (!attributes.isEmpty()) {
-                System.out.println("Attributes: " + attributes);
-            }
-            System.out.println("Content: " + content);
-        }
-    }
-
-    @Test void testParseMarker() {
-        String gmarkText = "<b>Hello</b> <i>world</i> <color id=\"#FFCCDD\">!</color>";
-        parse(gmarkText);
-    }
-
-    public static List<TextSpan> extractTagContents(TextSpan parentSpan) {
-        List<TextSpan> contents = new ArrayList<>();
-        Matcher matcher = tagPattern.matcher(parentSpan.text);
-        int lastEnd = 0;
-        while (matcher.find()) {
-            // 添加标签之前的文本部分
-            String beforeTag = parentSpan.text.substring(lastEnd, matcher.start());
-            if (!beforeTag.isEmpty()) {
-                contents.add(new TextSpan(beforeTag, parentSpan.attributes));
-            }
-            String tag = matcher.group(1);
-            String attributes = matcher.group(2);
-            // 添加标签内的内容
-            String content = matcher.group(3);
-
-            List<String> attrList = new ArrayList<>();
-            if (parentSpan.attributes != null) {
-                // 外层优先级低，放在前面
-                attrList.addAll(parentSpan.attributes);
-            }
-            // 内层优先级高，放在后面。
-            attrList.add(tag + ":" + attributes);
-            if (!content.isEmpty()) {
-                TextSpan span = new TextSpan(content, attrList);
-                if (tagPattern.matcher(content).find()) {
-                    List<TextSpan> spans = extractTagContents(span);
-                    contents.addAll(spans);
-                } else {
-                    contents.add(span);
-                }
-            }
-            // 更新上一个标签结束的位置
-            lastEnd = matcher.end();
-        }
-        // 添加剩余的文本部分
-        String remainder = parentSpan.text.substring(lastEnd);
-        if (!remainder.isEmpty()) {
-            contents.add(new TextSpan(remainder, parentSpan.attributes));
-        }
-        return contents;
-    }
-
-    public static List<TextSpan> extractTagContents(String gmarkText) {
-        List<TextSpan> contents = new ArrayList<>();
-        Matcher matcher = tagPattern.matcher(gmarkText);
-        int lastEnd = 0;
-        while (matcher.find()) {
-            // 添加标签之前的文本部分
-            String beforeTag = gmarkText.substring(lastEnd, matcher.start());
-            if (!beforeTag.isEmpty()) {
-                contents.add(new TextSpan(beforeTag, null));
-            }
-            String tag = matcher.group(1);
-            String attributes = matcher.group(2);
-            // 添加标签内的内容
-            String content = matcher.group(3);
-
-            List<String> attrList = new ArrayList<>();
-            attrList.add(tag + ":" + attributes);
-            if (!content.isEmpty()) {
-                TextSpan span = new TextSpan(content, attrList);
-                if (tagPattern.matcher(content).find()) {
-                    List<TextSpan> spans = extractTagContents(span);
-                    contents.addAll(spans);
-                } else {
-                    contents.add(span);
-                }
-            }
-            // 更新上一个标签结束的位置
-            lastEnd = matcher.end();
-        }
-        // 添加剩余的文本部分
-        String remainder = gmarkText.substring(lastEnd);
-        if (!remainder.isEmpty()) {
-            contents.add(new TextSpan(remainder, null));
-        }
-        return contents;
-    }
-
-    @Test void parseTag() {
-        String gmarkText = "This is a nice place. <style id='my-style'><i><b>Hello</b>,<span color='#FF0000'>world</span></i>I hope <u>you</u> happy here.</style>";
-        List<TextSpan> extractedContents = extractTagContents(gmarkText);
-        for (TextSpan content : extractedContents) {
-            System.out.println(content);
-        }
-    }
-
-    static class TextSpan {
-        String text;
-        List<String> attributes;
-
-        public TextSpan(String text, List<String> attributes) {
-            this.text = text;
-            this.attributes = attributes;
-        }
-
-        public void add(List<String> attributes) {
-            if (this.attributes == null) {
-                this.attributes = new ArrayList<>();
-            }
-            this.attributes.addAll(attributes);
-        }
-        @Override
-        public String toString() {
-            return "TextSpan{" +
-                    "text='" + text + '\'' +
-                    ", attributes=" + attributes +
-                    '}';
-        }
-    }
-}
diff --git a/lib/src/test/java/io/github/jmecn/font/shaping/TestEmoji.java b/lib/src/test/java/io/github/jmecn/font/shaping/TestEmoji.java
deleted file mode 100644
index b8fb71d..0000000
--- a/lib/src/test/java/io/github/jmecn/font/shaping/TestEmoji.java
+++ /dev/null
@@ -1,67 +0,0 @@
-package io.github.jmecn.font.shaping;
-
-import org.junit.jupiter.api.Test;
-
-import static org.junit.jupiter.api.Assertions.*;
-
-/**
- * desc:
- *
- * @author yanmaoyuan
- */
-class TestEmoji {
-
-    @Test void charDetect() {
-        String text = "Hello😊";
-
-        // the string looks like only have 6 chars, but emoji is a surrogate pair, so the length is 7
-        assertFalse(6 == text.length());
-        assertTrue(7 == text.length());
-
-        // the 6th and 7th char is a surrogate pair
-        assertTrue(Character.isHighSurrogate(text.charAt(5)));
-        assertTrue(Character.isLowSurrogate(text.charAt(6)));
-
-        // the codepoint is not equal to the char
-        assertEquals(0xD83D, text.charAt(5));
-        assertEquals(0x1F60A, Character.codePointAt(text, 5));
-
-        // 0xDE0A is a control character, it is not a high surrogate
-        assertEquals(0xDE0A, text.charAt(6));
-        assertEquals(0xDE0A, Character.codePointAt(text, 6));
-
-        // print all chars
-        for (int i = 0; i < text.length(); i++) {
-            char c = text.charAt(i);
-            int codepoint = Character.codePointAt(text, i);
-            System.out.printf("char=%c, charAt=0x%X, codepoint=0x%X isHighSurrogate=%b, isLowSurrogate=%b\n", c, (int)c, codepoint, Character.isHighSurrogate(c), Character.isLowSurrogate(c));
-        }
-    }
-
-    @Test void testFitzpatrickModifier() {
-        String text = "\uD83E\uDDD1\uD83E\uDDD1\uD83C\uDFFB\uD83E\uDDD1\uD83C\uDFFC\uD83E\uDDD1\uD83C\uDFFD\uD83E\uDDD1\uD83C\uDFFE\uD83E\uDDD1\uD83C\uDFFF";
-        System.out.println(text);
-        System.out.println("\uD83C\uDFFB");
-        System.out.println("\uD83C\uDFFC");
-        System.out.println("\uD83C\uDFFD");
-        System.out.println("\uD83C\uDFFE");
-        System.out.println("\uD83C\uDFFF");
-
-        // print all chars
-        for (int i = 0; i < text.length(); i++) {
-            char c = text.charAt(i);
-            int codepoint = Character.codePointAt(text, i);
-            System.out.printf("char=%c, charAt=0x%X, codepoint=0x%X isHighSurrogate=%b, isLowSurrogate=%b\n", c, (int)c, codepoint, Character.isHighSurrogate(c), Character.isLowSurrogate(c));
-        }
-    }
-
-    @Test void testEmojiZwj() {
-        String name = "👨‍👩‍👧‍👦";
-        assertEquals(11, name.length());
-        assertEquals("\uD83D\uDC68\u200D\uD83D\uDC69\u200D\uD83D\uDC67\u200D\uD83D\uDC66", name);
-        System.out.println("\uD83D\uDC68");
-        System.out.println("\uD83D\uDC69");
-        System.out.println("\uD83D\uDC67");
-        System.out.println("\uD83D\uDC66");
-    }
-}
diff --git a/lib/src/test/java/io/github/jmecn/font/shaping/TestEmojiIter.java b/lib/src/test/java/io/github/jmecn/font/shaping/TestEmojiIter.java
deleted file mode 100644
index ecaf291..0000000
--- a/lib/src/test/java/io/github/jmecn/font/shaping/TestEmojiIter.java
+++ /dev/null
@@ -1,48 +0,0 @@
-package io.github.jmecn.font.shaping;
-
-import io.github.jmecn.text.EmojiIterator;
-import io.github.jmecn.text.Unichar;
-import org.junit.jupiter.api.Test;
-
-/**
- * desc:
- *
- * @author yanmaoyuan
- */
-class TestEmojiIter {
-
-    void process(String text) {
-        char[] chars = text.toCharArray();
-        EmojiIterator iter = new EmojiIterator(chars);
-
-        Unichar[] unichars = iter.getUnicodeChars();
-        for (Unichar unichar : unichars) {
-            System.out.println(unichar);
-        }
-        System.out.println("Unicode count:" + unichars.length);
-        System.out.println(text);
-        System.out.println("Character count:" + chars.length);
-        while (iter.next()) {
-            int start = iter.getStart();
-            int end = iter.getEnd();
-            int ts = iter.getTextStart();
-            int te = iter.getTextEnd();
-            String substr = text.substring(ts, te);
-            System.out.printf("isEmoji:%b, unicode:[%d, %d), text:[%d, %s), %s\n", iter.isEmoji(), start, end, ts, te, substr);
-        }
-    }
-    @Test void testSentence() {
-        String text = "Hello" + "🙋🧑🧑🏻🧑🏼🧑🏽🧑🏾🧑🏿" + "world" + "🍰🐒" + "一家人" + "👨‍👩‍👧‍👦";
-        process(text);
-    }
-
-    @Test void testZwjSequenceWithText() {
-        String text = "我" + "👨‍👩‍👧‍👦";
-        process(text);
-    }
-
-    @Test void testZwjSequence() {
-        String text = "👨‍👩‍👧‍👦";
-        process(text);
-    }
-}
diff --git a/lib/src/test/java/io/github/jmecn/font/shaping/TextSpan.java b/lib/src/test/java/io/github/jmecn/font/shaping/TextSpan.java
deleted file mode 100644
index 62aea69..0000000
--- a/lib/src/test/java/io/github/jmecn/font/shaping/TextSpan.java
+++ /dev/null
@@ -1,13 +0,0 @@
-package io.github.jmecn.font.shaping;
-
-import java.util.List;
-
-/**
- * desc:
- *
- * @author yanmaoyuan
- */
-public class TextSpan {
-    private String text;
-    private List<String> attributes;
-}
\ No newline at end of file
diff --git a/lib/src/test/java/io/github/jmecn/text/TestBidiRun.java b/lib/src/test/java/io/github/jmecn/text/TestBidiRun.java
new file mode 100644
index 0000000..0b872a9
--- /dev/null
+++ b/lib/src/test/java/io/github/jmecn/text/TestBidiRun.java
@@ -0,0 +1,128 @@
+package io.github.jmecn.text;
+
+import com.ibm.icu.lang.UScript;
+import com.ibm.icu.lang.UScriptRun;
+import org.junit.jupiter.api.Test;
+
+import java.text.Bidi;
+import java.text.BreakIterator;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Exploratory tests that print directionality, script, line-break and bidi-run information for a mixed-script string.
+ *
+ * @author yanmaoyuan
+ */
+class TestBidiRun {
+    static final String TEXT = "Love and peace." +// latin
+            "爱与和平。世界是我们的，也是你们的。" +// Han
+            "الحب 123والسلام" + // Arabic
+            "사랑과 평화" + // Hangul
+            "👋🤔️" // emoji
+            ;
+
+    @Test
+    void testPropertyDetect() {
+        for (int i = 0; i < TEXT.length(); i++) { // NOTE: steps per char, so the low surrogate of a pair is also looked up on its own
+            int codepoint = Character.codePointAt(TEXT, i);
+            byte dir = Character.getDirectionality(codepoint);
+            Character.UnicodeScript script = Character.UnicodeScript.of(codepoint);
+
+            System.out.printf("[U+%04X] %s %s %s, %s\n", codepoint, Character.getName(codepoint), Character.getType(codepoint), dir, script);
+        }
+    }
+
+    @Test void testLineBreak() {
+        BreakIterator iterator = BreakIterator.getLineInstance();
+        iterator.setText(TEXT);
+        // Walk the line-break boundaries and print the segment between each pair
+        int start = iterator.first();
+        int end;
+        while ((end = iterator.next()) != BreakIterator.DONE) {
+            String line = TEXT.substring(start, end);
+            System.out.println(line);
+            start = end;
+        }
+    }
+    @Test void testSimpleBidi() {
+        List<BidiRun> bidiRuns = extractBidiRuns(TEXT);
+
+        // Print the directionality and text of each BidiRun
+        for (BidiRun bidiRun : bidiRuns) {
+            System.out.printf("Directionality: %d %s\n", bidiRun.getDirectionality(), bidiRun.getText());
+        }
+    }
+
+    @Test void testUScriptRun() {
+        UScriptRun run = new UScriptRun(TEXT);
+        while (run.next()) {
+            int start = run.getScriptStart();
+            int limit = run.getScriptLimit();
+            int script = run.getScriptCode();
+            System.out.printf("Script %s from %d to %d\n", UScript.getName(script), start, limit);
+        }
+    }
+
+    @Test void testBidi() {
+        Bidi bidi = new Bidi(TEXT, Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT);
+        System.out.printf("isMixed:%b, runCount:%d\n", bidi.isMixed(), bidi.getRunCount());
+
+        for (int i = 0; i < bidi.getRunCount(); i++) {
+            int start = bidi.getRunStart(i);
+            int limit = bidi.getRunLimit(i);
+            System.out.printf("start=%d, limit=%d, level=%d, %s\n", start, limit, bidi.getRunLevel(i), TEXT.substring(start, limit));// 0-left_to_right, 1-right_to_left
+        }
+    }
+
+    // Split the string into BidiRuns by grouping consecutive chars that share a directionality
+    private static List<BidiRun> extractBidiRuns(String text) {
+        List<BidiRun> bidiRuns = new ArrayList<>();
+        StringBuilder runText = new StringBuilder();
+        byte currentDirectionality = -1; // start at -1, meaning "unknown"
+
+        // Visit every char of the string (per UTF-16 char, not per code point)
+        for (int i = 0; i < text.length(); i++) {
+            char c = text.charAt(i);
+            byte directionality = Character.getDirectionality(c);
+
+            // If the directionality differs from the current run's, or the char is mirrored (Character.isMirrored), close the current run and add it to the list
+            if (directionality != currentDirectionality || Character.isMirrored(c)) {
+                if (runText.length() > 0) {
+                    bidiRuns.add(new BidiRun(runText.toString(), currentDirectionality));
+                    runText.setLength(0);
+                }
+                currentDirectionality = directionality;
+            }
+
+            // Append the current char to the current run
+            runText.append(c);
+        }
+
+        // Add the final run
+        if (runText.length() > 0) {
+            bidiRuns.add(new BidiRun(runText.toString(), currentDirectionality));
+        }
+
+        return bidiRuns;
+    }
+
+    // Holds the text of one bidi run together with its directionality
+    static class BidiRun {
+        private final String text;
+        private final byte directionality;
+
+        public BidiRun(String text, byte directionality) {
+            this.text = text;
+            this.directionality = directionality;
+        }
+
+        public String getText() {
+            return text;
+        }
+
+        public byte getDirectionality() {
+            return directionality;
+        }
+    }
+}
diff --git a/lib/src/test/java/io/github/jmecn/text/TestEmojiIterator.java b/lib/src/test/java/io/github/jmecn/text/TestEmojiIterator.java
new file mode 100644
index 0000000..76dea98
--- /dev/null
+++ b/lib/src/test/java/io/github/jmecn/text/TestEmojiIterator.java
@@ -0,0 +1,163 @@
+package io.github.jmecn.text;
+
+import org.junit.jupiter.api.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+/**
+ * Tests that EmojiIterator splits sample strings into the expected emoji / non-emoji runs.
+ *
+ * @author yanmaoyuan
+ */
+class TestEmojiIterator {
+
+    void test(String text, EmojiRun[] expectedList) { // iterates text and asserts the produced runs match expectedList field by field
+        display(text); // print the runs first for manual inspection
+
+        List<EmojiRun> actualList = new ArrayList<>();
+        EmojiIterator iterator = new EmojiIterator(text.toCharArray());
+        while (iterator.next()) {
+            actualList.add(new EmojiRun(iterator.isEmoji(), iterator.getStart(), iterator.getEnd(), iterator.getTextStart(), iterator.getTextEnd()));
+        }
+
+        assertEquals(expectedList.length, actualList.size(), "size:" + text);
+        int size = expectedList.length;
+        for (int i = 0; i < size; i++) {
+            EmojiRun expected = expectedList[i];
+            EmojiRun actual = actualList.get(i);
+            assertEquals(expected.isEmoji(), actual.isEmoji(), "isEmoji:" + text);
+            assertEquals(expected.getUnicodeStart(), actual.getUnicodeStart(), "unicodeStart:" + text);
+            assertEquals(expected.getUnicodeEnd(), actual.getUnicodeEnd(), "unicodeEnd:" + text);
+            assertEquals(expected.getTextStart(), actual.getTextStart(), "textStart:" + text);
+            assertEquals(expected.getTextEnd(), actual.getTextEnd(), "textEnd:" + text);
+        }
+    }
+
+    void display(String text) { // prints every run of text to stdout; makes no assertions
+        char[] chars = text.toCharArray();
+        EmojiIterator iter = new EmojiIterator(chars);
+        Unichar[] unichars = iter.getUnicodeChars();
+        System.out.printf(">>>> %s <<<<\nunicode count:%d, character count:%d\n", text, unichars.length, chars.length);
+        System.out.println("[id]:  unicode,   string, emoji, text");
+        int runs = 0;
+
+        while (iter.next()) {
+            int start = iter.getStart();
+            int end = iter.getEnd();
+            int ts = iter.getTextStart();
+            int te = iter.getTextEnd();
+            String substr = text.substring(ts, te);
+            System.out.printf("[%2d]: [%2d, %2d), [%2d, %2d), %5b, %s\n", runs++, start, end, ts, te, iter.isEmoji(), substr);
+        }
+        System.out.println();
+    }
+
+    static class TestData { // pairs an input string with the runs expected for it
+        String text;
+        EmojiRun[] expectedList;
+
+        TestData(String text, EmojiRun[] expectedList) {
+            this.text = text;
+            this.expectedList = expectedList;
+        }
+    }
+
+    static List<TestData> getTestData() { // builds the cases consumed by testAll
+        String text;
+        EmojiRun[] expectedList;
+
+        List<TestData> list = new ArrayList<>();
+
+        // emoji base: smile
+        text = "\uD83D\uDE0A";// 😊
+        expectedList = new EmojiRun[] {
+                new EmojiRun(true, 0, 1, 0, 2)
+        };
+        list.add(new TestData(text, expectedList));
+
+        // zero-width joiner
+        // family: man and woman and girl and boy
+        text = "\uD83D\uDC68\u200D\uD83D\uDC69\u200D\uD83D\uDC67\u200D\uD83D\uDC66";// 👨‍👩‍👧‍👦
+        expectedList = new EmojiRun[] {
+                new EmojiRun(true, 0, 7, 0, 11)
+        };
+        list.add(new TestData(text, expectedList));
+
+        // emoji fitzpatrick modifier
+        // a hand with light skin tone
+        text = "\u270B\uD83C\uDFFB"; // ✋🏻
+        expectedList = new EmojiRun[] {
+                new EmojiRun(true, 0, 2, 0, 3)
+        };
+        list.add(new TestData(text, expectedList));
+
+        // emoji fitzpatrick modifier and zero-width joiner
+        // a female firefighter with medium skin tone (U+1F3FD)
+        text = "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83D\uDE92"; // 👩🏽‍🚒
+        expectedList = new EmojiRun[] {
+                new EmojiRun(true, 0, 4, 0, 7)
+        };
+        list.add(new TestData(text, expectedList));
+
+        // alphanum: cool button
+        text = "\uD83C\uDD92";// 🆒
+        expectedList = new EmojiRun[] {
+                new EmojiRun(true, 0, 1, 0, 2)
+        };
+        list.add(new TestData(text, expectedList));
+
+        // flag: China
+        text = "\uD83C\uDDE8\uD83C\uDDF3"; // 🇨🇳
+        expectedList = new EmojiRun[] {
+                new EmojiRun(true, 0, 2, 0, 4)
+        };
+        list.add(new TestData(text, expectedList));
+
+        // flag: pirate flag
+        text = "\uD83C\uDFF4\u200D\u2620\uFE0F";// 🏴‍☠️
+        expectedList = new EmojiRun[] {
+                new EmojiRun(true, 0, 4, 0, 5)
+        };
+        list.add(new TestData(text, expectedList));
+
+        // keycap: #️⃣*️⃣0️⃣1️⃣2️⃣3️⃣4️⃣5️⃣6️⃣7️⃣8️⃣9️⃣
+        text = "#\uFE0F\u20E3*\uFE0F\u20E30\uFE0F\u20E31\uFE0F\u20E32\uFE0F\u20E33\uFE0F\u20E34\uFE0F\u20E35\uFE0F\u20E36\uFE0F\u20E37\uFE0F\u20E38\uFE0F\u20E39\uFE0F\u20E3";
+        expectedList = new EmojiRun[]{
+                new EmojiRun(true, 0, 36, 0, 36),
+        };
+        list.add(new TestData(text, expectedList));
+
+        // complex text
+        text = "Hello, 你好，🌍世界！";
+        expectedList = new EmojiRun[] {
+                new EmojiRun(false, 0, 10, 0, 10),
+                new EmojiRun(true, 10, 11, 10, 12),
+                new EmojiRun(false, 11, 14, 12, 15),
+        };
+        list.add(new TestData(text, expectedList));
+
+        // complex emoji combined with text
+        text = "Hello" + "🙋🧑🧑🏻🧑🏼🧑🏽🧑🏾🧑🏿" + "world" + "🍰🐒" + "家庭" + "👨‍👩‍👧‍👦";
+        expectedList = new EmojiRun[] {
+                new EmojiRun(false, 0, 5, 0, 5),
+                new EmojiRun(true, 5, 17, 5, 29),
+                new EmojiRun(false, 17, 22, 29, 34),
+                new EmojiRun(true, 22, 24, 34, 38),
+                new EmojiRun(false, 24, 26, 38, 40),
+                new EmojiRun(true, 26, 33, 40, 51)
+        };
+
+        list.add(new TestData(text, expectedList));
+
+        return list;
+    }
+
+    @Test void testAll() {
+        for (TestData data : getTestData()) {
+            test(data.text, data.expectedList);
+        }
+    }
+}
diff --git a/lib/src/test/java/io/github/jmecn/text/TestRichTextToSpan.java b/lib/src/test/java/io/github/jmecn/text/TestRichTextToSpan.java
new file mode 100644
index 0000000..8612802
--- /dev/null
+++ b/lib/src/test/java/io/github/jmecn/text/TestRichTextToSpan.java
@@ -0,0 +1,155 @@
+package io.github.jmecn.text;
+
+import org.junit.jupiter.api.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+/**
+ * Test case to split rich text into text spans.
+ *
+ * @author yanmaoyuan
+ */
+public class TestRichTextToSpan {
+
+    /**
+     * Matches one {@code <tag attrs>content</tag>} element: group 1 is the tag name, group 2 the
+     * raw attribute text, group 3 the lazily matched content. The {@code \b} after the tag name
+     * keeps longer names such as {@code <br>} from being read as {@code <b>} with attributes "r".
+     */
+    private static final Pattern TAG_PATTERN =
+            Pattern.compile("<(b|i|u|color|span|style)\\b(.*?)>(.*?)</\\1>");
+
+    /**
+     * Splits the text of {@code parentSpan} into flat spans, recursing into nested tags.
+     * <p>
+     * Text outside any tag keeps the parent's attributes. Text inside a tag gets the parent's
+     * attributes followed by a {@code "tag:attributes"} entry for that tag.
+     *
+     * @param parentSpan span whose text is parsed; its attributes may be {@code null}
+     * @return the spans in document order; empty pieces are omitted
+     */
+    public static List<TextSpan> extractTagContents(TextSpan parentSpan) {
+        List<TextSpan> contents = new ArrayList<>();
+        Matcher matcher = TAG_PATTERN.matcher(parentSpan.text);
+        int lastEnd = 0;
+        while (matcher.find()) {
+            // Plain text between the previous tag and this one.
+            String beforeTag = parentSpan.text.substring(lastEnd, matcher.start());
+            if (!beforeTag.isEmpty()) {
+                contents.add(new TextSpan(beforeTag, parentSpan.attributes));
+            }
+            // Remember where this tag ends.
+            lastEnd = matcher.end();
+
+            String content = matcher.group(3);
+            if (content.isEmpty()) {
+                continue;
+            }
+
+            // Outer attributes have lower priority and go first; the inner tag goes last.
+            List<String> attrList = new ArrayList<>();
+            if (parentSpan.attributes != null) {
+                attrList.addAll(parentSpan.attributes);
+            }
+            attrList.add(matcher.group(1) + ":" + matcher.group(2));
+
+            TextSpan span = new TextSpan(content, attrList);
+            if (TAG_PATTERN.matcher(content).find()) {
+                contents.addAll(extractTagContents(span));
+            } else {
+                contents.add(span);
+            }
+        }
+        // Plain text after the last tag.
+        String remainder = parentSpan.text.substring(lastEnd);
+        if (!remainder.isEmpty()) {
+            contents.add(new TextSpan(remainder, parentSpan.attributes));
+        }
+        return contents;
+    }
+
+    /**
+     * Splits marked-up text into flat spans. Text outside every tag gets {@code null} attributes.
+     *
+     * @param gmarkText the marked-up text to parse
+     * @return the spans in document order
+     */
+    public static List<TextSpan> extractTagContents(String gmarkText) {
+        return extractTagContents(new TextSpan(gmarkText, null));
+    }
+
+    @Test void parseTag() {
+        String gmarkText = "This is a nice place. <style id='my-style'><i><b>Hello</b>,<span color='#FF0000'>world</span></i>I hope <u>you</u> happy here.</style>";
+        List<TextSpan> extractedContents = extractTagContents(gmarkText);
+        for (TextSpan content : extractedContents) {
+            System.out.println(content);
+        }
+
+        String style = "style: id='my-style'";
+        assertEquals(7, extractedContents.size());
+        assertSpan(extractedContents.get(0), "This is a nice place. ");
+        assertSpan(extractedContents.get(1), "Hello", style, "i:", "b:");
+        assertSpan(extractedContents.get(2), ",", style, "i:");
+        assertSpan(extractedContents.get(3), "world", style, "i:", "span: color='#FF0000'");
+        assertSpan(extractedContents.get(4), "I hope ", style);
+        assertSpan(extractedContents.get(5), "you", style, "u:");
+        assertSpan(extractedContents.get(6), " happy here.", style);
+    }
+
+    /** A longer tag name that merely starts with a known one must stay plain text. */
+    @Test void tagNameMustMatchExactly() {
+        List<TextSpan> spans = extractTagContents("a<br>b</b>");
+        assertEquals(1, spans.size());
+        assertSpan(spans.get(0), "a<br>b</b>");
+    }
+
+    /** Mutating one span's attributes must not leak into sibling spans. */
+    @Test void spansDoNotShareAttributeLists() {
+        List<TextSpan> spans = extractTagContents("<b>x<i>y</i>z</b>");
+        spans.get(0).add(Arrays.asList("extra"));
+        assertSpan(spans.get(2), "z", "b:");
+    }
+
+    /** Asserts a span's text and attribute list; no expected attributes means {@code null}. */
+    private static void assertSpan(TextSpan span, String text, String... attributes) {
+        List<String> expected = attributes.length == 0 ? null : Arrays.asList(attributes);
+        assertEquals(text, span.text);
+        assertEquals(expected, span.attributes);
+    }
+
+    static class TextSpan {
+        String text;
+        List<String> attributes;
+
+        public TextSpan(String text, List<String> attributes) {
+            this.text = text;
+            // Copy so spans built from the same parent list never share one mutable list.
+            this.attributes = attributes == null ? null : new ArrayList<>(attributes);
+        }
+
+        /** Appends {@code attributes} to this span's list; a {@code null} argument is ignored. */
+        public void add(List<String> attributes) {
+            if (attributes == null) {
+                return;
+            }
+            if (this.attributes == null) {
+                this.attributes = new ArrayList<>();
+            }
+            this.attributes.addAll(attributes);
+        }
+
+        @Override
+        public String toString() {
+            return "TextSpan{" +
+                    "text='" + text + '\'' +
+                    ", attributes=" + attributes +
+                    '}';
+        }
+    }
+}