Skip to content

Commit 5810ec3

Browse files
authored
Merge pull request #50 from SeeSharpSoft/fb_ea_fullwidth
[FEATURE] East Asian Fullwidth Character Support (optional)
2 parents 387e4ca + 038fcdd commit 5810ec3

19 files changed

+732
-6
lines changed

src/main/java/net/seesharpsoft/intellij/plugins/csv/formatter/CsvFormatHelper.java

+136-3
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,18 @@
55
import com.intellij.lang.ASTNode;
66
import com.intellij.psi.codeStyle.CodeStyleSettings;
77
import com.intellij.psi.formatter.common.AbstractBlock;
8-
import net.seesharpsoft.intellij.plugins.csv.settings.CsvCodeStyleSettings;
98
import net.seesharpsoft.intellij.plugins.csv.CsvColumnInfo;
109
import net.seesharpsoft.intellij.plugins.csv.CsvLanguage;
1110
import net.seesharpsoft.intellij.plugins.csv.psi.CsvElementType;
1211
import net.seesharpsoft.intellij.plugins.csv.psi.CsvTypes;
12+
import net.seesharpsoft.intellij.plugins.csv.settings.CsvCodeStyleSettings;
1313
import org.jetbrains.annotations.Nullable;
1414

15+
import java.io.BufferedReader;
16+
import java.io.IOException;
17+
import java.io.InputStream;
18+
import java.io.InputStreamReader;
19+
import java.util.ArrayList;
1520
import java.util.HashMap;
1621
import java.util.List;
1722
import java.util.Map;
@@ -23,16 +28,144 @@ public final class CsvFormatHelper {
2328
private static final Pattern BEGIN_WHITE_SPACE_PATTERN = Pattern.compile("^" + WHITE_SPACE_PATTERN_STRING);
2429
private static final Pattern END_WHITE_SPACE_PATTERN = Pattern.compile(WHITE_SPACE_PATTERN_STRING + "$");
2530

31+
private static final int HEX_RADIX = 16;
32+
private static final int[][] WIDTH_DOUBLE_CHARCODE_RANGES;
33+
private static final int[][] AMBIGUOUS_DOUBLE_CHARCODE_RANGES;
34+
private static final int[][] WIDTH_ZERO_CHARCODE_RANGES = {
35+
{0x0300, 0x036F}, {0x0483, 0x0486}, {0x0488, 0x0489},
36+
{0x0591, 0x05BD}, {0x05BF, 0x05BF}, {0x05C1, 0x05C2},
37+
{0x05C4, 0x05C5}, {0x05C7, 0x05C7}, {0x0600, 0x0603},
38+
{0x0610, 0x0615}, {0x064B, 0x065E}, {0x0670, 0x0670},
39+
{0x06D6, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED},
40+
{0x070F, 0x070F}, {0x0711, 0x0711}, {0x0730, 0x074A},
41+
{0x07A6, 0x07B0}, {0x07EB, 0x07F3}, {0x0901, 0x0902},
42+
{0x093C, 0x093C}, {0x0941, 0x0948}, {0x094D, 0x094D},
43+
{0x0951, 0x0954}, {0x0962, 0x0963}, {0x0981, 0x0981},
44+
{0x09BC, 0x09BC}, {0x09C1, 0x09C4}, {0x09CD, 0x09CD},
45+
{0x09E2, 0x09E3}, {0x0A01, 0x0A02}, {0x0A3C, 0x0A3C},
46+
{0x0A41, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D},
47+
{0x0A70, 0x0A71}, {0x0A81, 0x0A82}, {0x0ABC, 0x0ABC},
48+
{0x0AC1, 0x0AC5}, {0x0AC7, 0x0AC8}, {0x0ACD, 0x0ACD},
49+
{0x0AE2, 0x0AE3}, {0x0B01, 0x0B01}, {0x0B3C, 0x0B3C},
50+
{0x0B3F, 0x0B3F}, {0x0B41, 0x0B43}, {0x0B4D, 0x0B4D},
51+
{0x0B56, 0x0B56}, {0x0B82, 0x0B82}, {0x0BC0, 0x0BC0},
52+
{0x0BCD, 0x0BCD}, {0x0C3E, 0x0C40}, {0x0C46, 0x0C48},
53+
{0x0C4A, 0x0C4D}, {0x0C55, 0x0C56}, {0x0CBC, 0x0CBC},
54+
{0x0CBF, 0x0CBF}, {0x0CC6, 0x0CC6}, {0x0CCC, 0x0CCD},
55+
{0x0CE2, 0x0CE3}, {0x0D41, 0x0D43}, {0x0D4D, 0x0D4D},
56+
{0x0DCA, 0x0DCA}, {0x0DD2, 0x0DD4}, {0x0DD6, 0x0DD6},
57+
{0x0E31, 0x0E31}, {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E},
58+
{0x0EB1, 0x0EB1}, {0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC},
59+
{0x0EC8, 0x0ECD}, {0x0F18, 0x0F19}, {0x0F35, 0x0F35},
60+
{0x0F37, 0x0F37}, {0x0F39, 0x0F39}, {0x0F71, 0x0F7E},
61+
{0x0F80, 0x0F84}, {0x0F86, 0x0F87}, {0x0F90, 0x0F97},
62+
{0x0F99, 0x0FBC}, {0x0FC6, 0x0FC6}, {0x102D, 0x1030},
63+
{0x1032, 0x1032}, {0x1036, 0x1037}, {0x1039, 0x1039},
64+
{0x1058, 0x1059}, {0x1160, 0x11FF}, {0x135F, 0x135F},
65+
{0x1712, 0x1714}, {0x1732, 0x1734}, {0x1752, 0x1753},
66+
{0x1772, 0x1773}, {0x17B4, 0x17B5}, {0x17B7, 0x17BD},
67+
{0x17C6, 0x17C6}, {0x17C9, 0x17D3}, {0x17DD, 0x17DD},
68+
{0x180B, 0x180D}, {0x18A9, 0x18A9}, {0x1920, 0x1922},
69+
{0x1927, 0x1928}, {0x1932, 0x1932}, {0x1939, 0x193B},
70+
{0x1A17, 0x1A18}, {0x1B00, 0x1B03}, {0x1B34, 0x1B34},
71+
{0x1B36, 0x1B3A}, {0x1B3C, 0x1B3C}, {0x1B42, 0x1B42},
72+
{0x1B6B, 0x1B73}, {0x1DC0, 0x1DCA}, {0x1DFE, 0x1DFF},
73+
{0x200B, 0x200F}, {0x202A, 0x202E}, {0x2060, 0x2063},
74+
{0x206A, 0x206F}, {0x20D0, 0x20EF}, {0x302A, 0x302F},
75+
{0x3099, 0x309A}, {0xA806, 0xA806}, {0xA80B, 0xA80B},
76+
{0xA825, 0xA826}, {0xFB1E, 0xFB1E}, {0xFE00, 0xFE0F},
77+
{0xFE20, 0xFE23}, {0xFEFF, 0xFEFF}, {0xFFF9, 0xFFFB},
78+
{0x10A01, 0x10A03}, {0x10A05, 0x10A06}, {0x10A0C, 0x10A0F},
79+
{0x10A38, 0x10A3A}, {0x10A3F, 0x10A3F}, {0x1D167, 0x1D169},
80+
{0x1D173, 0x1D182}, {0x1D185, 0x1D18B}, {0x1D1AA, 0x1D1AD},
81+
{0x1D242, 0x1D244}, {0xE0001, 0xE0001}, {0xE0020, 0xE007F},
82+
{0xE0100, 0xE01EF}
83+
};
84+
85+
// Loads the bundled width table "misc/EastAsianDoubleWidth.csv" once at class
// initialization and splits its entries into the wide/fullwidth ranges and the
// ambiguous ranges used by charWidth.
// NOTE(review): each line is presumed to look like "<hex>[..<hex>]<sep><class>",
// i.e. a one-character separator followed by a one-character width class
// (W, F or A) - confirm against the resource file.
static {
    final List<String> wideLines = new ArrayList<>();
    final List<String> ambiguousLines = new ArrayList<>();
    try (InputStream is = CsvFormatHelper.class.getClassLoader().getResourceAsStream("misc/EastAsianDoubleWidth.csv")) {
        // getResourceAsStream returns null (it does not throw) when the resource is
        // missing; without this check the class would fail to initialize with an NPE.
        if (is != null) {
            // Decode explicitly as UTF-8 instead of relying on the platform default charset.
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(is, java.nio.charset.StandardCharsets.UTF_8));
            String line;
            // readLine() reports read failures as IOException, which the catch below
            // handles; reader.lines() would wrap them in UncheckedIOException instead.
            while ((line = reader.readLine()) != null) {
                if (line.length() <= 2) {
                    // Too short to hold a range plus separator and width class.
                    continue;
                }
                // Strip the trailing separator and width class, keeping only the range text.
                final String rangeText = line.substring(0, line.length() - 2);
                if (line.endsWith("W") || line.endsWith("F")) {
                    wideLines.add(rangeText);
                }
                if (line.endsWith("A")) {
                    ambiguousLines.add(rangeText);
                }
            }
        }
    } catch (IOException e) {
        // Best effort: continue with whatever was read so far.
        e.printStackTrace();
    }

    WIDTH_DOUBLE_CHARCODE_RANGES = convertRangeTextToRangeArray(wideLines);
    AMBIGUOUS_DOUBLE_CHARCODE_RANGES = convertRangeTextToRangeArray(ambiguousLines);
}
106+
107+
private static int[][] convertRangeTextToRangeArray(List<String> wideLines) {
108+
int[][] targetArray = new int[wideLines.size()][2];
109+
for (int i = 0; i < targetArray.length; ++i) {
110+
String[] split = wideLines.get(i).split("\\.\\.");
111+
targetArray[i][0] = Integer.parseInt(split[0], HEX_RADIX);
112+
targetArray[i][1] = split.length == 1 ? targetArray[i][0] : Integer.parseInt(split[1], HEX_RADIX);
113+
}
114+
return targetArray;
115+
}
116+
117+
private static boolean binarySearch(int[][] ranges, int charCode) {
118+
int min = 0;
119+
int mid;
120+
int max = ranges.length - 1;
121+
122+
if (charCode < ranges[0][0] || charCode > ranges[max][1]) {
123+
return false;
124+
}
125+
while (max >= min) {
126+
mid = (min + max) / 2;
127+
if (charCode > ranges[mid][1]) {
128+
min = mid + 1;
129+
} else if (charCode < ranges[mid][0]) {
130+
max = mid - 1;
131+
} else {
132+
return true;
133+
}
134+
}
135+
136+
return false;
137+
}
138+
139+
public static int charWidth(int charCode, boolean ambiguousWide) {
140+
if (charCode <= 0 || binarySearch(WIDTH_ZERO_CHARCODE_RANGES, charCode)) {
141+
return 0;
142+
}
143+
if (binarySearch(WIDTH_DOUBLE_CHARCODE_RANGES, charCode) || (ambiguousWide && binarySearch(AMBIGUOUS_DOUBLE_CHARCODE_RANGES, charCode))) {
144+
return 2;
145+
}
146+
return 1;
147+
}
148+
149+
public static int charWidth(CharSequence s, boolean ambiguousWide) {
150+
int result = 0;
151+
for (int i = 0; i < s.length(); i++) {
152+
result += charWidth(s.charAt(i), ambiguousWide);
153+
}
154+
return result;
155+
}
156+
26157
public static int getTextLength(ASTNode node, CodeStyleSettings codeStyleSettings) {
27158
CsvCodeStyleSettings csvCodeStyleSettings = codeStyleSettings.getCustomSettings(CsvCodeStyleSettings.class);
28159
String text = node.getText();
29-
int length = node.getTextLength();
160+
int length = 0;
30161
if (csvCodeStyleSettings.TABULARIZE && !csvCodeStyleSettings.WHITE_SPACES_OUTSIDE_QUOTES && text.startsWith("\"")) {
31162
text = text.substring(1, text.length() - 1);
32163
text = BEGIN_WHITE_SPACE_PATTERN.matcher(text).replaceFirst("");
33164
text = END_WHITE_SPACE_PATTERN.matcher(text).replaceFirst("");
34-
length = text.length() + 2;
165+
length += 2;
35166
}
167+
length += csvCodeStyleSettings.ENABLE_WIDE_CHARACTER_DETECTION ? charWidth(text, csvCodeStyleSettings.TREAT_AMBIGUOUS_CHARACTERS_AS_WIDE) : text.length();
168+
36169
return length;
37170
}
38171

src/main/java/net/seesharpsoft/intellij/plugins/csv/settings/CsvCodeStyleSettings.java

+2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ public class CsvCodeStyleSettings extends CustomCodeStyleSettings {
2222
public boolean WHITE_SPACES_OUTSIDE_QUOTES = true;
2323
public boolean LEADING_WHITE_SPACES = false;
2424
public int SEPARATOR_INDEX = 0;
25+
public boolean ENABLE_WIDE_CHARACTER_DETECTION = false;
26+
public boolean TREAT_AMBIGUOUS_CHARACTERS_AS_WIDE = false;
2527

2628
public static final String DEFAULT_SEPARATOR = ",";
2729
public static final String TAB_SEPARATOR = "\t";

src/main/java/net/seesharpsoft/intellij/plugins/csv/settings/CsvLanguageCodeStyleSettingsProvider.java

+9-1
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,14 @@ public void customizeSettings(@NotNull CodeStyleSettingsCustomizable consumer, @
5656
"LEADING_WHITE_SPACES",
5757
"Leading whitespaces",
5858
"Tabularize (ignores Trimming settings)");
59+
consumer.showCustomOption(CsvCodeStyleSettings.class,
60+
"ENABLE_WIDE_CHARACTER_DETECTION",
61+
"East Asian fullwidth character support (lowers performance)",
62+
"Tabularize (ignores Trimming settings)");
63+
consumer.showCustomOption(CsvCodeStyleSettings.class,
64+
"TREAT_AMBIGUOUS_CHARACTERS_AS_WIDE",
65+
"EA fullwidth ambiguous characters treated as double wide",
66+
"Tabularize (ignores Trimming settings)");
5967
}
6068

6169
if (settingsType == SettingsType.WRAPPING_AND_BRACES_SETTINGS) {
@@ -75,7 +83,7 @@ public String getCodeSample(@NotNull SettingsType settingsType) {
7583
"3,\"Cardinal Slant-D® Ring Binder, Heavy Gauge Vinyl \",Barry French, 293 ,46.71 ,8.69\n" +
7684
"4 , R380 ,Clay Rozendal,483, 1198.97,195.99 \n" +
7785
"3.1\n" +
78-
"5 ,Holmes HEPA Air Purifier,Carlos Soltero,515,30.94,21.78";
86+
"5 ,Holmes HEPA Air Purifier,Carlos Soltero,汉字宋,30.94,21.78";
7987
}
8088

8189
@Override

0 commit comments

Comments
 (0)