Skip to content

Commit a5af0ab

Browse files
tarekgh, gfoidl, ericstj
authored
Normalization APIs using the spans (#110465)
* Normalization APIs using the spans
* Address the feedback
* Update src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Icu.cs
  Co-authored-by: Günther Foidl <[email protected]>
* Fix comment indent
---------
Co-authored-by: Günther Foidl <[email protected]>
Co-authored-by: Eric StJohn <[email protected]>
1 parent 016d356 commit a5af0ab

File tree

8 files changed

+461
-89
lines changed

8 files changed

+461
-89
lines changed

src/libraries/System.Private.CoreLib/src/Resources/Strings.resx

+30-27
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
11
<?xml version="1.0" encoding="utf-8"?>
22
<root>
3-
<!--
4-
Microsoft ResX Schema
5-
3+
<!--
4+
Microsoft ResX Schema
5+
66
Version 2.0
7-
8-
The primary goals of this format is to allow a simple XML format
9-
that is mostly human readable. The generation and parsing of the
10-
various data types are done through the TypeConverter classes
7+
8+
The primary goals of this format is to allow a simple XML format
9+
that is mostly human readable. The generation and parsing of the
10+
various data types are done through the TypeConverter classes
1111
associated with the data types.
12-
12+
1313
Example:
14-
14+
1515
... ado.net/XML headers & schema ...
1616
<resheader name="resmimetype">text/microsoft-resx</resheader>
1717
<resheader name="version">2.0</resheader>
@@ -26,36 +26,36 @@
2626
<value>[base64 mime encoded string representing a byte array form of the .NET Framework object]</value>
2727
<comment>This is a comment</comment>
2828
</data>
29-
30-
There are any number of "resheader" rows that contain simple
29+
30+
There are any number of "resheader" rows that contain simple
3131
name/value pairs.
32-
33-
Each data row contains a name, and value. The row also contains a
34-
type or mimetype. Type corresponds to a .NET class that support
35-
text/value conversion through the TypeConverter architecture.
36-
Classes that don't support this are serialized and stored with the
32+
33+
Each data row contains a name, and value. The row also contains a
34+
type or mimetype. Type corresponds to a .NET class that support
35+
text/value conversion through the TypeConverter architecture.
36+
Classes that don't support this are serialized and stored with the
3737
mimetype set.
38-
39-
The mimetype is used for serialized objects, and tells the
40-
ResXResourceReader how to depersist the object. This is currently not
38+
39+
The mimetype is used for serialized objects, and tells the
40+
ResXResourceReader how to depersist the object. This is currently not
4141
extensible. For a given mimetype the value must be set accordingly:
42-
43-
Note - application/x-microsoft.net.object.binary.base64 is the format
44-
that the ResXResourceWriter will generate, however the reader can
42+
43+
Note - application/x-microsoft.net.object.binary.base64 is the format
44+
that the ResXResourceWriter will generate, however the reader can
4545
read any of the formats listed below.
46-
46+
4747
mimetype: application/x-microsoft.net.object.binary.base64
48-
value : The object must be serialized with
48+
value : The object must be serialized with
4949
: System.Runtime.Serialization.Formatters.Binary.BinaryFormatter
5050
: and then encoded with base64 encoding.
51-
51+
5252
mimetype: application/x-microsoft.net.object.soap.base64
53-
value : The object must be serialized with
53+
value : The object must be serialized with
5454
: System.Runtime.Serialization.Formatters.Soap.SoapFormatter
5555
: and then encoded with base64 encoding.
5656
5757
mimetype: application/x-microsoft.net.object.bytearray.base64
58-
value : The object must be serialized into a byte array
58+
value : The object must be serialized into a byte array
5959
: using a System.ComponentModel.TypeConverter
6060
: and then encoded with base64 encoding.
6161
-->
@@ -1315,6 +1315,9 @@
13151315
<data name="Argument_InvalidNormalizationForm" xml:space="preserve">
13161316
<value>Invalid or unsupported normalization form.</value>
13171317
</data>
1318+
<data name="Argument_UnsupportedNormalizationFormInBrowser" xml:space="preserve">
1319+
<value>`NormalizationForm.FormKC` and `NormalizationForm.FormKD` are not supported in browser environments or WebAssembly.</value>
1320+
</data>
13181321
<data name="Argument_InvalidNumberStyles" xml:space="preserve">
13191322
<value>An undefined NumberStyles value is being used.</value>
13201323
</data>

src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Icu.cs

+89-16
Original file line numberDiff line numberDiff line change
@@ -10,31 +10,33 @@ namespace System.Globalization
1010
{
1111
internal static partial class Normalization
1212
{
13-
private static unsafe bool IcuIsNormalized(string strInput, NormalizationForm normalizationForm)
13+
private static unsafe bool IcuIsNormalized(ReadOnlySpan<char> source, NormalizationForm normalizationForm)
1414
{
1515
Debug.Assert(!GlobalizationMode.Invariant);
1616
Debug.Assert(!GlobalizationMode.UseNls);
17+
Debug.Assert(!source.IsEmpty);
18+
Debug.Assert(normalizationForm is NormalizationForm.FormC or NormalizationForm.FormD or NormalizationForm.FormKC or NormalizationForm.FormKD);
1719

18-
ValidateArguments(strInput, normalizationForm);
20+
ValidateArguments(source, normalizationForm, nameof(source));
1921

2022
int ret;
21-
fixed (char* pInput = strInput)
23+
fixed (char* pInput = source)
2224
{
2325
#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
2426
if (GlobalizationMode.Hybrid)
2527
{
26-
ret = Interop.Globalization.IsNormalizedNative(normalizationForm, pInput, strInput.Length);
28+
ret = Interop.Globalization.IsNormalizedNative(normalizationForm, pInput, source.Length);
2729
}
2830
else
2931
#endif
3032
{
31-
ret = Interop.Globalization.IsNormalized(normalizationForm, pInput, strInput.Length);
33+
ret = Interop.Globalization.IsNormalized(normalizationForm, pInput, source.Length);
3234
}
3335
}
3436

3537
if (ret == -1)
3638
{
37-
throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, nameof(strInput));
39+
throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, nameof(source));
3840
}
3941

4042
return ret == 1;
@@ -44,6 +46,7 @@ private static unsafe string IcuNormalize(string strInput, NormalizationForm nor
4446
{
4547
Debug.Assert(!GlobalizationMode.Invariant);
4648
Debug.Assert(!GlobalizationMode.UseNls);
49+
Debug.Assert(normalizationForm == NormalizationForm.FormC || normalizationForm == NormalizationForm.FormD || normalizationForm == NormalizationForm.FormKC || normalizationForm == NormalizationForm.FormKD);
4750

4851
ValidateArguments(strInput, normalizationForm);
4952

@@ -114,25 +117,95 @@ private static unsafe string IcuNormalize(string strInput, NormalizationForm nor
114117
}
115118
}
116119

117-
private static void ValidateArguments(string strInput, NormalizationForm normalizationForm)
120+
private static unsafe bool IcuTryNormalize(ReadOnlySpan<char> source, Span<char> destination, out int charsWritten, NormalizationForm normalizationForm = NormalizationForm.FormC)
118121
{
119-
Debug.Assert(strInput != null);
122+
Debug.Assert(!GlobalizationMode.Invariant);
123+
Debug.Assert(!GlobalizationMode.UseNls);
124+
Debug.Assert(!source.IsEmpty);
125+
Debug.Assert(normalizationForm == NormalizationForm.FormC || normalizationForm == NormalizationForm.FormD || normalizationForm == NormalizationForm.FormKC || normalizationForm == NormalizationForm.FormKD);
120126

121-
if ((OperatingSystem.IsBrowser() || OperatingSystem.IsWasi())&& (normalizationForm == NormalizationForm.FormKC || normalizationForm == NormalizationForm.FormKD))
127+
if (destination.IsEmpty)
122128
{
123-
// Browser's ICU doesn't contain data needed for FormKC and FormKD
124-
throw new PlatformNotSupportedException();
129+
charsWritten = 0;
130+
return false;
131+
}
132+
133+
ValidateArguments(source, normalizationForm, nameof(source));
134+
135+
int realLen;
136+
fixed (char* pInput = source)
137+
fixed (char* pDest = destination)
138+
{
139+
#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
140+
if (GlobalizationMode.Hybrid)
141+
{
142+
realLen = Interop.Globalization.NormalizeStringNative(normalizationForm, pInput, source.Length, pDest, destination.Length);
143+
}
144+
else
145+
#endif
146+
{
147+
realLen = Interop.Globalization.NormalizeString(normalizationForm, pInput, source.Length, pDest, destination.Length);
148+
}
149+
}
150+
151+
if (realLen < 0)
152+
{
153+
throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, nameof(source));
154+
}
155+
156+
if (realLen <= destination.Length)
157+
{
158+
charsWritten = realLen;
159+
return true;
160+
}
161+
162+
charsWritten = 0;
163+
return false;
164+
}
165+
166+
private static unsafe int IcuGetNormalizedLength(ReadOnlySpan<char> source, NormalizationForm normalizationForm)
167+
{
168+
Debug.Assert(!GlobalizationMode.Invariant);
169+
Debug.Assert(!GlobalizationMode.UseNls);
170+
Debug.Assert(!source.IsEmpty);
171+
Debug.Assert(normalizationForm == NormalizationForm.FormC || normalizationForm == NormalizationForm.FormD || normalizationForm == NormalizationForm.FormKC || normalizationForm == NormalizationForm.FormKD);
172+
173+
ValidateArguments(source, normalizationForm, nameof(source));
174+
175+
int realLen;
176+
fixed (char* pInput = source)
177+
{
178+
#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
179+
if (GlobalizationMode.Hybrid)
180+
{
181+
realLen = Interop.Globalization.NormalizeStringNative(normalizationForm, pInput, source.Length, null, 0);
182+
}
183+
else
184+
#endif
185+
{
186+
realLen = Interop.Globalization.NormalizeString(normalizationForm, pInput, source.Length, null, 0);
187+
}
188+
}
189+
190+
if (realLen < 0)
191+
{
192+
throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, nameof(source));
125193
}
126194

127-
if (normalizationForm != NormalizationForm.FormC && normalizationForm != NormalizationForm.FormD &&
128-
normalizationForm != NormalizationForm.FormKC && normalizationForm != NormalizationForm.FormKD)
195+
return realLen;
196+
}
197+
198+
private static void ValidateArguments(ReadOnlySpan<char> strInput, NormalizationForm normalizationForm, string paramName = "strInput")
199+
{
200+
if ((OperatingSystem.IsBrowser() || OperatingSystem.IsWasi()) && (normalizationForm == NormalizationForm.FormKC || normalizationForm == NormalizationForm.FormKD))
129201
{
130-
throw new ArgumentException(SR.Argument_InvalidNormalizationForm, nameof(normalizationForm));
202+
// Browser's ICU doesn't contain data needed for FormKC and FormKD
203+
throw new PlatformNotSupportedException(SR.Argument_UnsupportedNormalizationFormInBrowser);
131204
}
132205

133206
if (HasInvalidUnicodeSequence(strInput))
134207
{
135-
throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, nameof(strInput));
208+
throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, paramName);
136209
}
137210
}
138211

@@ -143,7 +216,7 @@ private static void ValidateArguments(string strInput, NormalizationForm normali
143216
/// We walk the string ourselves looking for these bad sequences so we can continue to throw
144217
/// ArgumentException in these cases.
145218
/// </summary>
146-
private static bool HasInvalidUnicodeSequence(string s)
219+
private static bool HasInvalidUnicodeSequence(ReadOnlySpan<char> s)
147220
{
148221
for (int i = 0; i < s.Length; i++)
149222
{

0 commit comments

Comments
 (0)