Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[iOS][non-icu] HybridGlobalization implement normalization functions #90582

Merged
merged 5 commits into from
Aug 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions src/libraries/Common/src/Interop/Interop.Normalization.iOS.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Runtime.InteropServices;
using System.Text;

// Managed P/Invoke declarations for the native (non-ICU) normalization entry points
// exported by the globalization native library.
internal static partial class Interop
{
internal static partial class Globalization
{
// Binds to the native export GlobalizationNative_IsNormalizedNative.
// src points at srcLen UTF-16 chars (callers pass a pinned string and its Length).
// The native implementation documents its result as 1 (normalized), 0 (not normalized)
// or -1 (internal error).
[LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_IsNormalizedNative", StringMarshalling = StringMarshalling.Utf16)]
internal static unsafe partial int IsNormalizedNative(NormalizationForm normalizationForm, char* src, int srcLen);

// Binds to the native export GlobalizationNative_NormalizeStringNative.
// src/srcLen describe the input; buffer/bufferLength describe the destination and its
// capacity in chars. The result is the length reported by the native side; see the
// native implementation for how failures are signalled.
[LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_NormalizeStringNative", StringMarshalling = StringMarshalling.Utf16)]
internal static unsafe partial int NormalizeStringNative(NormalizationForm normalizationForm, char* src, int srcLen, char* buffer, int bufferLength);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<Project Sdk="Microsoft.NET.Sdk">
<!--
  Test project that builds the shared normalization tests for the iOS, tvOS and
  Mac Catalyst target frameworks with HybridGlobalization switched on. The test
  sources and the win7/win8 data files are linked in from ..\Normalization.
-->
<PropertyGroup>
<TargetFrameworks>$(NetCoreAppCurrent)-ios;$(NetCoreAppCurrent)-tvos;$(NetCoreAppCurrent)-maccatalyst</TargetFrameworks>
<TestRuntime>true</TestRuntime>
<HybridGlobalization>true</HybridGlobalization>
</PropertyGroup>
<ItemGroup>
<Compile Include="..\Normalization\StringNormalizationTests.cs" />
<Compile Include="..\Normalization\NormalizationAll.cs" />
</ItemGroup>
<ItemGroup>
<!-- Data files are embedded under fixed logical names. -->
<EmbeddedResource Include="..\Normalization\Data\win8.txt">
<LogicalName>NormalizationDataWin8</LogicalName>
</EmbeddedResource>
<EmbeddedResource Include="..\Normalization\Data\win7.txt">
<LogicalName>NormalizationDataWin7</LogicalName>
</EmbeddedResource>
</ItemGroup>
</Project>
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public void Normalize()
VerifyConformanceInvariant(NormalizationForm.FormD, part0, part1, part2, part3, part4);

// Mobile / Browser ICU doesn't support FormKC and FormKD
if (PlatformDetection.IsNotUsingLimitedCultures)
if (PlatformDetection.IsNotUsingLimitedCultures || PlatformDetection.IsHybridGlobalizationOnOSX)
{
// Form KC
VerifyConformanceInvariant(NormalizationForm.FormKC, part0, part1, part2, part3, part4);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public static IEnumerable<object[]> NormalizeTestData()
yield return new object[] { "\u1E9b\u0323", NormalizationForm.FormC, "\u1E9b\u0323" };
yield return new object[] { "\u1E9b\u0323", NormalizationForm.FormD, "\u017f\u0323\u0307" };

if (PlatformDetection.IsNotUsingLimitedCultures)
if (PlatformDetection.IsNotUsingLimitedCultures || PlatformDetection.IsHybridGlobalizationOnOSX)
{
// Mobile / Browser ICU doesn't support FormKC and FormKD
yield return new object[] { "\uFB01", NormalizationForm.FormKC, "fi" };
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1330,6 +1330,9 @@
<Compile Include="$(CommonPath)Interop\Interop.Normalization.cs">
<Link>Common\Interop\Interop.Normalization.cs</Link>
</Compile>
<Compile Include="$(CommonPath)Interop\Interop.Normalization.iOS.cs" Condition="'$(IsiOSLike)' == 'true'">
<Link>Common\Interop\Interop.Normalization.iOS.cs</Link>
</Compile>
<Compile Include="$(CommonPath)Interop\Interop.ResultCode.cs">
<Link>Common\Interop\Interop.ResultCode.cs</Link>
</Compile>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,14 @@ private static unsafe bool IcuIsNormalized(string strInput, NormalizationForm no
int ret;
fixed (char* pInput = strInput)
{
#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
if (GlobalizationMode.Hybrid)
ret = Interop.Globalization.IsNormalizedNative(normalizationForm, pInput, strInput.Length);
else
ret = Interop.Globalization.IsNormalized(normalizationForm, pInput, strInput.Length);
#else
ret = Interop.Globalization.IsNormalized(normalizationForm, pInput, strInput.Length);
#endif
}

if (ret == -1)
Expand Down Expand Up @@ -53,7 +60,14 @@ private static unsafe string IcuNormalize(string strInput, NormalizationForm nor
fixed (char* pInput = strInput)
fixed (char* pDest = &MemoryMarshal.GetReference(buffer))
{
#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
if (GlobalizationMode.Hybrid)
realLen = Interop.Globalization.NormalizeStringNative(normalizationForm, pInput, strInput.Length, pDest, buffer.Length);
else
realLen = Interop.Globalization.NormalizeString(normalizationForm, pInput, strInput.Length, pDest, buffer.Length);
#else
realLen = Interop.Globalization.NormalizeString(normalizationForm, pInput, strInput.Length, pDest, buffer.Length);
#endif
}

if (realLen == -1)
Expand Down Expand Up @@ -100,7 +114,6 @@ private static void ValidateArguments(string strInput, NormalizationForm normali
{
Debug.Assert(strInput != null);


if (OperatingSystem.IsBrowser() && (normalizationForm == NormalizationForm.FormKC || normalizationForm == NormalizationForm.FormKD))
{
// Browser's ICU doesn't contain data needed for FormKC and FormKD
Expand Down
3 changes: 2 additions & 1 deletion src/mono/mono/mini/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ if(HAVE_SYS_ICU)
pal_locale.m
pal_collation.m
pal_casing.m
pal_calendarData.m)
pal_calendarData.m
pal_normalization.m)

addprefix(icu_shim_darwin_sources "${ICU_SHIM_PATH}" "${icu_shim_darwin_sources_base}")
set(icu_shim_sources ${icu_shim_sources} ${icu_shim_darwin_sources})
Expand Down
3 changes: 2 additions & 1 deletion src/native/libs/System.Globalization.Native/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ if (CLR_CMAKE_TARGET_APPLE)
pal_locale.m
pal_collation.m
pal_casing.m
pal_calendarData.m)
pal_calendarData.m
pal_normalization.m)
set_source_files_properties(${NATIVEGLOBALIZATION_SOURCES_OBJC} PROPERTIES COMPILE_FLAGS "-fobjc-arc ${CLR_CMAKE_COMMON_OBJC_FLAGS}")
set(NATIVEGLOBALIZATION_SOURCES ${NATIVEGLOBALIZATION_SOURCES} ${NATIVEGLOBALIZATION_SOURCES_OBJC})
endif()
Expand Down
2 changes: 2 additions & 0 deletions src/native/libs/System.Globalization.Native/entrypoints.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ static const Entry s_globalizationNative[] =
DllImportEntry(GlobalizationNative_GetLocaleNameNative)
DllImportEntry(GlobalizationNative_GetLocaleTimeFormatNative)
DllImportEntry(GlobalizationNative_IndexOfNative)
DllImportEntry(GlobalizationNative_IsNormalizedNative)
DllImportEntry(GlobalizationNative_NormalizeStringNative)
DllImportEntry(GlobalizationNative_StartsWithNative)
#endif
};
Expand Down
31 changes: 0 additions & 31 deletions src/native/libs/System.Globalization.Native/pal_casing.m
Original file line number Diff line number Diff line change
Expand Up @@ -55,37 +55,6 @@
} \
}

/**
 * Append a code point to a UTF-16 string, writing 1 or 2 code units.
 * The offset points to the current end of the string contents
 * and is advanced (post-increment) only when the code point is written.
 * Code points outside of the Basic Multilingual Plane are converted into
 * the corresponding surrogate pair, provided both code units fit.
 * High surrogate range: 0xD800 - 0xDBFF
 * Low surrogate range: 0xDC00 - 0xDFFF
 * On failure nothing is written, offset is left unchanged and isError is
 * assigned InsufficientBuffer (no room for the code unit(s)) or
 * InvalidCodePoint (code point above 0x10FFFF).
 *
 * @param buffer uint16_t * destination string buffer
 * @param offset string offset; an lvalue that is advanced on success
 * @param capacity size of the string buffer, in code units
 * @param codePoint code point to append
 * @param isError output lvalue set to an error code on failure, otherwise not modified
 */
#define Append(buffer, offset, capacity, codePoint, isError) { \
    if ((offset) >= (capacity)) /* insufficiently sized destination buffer */ { \
        (isError) = InsufficientBuffer; \
    } else if ((uint32_t)(codePoint) > 0x10ffff) /* invalid code point */ { \
        (isError) = InvalidCodePoint; \
    } else if ((uint32_t)(codePoint) <= 0xffff) { \
        (buffer)[(offset)++] = (uint16_t)(codePoint); \
    } else if ((offset) + 1 >= (capacity)) /* lead fits but trail surrogate does not */ { \
        (isError) = InsufficientBuffer; \
    } else { \
        (buffer)[(offset)++] = (uint16_t)(((codePoint) >> 10) + 0xd7c0); \
        (buffer)[(offset)++] = (uint16_t)(((codePoint)&0x3ff) | 0xdc00); \
    } \
}

/*
Function:
ChangeCaseNative
Expand Down
32 changes: 32 additions & 0 deletions src/native/libs/System.Globalization.Native/pal_icushim_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -346,3 +346,35 @@ const char* GlobalizationNative_GetICUDataPathFallback(void);
#endif

#endif // !defined(STATIC_ICU)
#if defined(TARGET_MACCATALYST) || defined(TARGET_IOS) || defined(TARGET_TVOS)
/**
 * Append a code point to a UTF-16 string, writing 1 or 2 code units.
 * The offset points to the current end of the string contents
 * and is advanced (post-increment) only when the code point is written.
 * Code points outside of the Basic Multilingual Plane are converted into
 * the corresponding surrogate pair, provided both code units fit.
 * High surrogate range: 0xD800 - 0xDBFF
 * Low surrogate range: 0xDC00 - 0xDFFF
 * On failure nothing is written, offset is left unchanged and isError is
 * assigned InsufficientBuffer (no room for the code unit(s)) or
 * InvalidCodePoint (code point above 0x10FFFF).
 *
 * @param buffer uint16_t * destination string buffer
 * @param offset string offset; an lvalue that is advanced on success
 * @param capacity size of the string buffer, in code units
 * @param codePoint code point to append
 * @param isError output lvalue set to an error code on failure, otherwise not modified
 */
#define Append(buffer, offset, capacity, codePoint, isError) { \
    if ((offset) >= (capacity)) /* insufficiently sized destination buffer */ { \
        (isError) = InsufficientBuffer; \
    } else if ((uint32_t)(codePoint) > 0x10ffff) /* invalid code point */ { \
        (isError) = InvalidCodePoint; \
    } else if ((uint32_t)(codePoint) <= 0xffff) { \
        (buffer)[(offset)++] = (uint16_t)(codePoint); \
    } else if ((offset) + 1 >= (capacity)) /* lead fits but trail surrogate does not */ { \
        (isError) = InsufficientBuffer; \
    } else { \
        (buffer)[(offset)++] = (uint16_t)(((codePoint) >> 10) + 0xd7c0); \
        (buffer)[(offset)++] = (uint16_t)(((codePoint)&0x3ff) | 0xdc00); \
    } \
}
#endif
12 changes: 12 additions & 0 deletions src/native/libs/System.Globalization.Native/pal_normalization.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,15 @@ PALEXPORT int32_t GlobalizationNative_NormalizeString(NormalizationForm normaliz
int32_t cwSrcLength,
UChar* lpDst,
int32_t cwDstLength);

#ifdef __APPLE__
/*
Reports whether the cwStrLength UTF-16 code units at lpStr are already in the
requested normalization form. Implemented in pal_normalization.m, which documents
the result as 1 (normalized), 0 (not normalized) or -1 (internal error).
*/
PALEXPORT int32_t GlobalizationNative_IsNormalizedNative(NormalizationForm normalizationForm,
const uint16_t* lpStr,
int32_t cwStrLength);

/*
Normalizes the cwSourceLength UTF-16 code units at lpSource into lpDst, whose
capacity is cwDstLength code units. Implemented in pal_normalization.m; the result
is the length of the normalized string, with 0 signalling failure.
*/
PALEXPORT int32_t GlobalizationNative_NormalizeStringNative(NormalizationForm normalizationForm,
const uint16_t* lpSource,
int32_t cwSourceLength,
uint16_t* lpDst,
int32_t cwDstLength);
#endif
87 changes: 87 additions & 0 deletions src/native/libs/System.Globalization.Native/pal_normalization.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
//

#include "pal_errors.h"
#include "pal_icushim_internal.h"
#include "pal_normalization.h"
#import <Foundation/Foundation.h>

#if defined(TARGET_MACCATALYST) || defined(TARGET_IOS) || defined(TARGET_TVOS)
// Maps a NormalizationForm onto the matching NSString normalization accessor and
// returns the normalized copy of sourceString. Yields nil when the form is not one
// of FormC, FormD, FormKC or FormKD.
static NSString* GetNormalizedStringForForm(NormalizationForm normalizationForm, NSString* sourceString)
{
    NSString *normalized = nil;

    if (normalizationForm == FormC)
    {
        normalized = [sourceString precomposedStringWithCanonicalMapping];
    }
    else if (normalizationForm == FormD)
    {
        normalized = [sourceString decomposedStringWithCanonicalMapping];
    }
    else if (normalizationForm == FormKC)
    {
        normalized = [sourceString precomposedStringWithCompatibilityMapping];
    }
    else if (normalizationForm == FormKD)
    {
        normalized = [sourceString decomposedStringWithCompatibilityMapping];
    }

    return normalized;
}

/*
Function:
IsNormalized

Called from System.StringNormalizationExtensions.IsNormalized to find out whether
a string already is in the requested Unicode Normalization Form. The input is
normalized and then compared against itself.

Return values:
0: lpStr is not normalized.
1: lpStr is normalized.
-1: internal error during normalization.
*/
int32_t GlobalizationNative_IsNormalizedNative(NormalizationForm normalizationForm, const uint16_t* lpStr, int32_t cwStrLength)
{
    @autoreleasepool
    {
        NSString *input = [NSString stringWithCharacters: lpStr length: cwStrLength];
        NSString *expected = GetNormalizedStringForForm(normalizationForm, input);

        // No normalized string could be produced (e.g. unrecognized form).
        if (expected == NULL)
        {
            return -1;
        }

        return [input isEqualToString: expected];
    }
}

/*
Function:
NormalizeString

Used by System.StringNormalizationExtensions.Normalize to normalize a string
into a certain Unicode Normalization Form.

Parameters:
normalizationForm: the form to normalize into.
lpSource / cwSourceLength: the UTF-16 input and its length in code units.
lpDst / cwDstLength: the destination buffer and its capacity in code units.

Return values:
0: internal error during normalization (or the normalized string is empty).
>0: the length, in UTF-16 code units, of the normalized string (not counting
    any null terminator; none is written). When the value is greater than
    cwDstLength, nothing was written to lpDst and the caller is expected to
    retry with a buffer of at least that many code units.
*/
int32_t GlobalizationNative_NormalizeStringNative(NormalizationForm normalizationForm, const uint16_t* lpSource, int32_t cwSourceLength, uint16_t* lpDst, int32_t cwDstLength)
{
    @autoreleasepool
    {
        NSString *sourceString = [NSString stringWithCharacters: lpSource length: cwSourceLength];
        NSString *normalizedString = GetNormalizedStringForForm(normalizationForm, sourceString);

        if (normalizedString == NULL)
        {
            return 0;
        }

        NSUInteger normalizedLength = normalizedString.length;

        // An empty result, or one whose length cannot be represented in the
        // int32_t return value, is reported as a failure.
        if (normalizedLength == 0 || normalizedLength > (NSUInteger)INT32_MAX)
        {
            return 0;
        }

        // Destination too small: report the required size instead of 0, so the
        // caller can grow its buffer and call again rather than mistaking the
        // result for an empty string.
        if (cwDstLength < 0 || normalizedLength > (NSUInteger)cwDstLength)
        {
            return (int32_t)normalizedLength;
        }

        // The normalized string is already UTF-16, so copy its code units in bulk.
        [normalizedString getCharacters: lpDst range: NSMakeRange(0, normalizedLength)];

        return (int32_t)normalizedLength;
    }
}
#endif