Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[OSX] HybridGlobalization implement compare native function #85965

Merged
merged 22 commits into from
May 24, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions src/libraries/Common/src/Interop/Interop.Collation.OSX.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Globalization;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

internal static partial class Interop
{
internal static partial class Globalization
{
[LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_CompareStringNative", StringMarshalling = StringMarshalling.Utf8)]
internal static unsafe partial int CompareStringNative(string localeName, char* lpStr1, int cwStr1Len, char* lpStr2, int cwStr2Len, CompareOptions options);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@ public static string GetDistroVersionString()

public static bool IsInvariantGlobalization => m_isInvariant.Value;
public static bool IsHybridGlobalizationOnBrowser => m_isHybrid.Value && IsBrowser;
public static bool IsHybridGlobalizationOnOSX => m_isHybrid.Value && (IsOSX || IsMacCatalyst || IsiOS || IstvOS);
public static bool IsNotHybridGlobalizationOnBrowser => !IsHybridGlobalizationOnBrowser;
public static bool IsNotInvariantGlobalization => !IsInvariantGlobalization;
public static bool IsIcuGlobalization => ICUVersion > new Version(0, 0, 0, 0);
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,7 @@
<Compile Include="..\NumberFormatInfo\NumberFormatInfoPercentNegativePattern.cs" />
<Compile Include="..\NumberFormatInfo\NumberFormatInfoPercentGroupSizes.cs" />
<Compile Include="..\NumberFormatInfo\NumberFormatInfoPercentPositivePattern.cs" />
<Compile Include="..\CompareInfo\CompareInfoTestsBase.cs" />
<Compile Include="..\CompareInfo\CompareInfoTests.Compare.cs" />
</ItemGroup>
</Project>
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,7 @@
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\CompareInfo.Invariant.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\CompareInfo.Nls.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\CompareInfo.WebAssembly.cs" Condition="'$(TargetsBrowser)' == 'true'" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\CompareInfo.OSX.cs" Condition="'$(IsOSXLike)' == 'true'" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\CompareOptions.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\CultureData.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\CultureData.Icu.cs" />
Expand Down Expand Up @@ -1264,6 +1265,9 @@
</Compile>
<Compile Include="$(CommonPath)Interop\Interop.Collation.cs">
<Link>Common\Interop\Interop.Collation.cs</Link>
</Compile>
<Compile Include="$(CommonPath)Interop\Interop.Collation.OSX.cs" Condition="'$(IsOSXLike)' == 'true'">
<Link>Common\Interop\Interop.Collation.OSX.cs</Link>
</Compile>
<Compile Include="$(CommonPath)Interop\Interop.ICU.cs">
<Link>Common\Interop\Interop.ICU.cs</Link>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.InteropServices;
using System.Text;

namespace System.Globalization
{
public partial class CompareInfo
{
private unsafe int CompareStringNative(ReadOnlySpan<char> string1, ReadOnlySpan<char> string2, CompareOptions options)
{
Debug.Assert(!GlobalizationMode.Invariant);
Debug.Assert(!GlobalizationMode.UseNls);
Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);
string cultureName = m_name;

// GetReference may return nullptr if the input span is defaulted. The native layer handles
// this appropriately; no workaround is needed on the managed side.
int result;
fixed (char* pString1 = &MemoryMarshal.GetReference(string1))
fixed (char* pString2 = &MemoryMarshal.GetReference(string2))
{
result = Interop.Globalization.CompareStringNative(cultureName, pString1, string1.Length, pString2, string2.Length, options);
}

if (result == -2)
{
throw new ArgumentException("Passed compare options are not supported");
}

return result;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,9 @@ private unsafe int CompareStringCore(ReadOnlySpan<char> string1, ReadOnlySpan<ch
#if TARGET_BROWSER
GlobalizationMode.Hybrid ?
JsCompareString(string1, string2, options) :
#elif TARGET_OSX || TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
GlobalizationMode.Hybrid ?
CompareStringNative(string1, string2, options) :
#endif
IcuCompareString(string1, string2, options);

Expand Down
3 changes: 2 additions & 1 deletion src/mono/mono/mini/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ if(HAVE_SYS_ICU)
if (TARGET_DARWIN)
set(icu_shim_sources_base
${icu_shim_sources_base}
pal_locale.m)
pal_locale.m
pal_collation.m)
endif()

addprefix(icu_shim_sources "${ICU_SHIM_PATH}" "${icu_shim_sources_base}")
Expand Down
2 changes: 1 addition & 1 deletion src/native/libs/System.Globalization.Native/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ else()
endif()

if (CLR_CMAKE_TARGET_APPLE)
set(NATIVEGLOBALIZATION_SOURCES ${NATIVEGLOBALIZATION_SOURCES} pal_locale.m)
set(NATIVEGLOBALIZATION_SOURCES ${NATIVEGLOBALIZATION_SOURCES} pal_locale.m pal_collation.m)
endif()

# time zone names are filtered out of icu data for the browser and associated functionality is disabled
Expand Down
1 change: 1 addition & 0 deletions src/native/libs/System.Globalization.Native/entrypoints.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ static const Entry s_globalizationNative[] =
DllImportEntry(GlobalizationNative_ToUnicode)
DllImportEntry(GlobalizationNative_WindowsIdToIanaId)
#ifdef __APPLE__
DllImportEntry(GlobalizationNative_CompareStringNative)
DllImportEntry(GlobalizationNative_GetLocaleNameNative)
DllImportEntry(GlobalizationNative_GetLocaleInfoStringNative)
DllImportEntry(GlobalizationNative_GetLocaleInfoIntNative)
Expand Down
9 changes: 9 additions & 0 deletions src/native/libs/System.Globalization.Native/pal_collation.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,12 @@ PALEXPORT int32_t GlobalizationNative_GetSortKey(SortHandle* pSortHandle,
uint8_t* sortKey,
int32_t cbSortKeyLength,
int32_t options);

#ifdef __APPLE__
PALEXPORT int32_t GlobalizationNative_CompareStringNative(const char* localeName,
const char* lpStr1,
int32_t cwStr1Length,
const char* lpStr2,
int32_t cwStr2Length,
int32_t options);
#endif
127 changes: 127 additions & 0 deletions src/native/libs/System.Globalization.Native/pal_collation.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

#include <stdlib.h>
#include "pal_locale_internal.h"
#include "pal_collation.h"

#import <Foundation/Foundation.h>

#if defined(TARGET_OSX) || defined(TARGET_MACCATALYST) || defined(TARGET_IOS) || defined(TARGET_TVOS)

/*
Function:
CompareString
*/
int32_t GlobalizationNative_CompareStringNative(const char* localeName, const char* lpStr1, int32_t cwStr1Length,
const char* lpStr2, int32_t cwStr2Length, int32_t comparisonOptions)
{
NSLocale *currentLocale;
if(localeName == NULL || strlen(localeName) == 0)
{
currentLocale = [NSLocale systemLocale];
}
else
{
NSString *locName = [NSString stringWithFormat:@"%s", localeName];
currentLocale = [[NSLocale alloc] initWithLocaleIdentifier:locName];
}

NSString *firstString = [NSString stringWithCharacters: (const unichar *)lpStr1 length: cwStr1Length];
NSString *secondString = [NSString stringWithCharacters: (const unichar *)lpStr2 length: cwStr2Length];
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Migrating comment


Re: passing in string lengths, could we use something like initWithUTF8String or is there a chance that lpStr1's length is less than cwStr1Length? Just not sure why we need to pass in string lengths.


Ah looking into it more, it seems like lpStr1 and lpStr2 will not be null terminated so we cannot use initWithUTF8String, it would've been nice to not have to pass extra parameters.

Given that GlobalizationNative_CompareStringNative is StringMarshalled with UTF8, it feels like there would be problems casting to const unichar * as that is UTF16, and a character represented in UTF8 might have a completely different representation in UTF16 that wouldn't align.

Are there any test cases that mightve failed because of a string marshalled into UTF8 is different in its UTF16 representation?

Maybe we should be using initWithBytes:length:encoding?
[NSString initWithBytes: lpStr1 length: cwStr1Length encoding: NSUTF8StringEncoding]

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tried with initWithBytes:length:encoding?
[NSString initWithBytes: lpStr1 length: cwStr1Length encoding: NSUTF8StringEncoding]
It can't convert properly from char* C# to objective C string , result is wrong. Thanks for pointing out UTF16 , will match it in C# and C and see how test cases will behave.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you happen to know what the bytes passed to pString1 in Interop.Globalization.CompareStringNative(m_name, m_name.Length, pString1, string1.Length, pString2, string2.Length, options); look like and what the bytes in lpStr1 look like here when sticking with StringMarshalling.UTF8?
Are NSUTF8StringEncoding and the StringMarshalling.UTF8 differnt? I'm curious as to why it fails to create the correct string.

I'm not sure what the full repercussions of using UTF16 if UTF8 is the default for OSX and for all other interop on OSX, we have been using UTF8.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As for bytes I will check. But changing from StringMarshaling.UTF8 to StringMarshaling.UTF16 didn't affect test cases.

Currently in Interop.Collation.cs there are functions using UTF8 and some of them UTF16 . The ones that pass char* are using StringMarshaling.UTF16 but not sure if this is the reason https://github.com/dotnet/runtime/blob/main/src/libraries/Common/src/Interop/Interop.Collation.cs.
In OSX https://github.com/dotnet/runtime/blob/main/src/libraries/Common/src/Interop/Interop.Locale.OSX.cs so far only UTF8 is used, but also there is no case of passing char*.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I looked a little bit for the differences and it depends what are we passing from C# to C. In some functions we pass string, in others char*. When we need to pass char*, as char keyword occupies 2 bytes (16 bits) in the memory => UTF16 is the choice. But in my previous implementations I was only passing string (cultureName) so I chose to use StringMarshalling.Utf8 https://github.com/dotnet/runtime/blob/main/src/libraries/Common/src/Interop/Interop.Locale.OSX.cs#L10
@steveisok could you please verify if this looks correct?

NSRange string1Range = NSMakeRange(0, cwStr1Length);
NSStringCompareOptions options = 0;
switch (comparisonOptions)
{
case 0:
// 0: None
options = NSLiteralSearch;
break;
case 1:
// 1: IgnoreCase
options = NSCaseInsensitiveSearch;
break;
case 2:
// 2: IgnoreNonSpace
options = NSDiacriticInsensitiveSearch;
break;
case 3:
// 3: IgnoreNonSpace | IgnoreCase
options = NSCaseInsensitiveSearch | NSDiacriticInsensitiveSearch;
break;
case 16:
// 16: IgnoreWidth
options = NSWidthInsensitiveSearch;
break;
case 17:
// 17: IgnoreWidth | IgnoreCase
options = NSWidthInsensitiveSearch | NSCaseInsensitiveSearch;
break;
case 18:
// 18: IgnoreWidth | IgnoreNonSpace
options = NSWidthInsensitiveSearch | NSDiacriticInsensitiveSearch;
break;
case 19:
// 19: IgnoreWidth | IgnoreNonSpace | IgnoreCase
options = NSWidthInsensitiveSearch | NSDiacriticInsensitiveSearch | NSCaseInsensitiveSearch;
break;
case 4:
case 5:
case 6:
case 7:
case 8:
case 9:
case 10:
case 11:
case 12:
case 13:
case 14:
case 15:
case 20:
case 21:
case 22:
case 23:
case 24:
case 25:
case 26:
case 27:
case 28:
case 29:
case 30:
case 31:
default:
// 4: IgnoreSymbols
// 5: IgnoreSymbols | IgnoreCase
// 6: IgnoreSymbols | IgnoreNonSpace
// 7: IgnoreSymbols | IgnoreNonSpace | IgnoreCase
// 8: IgnoreKanaType
// 9: IgnoreKanaType | IgnoreCase
// 10: IgnoreKanaType | IgnoreNonSpace
// 11: IgnoreKanaType | IgnoreNonSpace | IgnoreCase
// 12: IgnoreKanaType | IgnoreSymbols
// 13: IgnoreKanaType | IgnoreCase | IgnoreSymbols
// 14: IgnoreKanaType | IgnoreSymbols | IgnoreNonSpace
// 15: IgnoreKanaType | IgnoreSymbols | IgnoreNonSpace | IgnoreCase
// 20: IgnoreWidth | IgnoreSymbols
// 21: IgnoreWidth | IgnoreSymbols | IgnoreCase
// 22: IgnoreWidth | IgnoreSymbols | IgnoreNonSpace
// 23: IgnoreWidth | IgnoreSymbols | IgnoreNonSpace | IgnoreCase
// 24: IgnoreKanaType | IgnoreWidth
// 25: IgnoreKanaType | IgnoreWidth | IgnoreCase
// 26: IgnoreKanaType | IgnoreWidth | IgnoreNonSpace
// 27: IgnoreKanaType | IgnoreWidth | IgnoreNonSpace | IgnoreCase
// 28: IgnoreKanaType | IgnoreWidth | IgnoreSymbols
// 29: IgnoreKanaType | IgnoreWidth | IgnoreSymbols | IgnoreCase
// 30: IgnoreKanaType | IgnoreWidth | IgnoreSymbols | IgnoreNonSpace
// 31: IgnoreKanaType | IgnoreWidth | IgnoreSymbols | IgnoreNonSpace | IgnoreCase
return -2;
}

return [firstString compare:secondString
options:options
range:string1Range
locale:currentLocale];
}

#endif