@@ -799,28 +799,81 @@ static void* GetTlsIndexObjectAddress();
799
799
800
800
#if !defined(TARGET_OSX) && defined(TARGET_UNIX) && (defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64))
801
801
extern " C" size_t GetTLSResolverAddress ();
802
- #endif // !TARGET_OSX && TARGET_UNIX && (TARGET_ARM64 || TARGET_LOONGARCH64)
803
802
804
- bool CanJITOptimizeTLSAccess ()
803
+ // Check if the resolver address retrieval code is expected. We verify the exact
804
+ // code sequence for all the instructions. However, for two instructions adrp/ldr,
805
+ // we make sure that the instruction's opcode and registers match.
806
+ // If the resolver address retrieval code is correct, we invoke it to determine if
807
+ // it is a static or dynamic resolver. TLS optimization is enabled only for static
808
+ // resolver. That's because for static resolver, the TP offset is same for all threads.
809
+ // For dynamic resolver, TP offset returned is for the current thread and will be
810
+ // different for the other threads.
811
+ static bool IsValidTLSResolver ()
805
812
{
806
- LIMITED_METHOD_CONTRACT;
813
+ #define READ_CODE (p, code ) \
814
+ code = (p[3 ] << 24 ) | (p[2 ] << 16 ) | (p[1 ] << 8 ) | p[0 ]; \
815
+ p += 4 ;
807
816
808
- bool optimizeThreadStaticAccess = false ;
809
- #if defined(TARGET_ARM)
810
- // Optimization is disabled for linux/windows arm
811
- #elif !defined(TARGET_WINDOWS) && defined(TARGET_X86)
812
- // Optimization is disabled for linux/x86
813
- #elif defined(TARGET_LINUX_MUSL) && defined(TARGET_ARM64)
814
- // Optimization is disabled for linux musl arm64
815
- #elif defined(TARGET_FREEBSD) && defined(TARGET_ARM64)
816
- // Optimization is disabled for FreeBSD/arm64
817
- #elif defined(FEATURE_INTERPRETER)
818
- // Optimization is disabled when interpreter may be used
819
- #elif !defined(TARGET_OSX) && defined(TARGET_UNIX) && defined(TARGET_ARM64)
820
- // Optimization is enabled for linux/arm64 only for static resolver.
821
- // For static resolver, the TP offset is same for all threads.
822
- // For dynamic resolver, TP offset returned is for the current thread and
823
- // will be different for the other threads.
817
+ uint32_t code;
818
+ uint8_t * p = reinterpret_cast <uint8_t *>(&GetTLSResolverAddress);
819
+
820
+ // stp x29, x30, [sp, #-32]!
821
+ READ_CODE (p, code)
822
+ if (code != 0xA9BE7BFD )
823
+ {
824
+ return false ;
825
+ }
826
+
827
+ // mov x29, sp
828
+ READ_CODE (p, code)
829
+ if (code != 0x910003FD )
830
+ {
831
+ return false ;
832
+ }
833
+
834
+ // adrp x0, <address>
835
+ // 28:24 have 0x10 for adrp
836
+ // 4:0 should have x0
837
+ READ_CODE (p, code)
838
+ if ((code & 0x9F00001F ) != 0x90000000 )
839
+ {
840
+ return false ;
841
+ }
842
+
843
+ // ldr x1, [x0, <offset>]
844
+ READ_CODE (p, code)
845
+ // 31:24 have 0xf9 for ldr
846
+ // 23:22 have 01
847
+ // 9:5 have 0 for x0
848
+ // 4:0 have 1 for x1
849
+ if ((code & 0xFFC003FF ) != 0xF9400001 )
850
+ {
851
+ return false ;
852
+ }
853
+
854
+ // mov x0, x1
855
+ READ_CODE (p, code)
856
+ if (code != 0xAA0103E0 )
857
+ {
858
+ return false ;
859
+ }
860
+
861
+ // ldp x29, x30, [sp], #32
862
+ READ_CODE (p, code)
863
+ if (code != 0xA8C27BFD )
864
+ {
865
+ return false ;
866
+ }
867
+
868
+ // ret
869
+ READ_CODE (p, code)
870
+ if (code != 0xD65F03C0 )
871
+ {
872
+ return false ;
873
+ }
874
+
875
+ // Now invoke the code to retrieve the resolver address
876
+ // and verify if that is as expected.
824
877
uint32_t * resolverAddress = reinterpret_cast <uint32_t *>(GetTLSResolverAddress ());
825
878
int ip = 0 ;
826
879
if ((resolverAddress[ip] == 0xd503201f ) || (resolverAddress[ip] == 0xd503241f ))
@@ -837,14 +890,44 @@ bool CanJITOptimizeTLSAccess()
837
890
// ret
838
891
(resolverAddress[ip + 1 ] == 0xd65f03c0 )
839
892
)
893
+ {
894
+ return true ;
895
+ }
896
+
897
+ return false ;
898
+ }
899
+ #endif // !TARGET_OSX && TARGET_UNIX && (TARGET_ARM64 || TARGET_LOONGARCH64)
900
+
901
+ bool CanJITOptimizeTLSAccess ()
902
+ {
903
+ LIMITED_METHOD_CONTRACT;
904
+ if (g_pConfig->DisableOptimizedThreadStaticAccess ())
905
+ {
906
+ return false ;
907
+ }
908
+
909
+ bool optimizeThreadStaticAccess = false ;
910
+ #if defined(TARGET_ARM)
911
+ // Optimization is disabled for linux/windows arm
912
+ #elif !defined(TARGET_WINDOWS) && defined(TARGET_X86)
913
+ // Optimization is disabled for linux/x86
914
+ #elif defined(TARGET_LINUX_MUSL) && defined(TARGET_ARM64)
915
+ // Optimization is disabled for linux musl arm64
916
+ #elif defined(TARGET_FREEBSD) && defined(TARGET_ARM64)
917
+ // Optimization is disabled for FreeBSD/arm64
918
+ #elif defined(FEATURE_INTERPRETER)
919
+ // Optimization is disabled when interpreter may be used
920
+ #elif !defined(TARGET_OSX) && defined(TARGET_UNIX) && defined(TARGET_ARM64)
921
+ bool tlsResolverValid = IsValidTLSResolver ();
922
+ if (tlsResolverValid)
840
923
{
841
924
optimizeThreadStaticAccess = true ;
842
925
#ifdef _DEBUG
843
926
if (CLRConfig::GetConfigValue (CLRConfig::EXTERNAL_AssertNotStaticTlsResolver) != 0 )
844
927
{
845
928
_ASSERTE (!" Detected static resolver in use when not expected" );
846
929
}
847
- #endif
930
+ #endif // _DEBUG
848
931
}
849
932
#elif defined(TARGET_LOONGARCH64)
850
933
// Optimization is enabled for linux/loongarch64 only for static resolver.
@@ -878,11 +961,6 @@ bool CanJITOptimizeTLSAccess()
878
961
#endif // !TARGET_OSX && TARGET_UNIX && TARGET_AMD64
879
962
#endif
880
963
881
- if (g_pConfig->DisableOptimizedThreadStaticAccess ())
882
- {
883
- optimizeThreadStaticAccess = false ;
884
- }
885
-
886
964
return optimizeThreadStaticAccess;
887
965
}
888
966
0 commit comments