Skip to content

Commit 1d27c3d

Browse files
authored
[NativeAOT] enable background GC on Unix (#74735)
* enable for unix+arm64
* include softwarewritewatch.cpp
* make the unix crst recursive
* enable ww on Unix x64
* enabled card bundles
* comment
1 parent a5e9811 commit 1d27c3d

File tree

5 files changed

+102
-41
lines changed

5 files changed

+102
-41
lines changed

src/coreclr/nativeaot/Runtime/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ set(COMMON_RUNTIME_SOURCES
5050
${GC_DIR}/handletablecore.cpp
5151
${GC_DIR}/handletablescan.cpp
5252
${GC_DIR}/objecthandle.cpp
53+
${GC_DIR}/softwarewritewatch.cpp
5354
)
5455

5556
set(SERVER_GC_SOURCES
@@ -206,6 +207,12 @@ include_directories(${ARCH_SOURCES_DIR})
206207

207208
add_definitions(-DFEATURE_BASICFREEZE)
208209
add_definitions(-DFEATURE_CONSERVATIVE_GC)
210+
211+
if(CLR_CMAKE_TARGET_UNIX)
212+
add_definitions(-DFEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP)
213+
add_definitions(-DFEATURE_MANUALLY_MANAGED_CARD_BUNDLES)
214+
endif()
215+
209216
add_definitions(-DFEATURE_CUSTOM_IMPORTS)
210217
add_definitions(-DFEATURE_DYNAMIC_CODE)
211218
add_compile_definitions($<$<OR:$<CONFIG:Debug>,$<CONFIG:Checked>>:FEATURE_GC_STRESS>)

src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.S

Lines changed: 63 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,21 @@ LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Done_\REFREG):
8484
// we're in a debug build and write barrier checking has been enabled).
8585
UPDATE_GC_SHADOW \BASENAME, \REFREG, rdi
8686

87+
#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
88+
mov r11, [C_VAR(g_write_watch_table)]
89+
cmp r11, 0x0
90+
je LOCAL_LABEL(\BASENAME\()_CheckCardTable_\REFREG)
91+
92+
mov r10, rdi
93+
shr r10, 0xC // SoftwareWriteWatch::AddressToTableByteIndexShift
94+
add r10, r11
95+
cmp byte ptr [r10], 0x0
96+
jne LOCAL_LABEL(\BASENAME\()_CheckCardTable_\REFREG)
97+
mov byte ptr [r10], 0xFF
98+
#endif
99+
100+
LOCAL_LABEL(\BASENAME\()_CheckCardTable_\REFREG):
101+
87102
// If the reference is to an object that's not in an ephemeral generation we have no need to track it
88103
// (since the object won't be collected or moved by an ephemeral collection).
89104
cmp \REFREG, [C_VAR(g_ephemeral_low)]
@@ -95,17 +110,25 @@ LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Done_\REFREG):
95110
// track this write. The location address is translated into an offset in the card table bitmap. We set
96111
// an entire byte in the card table since it's quicker than messing around with bitmasks and we only write
97112
// the byte if it hasn't already been done since writes are expensive and impact scaling.
98-
shr rdi, 11
99-
add rdi, [C_VAR(g_card_table)]
100-
cmp byte ptr [rdi], 0x0FF
101-
jne LOCAL_LABEL(\BASENAME\()_UpdateCardTable_\REFREG)
102-
103-
LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG):
104-
ret
113+
shr rdi, 0x0B
114+
mov r10, [C_VAR(g_card_table)]
115+
cmp byte ptr [rdi + r10], 0x0FF
116+
je LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG)
105117

106118
// We get here if it's necessary to update the card table.
107-
LOCAL_LABEL(\BASENAME\()_UpdateCardTable_\REFREG):
108-
mov byte ptr [rdi], 0x0FF
119+
mov byte ptr [rdi + r10], 0xFF
120+
121+
#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
122+
// Shift rdi by 0x0A more to get the card bundle byte (we shifted by 0x0B already)
123+
shr rdi, 0x0A
124+
add rdi, [C_VAR(g_card_bundle_table)]
125+
cmp byte ptr [rdi], 0xFF
126+
je LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG)
127+
128+
mov byte ptr [rdi], 0xFF
129+
#endif
130+
131+
LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG):
109132
ret
110133

111134
.endm
@@ -252,32 +275,52 @@ LEAF_ENTRY RhpByRefAssignRef, _TEXT
252275
// we're in a debug build and write barrier checking has been enabled).
253276
UPDATE_GC_SHADOW BASENAME, rcx, rdi
254277

278+
#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
279+
mov r11, [C_VAR(g_write_watch_table)]
280+
cmp r11, 0x0
281+
je LOCAL_LABEL(RhpByRefAssignRef_CheckCardTable)
282+
283+
mov r10, rdi
284+
shr r10, 0xC // SoftwareWriteWatch::AddressToTableByteIndexShift
285+
add r10, r11
286+
cmp byte ptr [r10], 0x0
287+
jne LOCAL_LABEL(RhpByRefAssignRef_CheckCardTable)
288+
mov byte ptr [r10], 0xFF
289+
#endif
290+
291+
LOCAL_LABEL(RhpByRefAssignRef_CheckCardTable):
292+
255293
// If the reference is to an object that's not in an ephemeral generation we have no need to track it
256294
// (since the object won't be collected or moved by an ephemeral collection).
257295
cmp rcx, [C_VAR(g_ephemeral_low)]
258296
jb LOCAL_LABEL(RhpByRefAssignRef_NotInHeap)
259297
cmp rcx, [C_VAR(g_ephemeral_high)]
260298
jae LOCAL_LABEL(RhpByRefAssignRef_NotInHeap)
261299

262-
// move current rdi value into rcx and then increment the pointers
300+
// copy the current rdi value into rcx; rdi itself must be preserved because it is incremented by 8 before leaving
263301
mov rcx, rdi
264-
add rsi, 0x8
265-
add rdi, 0x8
266302

267303
// We have a location on the GC heap being updated with a reference to an ephemeral object so we must
268304
// track this write. The location address is translated into an offset in the card table bitmap. We set
269305
// an entire byte in the card table since it's quicker than messing around with bitmasks and we only write
270306
// the byte if it hasn't already been done since writes are expensive and impact scaling.
271-
shr rcx, 11
272-
add rcx, [C_VAR(g_card_table)]
273-
cmp byte ptr [rcx], 0x0FF
274-
jne LOCAL_LABEL(RhpByRefAssignRef_UpdateCardTable)
275-
ret
307+
shr rcx, 0x0B
308+
mov r10, [C_VAR(g_card_table)]
309+
cmp byte ptr [rcx + r10], 0x0FF
310+
je LOCAL_LABEL(RhpByRefAssignRef_NotInHeap)
276311

277312
// We get here if it's necessary to update the card table.
278-
LOCAL_LABEL(RhpByRefAssignRef_UpdateCardTable):
279-
mov byte ptr [rcx], 0x0FF
280-
ret
313+
mov byte ptr [rcx + r10], 0xFF
314+
315+
#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
316+
// Shift rcx by 0x0A more to get the card bundle byte (we shifted by 0x0B already)
317+
shr rcx, 0x0A
318+
add rcx, [C_VAR(g_card_bundle_table)]
319+
cmp byte ptr [rcx], 0xFF
320+
je LOCAL_LABEL(RhpByRefAssignRef_NotInHeap)
321+
322+
mov byte ptr [rcx], 0xFF
323+
#endif
281324

282325
LOCAL_LABEL(RhpByRefAssignRef_NotInHeap):
283326
// Increment the pointers before leaving

src/coreclr/nativeaot/Runtime/arm64/WriteBarriers.S

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -92,12 +92,11 @@
9292
// destReg: location to be updated
9393
// refReg: objectref to be stored
9494
// trash: register number that can be trashed
95-
// trash2: register than can be trashed
9695
//
9796
// On exit:
9897
// destReg: trashed
9998
//
100-
.macro INSERT_UNCHECKED_WRITE_BARRIER_CORE destReg, refReg, trash, trash2
99+
.macro INSERT_UNCHECKED_WRITE_BARRIER_CORE destReg, refReg, trash
101100

102101
// Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless
103102
// we are in a debug build and write barrier checking has been enabled).
@@ -129,27 +128,27 @@
129128
// Set this objects card, if it has not already been set.
130129

131130
PREPARE_EXTERNAL_VAR_INDIRECT g_card_table, x\trash
132-
add \trash2, x\trash, \destReg, lsr #11
131+
add x17, x\trash, \destReg, lsr #11
133132

134133
// Check that this card has not already been written. Avoiding useless writes is a big win on
135134
// multi-proc systems since it avoids cache thrashing.
136-
ldrb w\trash, [\trash2]
135+
ldrb w\trash, [x17]
137136
cmp x\trash, 0xFF
138137
beq 0f
139138

140139
mov x\trash, 0xFF
141-
strb w\trash, [\trash2]
140+
strb w\trash, [x17]
142141

143142
#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
144143
// Check if we need to update the card bundle table
145144
PREPARE_EXTERNAL_VAR_INDIRECT g_card_bundle_table, x\trash
146-
add \trash2, x\trash, \destReg, lsr #21
147-
ldrb w\trash, [\trash2]
145+
add x17, x\trash, \destReg, lsr #21
146+
ldrb w\trash, [x17]
148147
cmp x\trash, 0xFF
149148
beq 0f
150149

151150
mov x\trash, 0xFF
152-
strb w\trash, [\trash2]
151+
strb w\trash, [x17]
153152
#endif
154153

155154
0:
@@ -160,12 +159,11 @@
160159
// destReg: location to be updated
161160
// refReg: objectref to be stored
162161
// trash: register number that can be trashed
163-
// trash2: register than can be trashed
164162
//
165163
// On exit:
166164
// destReg: trashed
167165
//
168-
.macro INSERT_CHECKED_WRITE_BARRIER_CORE destReg, refReg, trash, trash2
166+
.macro INSERT_CHECKED_WRITE_BARRIER_CORE destReg, refReg, trash
169167

170168
// The "check" of this checked write barrier - is destReg
171169
// within the heap? if no, early out.
@@ -180,7 +178,7 @@
180178
ccmp \destReg, x\trash, #0x2, hs
181179
bhs 0f
182180

183-
INSERT_UNCHECKED_WRITE_BARRIER_CORE \destReg, \refReg, \trash, \trash2
181+
INSERT_UNCHECKED_WRITE_BARRIER_CORE \destReg, \refReg, \trash
184182

185183
0:
186184
// Exit label
@@ -265,7 +263,7 @@ CmpXchgRetry:
265263
// The following barrier code takes the destination in x0 and the value in x1 so the arguments are
266264
// already correctly set up.
267265

268-
INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1, 9, x0
266+
INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1, 9
269267

270268
CmpXchgNoUpdate:
271269
// x10 still contains the original value.
@@ -307,7 +305,7 @@ ExchangeRetry:
307305
// The following barrier code takes the destination in x0 and the value in x1 so the arguments are
308306
// already correctly set up.
309307

310-
INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1, 9, x0
308+
INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1, 9
311309

312310
// x10 still contains the original value.
313311
mov x0, x10
@@ -321,7 +319,7 @@ LEAF_ENTRY RhpAssignRefArm64, _TEXT
321319
ALTERNATE_ENTRY RhpAssignRefX1AVLocation
322320
stlr x15, [x14]
323321

324-
INSERT_UNCHECKED_WRITE_BARRIER_CORE x14, x15, 12, X14
322+
INSERT_UNCHECKED_WRITE_BARRIER_CORE x14, x15, 12
325323

326324
ret
327325
LEAF_END RhpAssignRefArm64, _TEXT
@@ -343,9 +341,7 @@ LEAF_ENTRY RhpCheckedAssignRefArm64, _TEXT
343341

344342
stlr x15, [x14]
345343

346-
INSERT_CHECKED_WRITE_BARRIER_CORE x14, x15, 12, X15
347-
348-
add x14, x14, #8
344+
INSERT_CHECKED_WRITE_BARRIER_CORE x14, x15, 12
349345

350346
ret
351347
LEAF_END RhpCheckedAssignRefArm64, _TEXT
@@ -366,7 +362,7 @@ LEAF_ENTRY RhpByRefAssignRefArm64, _TEXT
366362
ldr x15, [x13]
367363
stlr x15, [x14]
368364

369-
INSERT_CHECKED_WRITE_BARRIER_CORE x14, x15, 12, X15
365+
INSERT_CHECKED_WRITE_BARRIER_CORE x14, x15, 12
370366

371367
add X13, x13, #8
372368
add x14, x14, #8

src/coreclr/nativeaot/Runtime/gcrhenv.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1018,8 +1018,8 @@ void GCToEEInterface::DiagWalkBGCSurvivors(void* gcContext)
10181018
#endif // FEATURE_EVENT_TRACE
10191019
}
10201020

1021-
#if defined(FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP) && (!defined(TARGET_ARM64) || !defined(TARGET_UNIX))
1022-
#error FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP is only implemented for ARM64 and UNIX
1021+
#if defined(FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP) && !defined(TARGET_UNIX)
1022+
#error FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP is only implemented for UNIX
10231023
#endif
10241024

10251025
void GCToEEInterface::StompWriteBarrier(WriteBarrierParameters* args)

src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -869,7 +869,22 @@ extern "C" UInt32_BOOL DuplicateHandle(
869869

870870
extern "C" UInt32_BOOL InitializeCriticalSection(CRITICAL_SECTION * lpCriticalSection)
871871
{
872-
return pthread_mutex_init(&lpCriticalSection->mutex, NULL) == 0;
872+
pthread_mutexattr_t mutexAttributes;
873+
int st = pthread_mutexattr_init(&mutexAttributes);
874+
if (st != 0)
875+
{
876+
return false;
877+
}
878+
879+
st = pthread_mutexattr_settype(&mutexAttributes, PTHREAD_MUTEX_RECURSIVE);
880+
if (st == 0)
881+
{
882+
st = pthread_mutex_init(&lpCriticalSection->mutex, &mutexAttributes);
883+
}
884+
885+
pthread_mutexattr_destroy(&mutexAttributes);
886+
887+
return (st == 0);
873888
}
874889

875890
extern "C" UInt32_BOOL InitializeCriticalSectionEx(CRITICAL_SECTION * lpCriticalSection, uint32_t arg2, uint32_t arg3)

0 commit comments

Comments
 (0)