diff --git a/src/runtime/mkpreempt.go b/src/runtime/mkpreempt.go index 64e220772e1ba2..31b6f5cbac3ad1 100644 --- a/src/runtime/mkpreempt.go +++ b/src/runtime/mkpreempt.go @@ -244,6 +244,15 @@ func genAMD64() { // TODO: MXCSR register? + // Apparently, the signal handling code path in the darwin kernel leaves + // the upper bits of Y registers in a dirty state, which causes + // many SSE operations (128-bit and narrower) to become much slower. + // Clear the upper bits to get to a clean state. See issue #37174. + // It is safe here as Go code doesn't use the upper bits of Y registers. + p("#ifdef GOOS_darwin") + p("VZEROUPPER") + p("#endif") + p("PUSHQ BP") p("MOVQ SP, BP") p("// Save flags before clobbering them") diff --git a/src/runtime/preempt_amd64.s b/src/runtime/preempt_amd64.s index d50c2f3a5169c8..0f2fd7d8dd4d5b 100644 --- a/src/runtime/preempt_amd64.s +++ b/src/runtime/preempt_amd64.s @@ -4,6 +4,9 @@ #include "textflag.h" TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 + #ifdef GOOS_darwin + VZEROUPPER + #endif PUSHQ BP MOVQ SP, BP // Save flags before clobbering them