Skip to content

Commit

Permalink
[Backport] 8262355: Support for AVX-512 opmask register allocation.
Browse files Browse the repository at this point in the history
Summary: 8262355: Support for AVX-512 opmask register allocation.

Test Plan: ci jtreg

Reviewed-by: JoshuaZhuwj

Issue: dragonwell-project#607
  • Loading branch information
JinZhonghui committed Oct 30, 2023
1 parent d781571 commit 630752b
Show file tree
Hide file tree
Showing 33 changed files with 1,640 additions and 243 deletions.
16 changes: 16 additions & 0 deletions src/hotspot/cpu/x86/assembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2495,6 +2495,22 @@ void Assembler::kmovwl(KRegister dst, Address src) {
emit_operand((Register)dst, src);
}

void Assembler::kmovwl(Address dst, KRegister src) {
assert(VM_Version::supports_evex(), "");
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0x91);
emit_operand((Register)src, dst);
}

void Assembler::kmovwl(KRegister dst, KRegister src) {
assert(VM_Version::supports_avx512bw(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
emit_int16((unsigned char)0x90, (0xC0 | encode));
}

void Assembler::kmovdl(KRegister dst, Register src) {
assert(VM_Version::supports_avx512bw(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
Expand Down
2 changes: 2 additions & 0 deletions src/hotspot/cpu/x86/assembler_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1494,6 +1494,8 @@ class Assembler : public AbstractAssembler {
void kmovwl(KRegister dst, Register src);
void kmovwl(KRegister dst, Address src);
void kmovwl(Register dst, KRegister src);
void kmovwl(Address dst, KRegister src);
void kmovwl(KRegister dst, KRegister src);
void kmovdl(KRegister dst, Register src);
void kmovdl(Register dst, KRegister src);
void kmovql(KRegister dst, KRegister src);
Expand Down
17 changes: 9 additions & 8 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -988,10 +988,11 @@ void C2_MacroAssembler::reduce8L(int opcode, Register dst, Register src1, XMMReg
reduce4L(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
}

void C2_MacroAssembler::genmask(Register dst, Register len, Register temp) {
void C2_MacroAssembler::genmask(KRegister dst, Register len, Register temp) {
// assert(ArrayCopyPartialInlineSize <= 64,""); Not full 8252848 merged
mov64(dst, -1L);
bzhiq(dst, dst, len);
mov64(temp, -1L);
bzhiq(temp, temp, len);
kmovql(dst, temp);
}
#endif // _LP64

Expand Down Expand Up @@ -1249,7 +1250,8 @@ void C2_MacroAssembler::evpblend(BasicType typ, XMMRegister dst, KRegister kmask
}
}

void C2_MacroAssembler::vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
void C2_MacroAssembler::vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2,
XMMRegister vtmp1, XMMRegister vtmp2, KRegister mask) {
switch(vlen) {
case 4:
assert(vtmp1 != xnoreg, "required.");
Expand Down Expand Up @@ -1287,14 +1289,13 @@ void C2_MacroAssembler::vectortest(int bt, int vlen, XMMRegister src1, XMMRegist
break;
case 64:
{
KRegister ktemp = k2; // Use a hardcoded temp due to no k register allocation.
assert((vtmp1 == xnoreg) && (vtmp2 == xnoreg), "required.");
evpcmpeqb(ktemp, src1, src2, Assembler::AVX_512bit);
evpcmpeqb(mask, src1, src2, Assembler::AVX_512bit);
if (bt == BoolTest::ne) {
ktestql(ktemp, ktemp);
ktestql(mask, mask);
} else {
assert(bt == BoolTest::overflow, "required");
kortestql(ktemp, ktemp);
kortestql(mask, mask);
}
}
break;
Expand Down
4 changes: 2 additions & 2 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@

// vector test
void vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2,
XMMRegister vtmp1 = xnoreg, XMMRegister vtmp2 = xnoreg);
XMMRegister vtmp1 = xnoreg, XMMRegister vtmp2 = xnoreg, KRegister mask = knoreg);

// blend
void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch = rscratch1);
Expand All @@ -93,7 +93,7 @@
void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
#ifdef _LP64
void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void genmask(Register dst, Register len, Register temp);
void genmask(KRegister dst, Register len, Register temp);
#endif // _LP64

// dst = reduce(op, src2) using vtmp as temps
Expand Down
35 changes: 32 additions & 3 deletions src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -150,7 +150,6 @@ void ZBarrierSetAssembler::load_at(MacroAssembler* masm,
// Call VM
call_vm(masm, ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), dst, scratch);

// Restore registers
__ movdqu(xmm0, Address(rsp, xmm_size * 0));
__ movdqu(xmm1, Address(rsp, xmm_size * 1));
__ movdqu(xmm2, Address(rsp, xmm_size * 2));
Expand Down Expand Up @@ -395,6 +394,7 @@ class ZSaveLiveRegisters {

MacroAssembler* const _masm;
GrowableArray<Register> _gp_registers;
GrowableArray<KRegister> _opmask_registers;
GrowableArray<XMMRegisterData> _xmm_registers;
int _spill_size;
int _spill_offset;
Expand Down Expand Up @@ -451,11 +451,21 @@ class ZSaveLiveRegisters {
__ movq(Address(rsp, _spill_offset), reg);
}

void opmask_register_save(KRegister reg) {
_spill_offset -= 8;
__ kmovql(Address(rsp, _spill_offset), reg);
}

void gp_register_restore(Register reg) {
__ movq(reg, Address(rsp, _spill_offset));
_spill_offset += 8;
}

void opmask_register_restore(KRegister reg) {
__ kmovql(reg, Address(rsp, _spill_offset));
_spill_offset += 8;
}

void initialize(ZLoadBarrierStubC2* stub) {
// Create mask of caller saved registers that need to
// be saved/restored if live
Expand All @@ -478,6 +488,7 @@ class ZSaveLiveRegisters {
}

int gp_spill_size = 0;
int opmask_spill_size = 0;
int xmm_spill_size = 0;

// Record registers that needs to be saved/restored
Expand All @@ -492,6 +503,13 @@ class ZSaveLiveRegisters {
_gp_registers.append(vm_reg->as_Register());
gp_spill_size += 8;
}
} else if (vm_reg->is_KRegister()) {
// All opmask registers are caller saved, thus spill the ones
// which are live.
if (_opmask_registers.find(vm_reg->as_KRegister()) == -1) {
_opmask_registers.append(vm_reg->as_KRegister());
opmask_spill_size += 8;
}
} else if (vm_reg->is_XMMRegister()) {
// We encode in the low order 4 bits of the opto_reg, how large part of the register is live
const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~15);
Expand Down Expand Up @@ -519,13 +537,14 @@ class ZSaveLiveRegisters {
_xmm_registers.sort(xmm_compare_register_size);

// Stack pointer must be 16 bytes aligned for the call
_spill_offset = _spill_size = align_up(xmm_spill_size + gp_spill_size, 16);
_spill_offset = _spill_size = align_up(xmm_spill_size + gp_spill_size + opmask_spill_size, 16);
}

public:
ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) :
_masm(masm),
_gp_registers(),
_opmask_registers(),
_xmm_registers(),
_spill_size(0),
_spill_offset(0) {
Expand Down Expand Up @@ -575,9 +594,19 @@ class ZSaveLiveRegisters {
for (int i = 0; i < _gp_registers.length(); i++) {
gp_register_save(_gp_registers.at(i));
}

// Save opmask registers
for (int i = 0; i < _opmask_registers.length(); i++) {
opmask_register_save(_opmask_registers.at(i));
}
}

~ZSaveLiveRegisters() {
// Restore opmask registers
for (int i = _opmask_registers.length() - 1; i >= 0; i--) {
opmask_register_restore(_opmask_registers.at(i));
}

// Restore general purpose registers
for (int i = _gp_registers.length() - 1; i >= 0; i--) {
gp_register_restore(_gp_registers.at(i));
Expand Down
Loading

0 comments on commit 630752b

Please sign in to comment.