diff --git a/src/hotspot/cpu/s390/assembler_s390.hpp b/src/hotspot/cpu/s390/assembler_s390.hpp index 9bb143001b944..89eb07b6b8260 100644 --- a/src/hotspot/cpu/s390/assembler_s390.hpp +++ b/src/hotspot/cpu/s390/assembler_s390.hpp @@ -1238,6 +1238,9 @@ class Assembler : public AbstractAssembler { // NOR #define VNO_ZOPC (unsigned long)(0xe7L << 40 | 0x6bL << 0) // V1 := !(V2 | V3), element size = 2**m + // NOT-XOR +#define VNX_ZOPC (unsigned long)(0xe7L << 40 | 0x6cL << 0) // V1 := !(V2 ^ V3), element size = 2**m + // OR #define VO_ZOPC (unsigned long)(0xe7L << 40 | 0x6aL << 0) // V1 := V2 | V3, element size = 2**m @@ -1289,6 +1292,12 @@ class Assembler : public AbstractAssembler { #define VSTRC_ZOPC (unsigned long)(0xe7L << 40 | 0x8aL << 0) // String range compare #define VISTR_ZOPC (unsigned long)(0xe7L << 40 | 0x5cL << 0) // Isolate String +#define VFA_ZOPC (unsigned long)(0xe7L << 40 | 0xE3L << 0) // V1 := V2 + V3, element size = 2**m +#define VFS_ZOPC (unsigned long)(0xe7L << 40 | 0xE2L << 0) // V1 := V2 - V3, element size = 2**m +#define VFM_ZOPC (unsigned long)(0xe7L << 40 | 0xE7L << 0) // V1 := V2 * V3, element size = 2**m +#define VFD_ZOPC (unsigned long)(0xe7L << 40 | 0xE5L << 0) // V1 := V2 / V3, element size = 2**m +#define VFSQ_ZOPC (unsigned long)(0xe7L << 40 | 0xCEL << 0) // V1 := sqrt of V2, element size = 2**m + //-------------------------------- //-- Miscellaneous Operations -- @@ -2485,6 +2494,7 @@ class Assembler : public AbstractAssembler { inline void z_vleh( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3); inline void z_vlef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3); inline void z_vleg( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3); + inline void z_vl(VectorRegister v1, const Address& a); // Gather/Scatter inline void z_vgef( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3); @@ -2519,10 +2529,10 @@ class Assembler : public AbstractAssembler { 
inline void z_vlgvg( Register r1, VectorRegister v3, int64_t d2, Register b2); inline void z_vlvg( VectorRegister v1, Register r3, int64_t d2, Register b2, int64_t m4); - inline void z_vlvgb( VectorRegister v1, Register r3, int64_t d2, Register b2); - inline void z_vlvgh( VectorRegister v1, Register r3, int64_t d2, Register b2); - inline void z_vlvgf( VectorRegister v1, Register r3, int64_t d2, Register b2); - inline void z_vlvgg( VectorRegister v1, Register r3, int64_t d2, Register b2); + inline void z_vlvgb( VectorRegister v1, Register r3, int64_t d2, Register b2=Z_R0); + inline void z_vlvgh( VectorRegister v1, Register r3, int64_t d2, Register b2=Z_R0); + inline void z_vlvgf( VectorRegister v1, Register r3, int64_t d2, Register b2=Z_R0); + inline void z_vlvgg( VectorRegister v1, Register r3, int64_t d2, Register b2=Z_R0); inline void z_vlvgp( VectorRegister v1, Register r2, Register r3); @@ -2614,6 +2624,7 @@ class Assembler : public AbstractAssembler { inline void z_vstef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3); inline void z_vsteg( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3); inline void z_vstl( VectorRegister v1, Register r3, int64_t d2, Register b2); + inline void z_vst(VectorRegister v1, const Address& a); // Misc inline void z_vgm( VectorRegister v1, int64_t imm2, int64_t imm3, int64_t m4); @@ -2670,6 +2681,9 @@ class Assembler : public AbstractAssembler { // MULTIPLY inline void z_vml( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4); + inline void z_vmlb( VectorRegister v1, VectorRegister v2, VectorRegister v3); + inline void z_vmlhw( VectorRegister v1, VectorRegister v2, VectorRegister v3); + inline void z_vmlf( VectorRegister v1, VectorRegister v2, VectorRegister v3); inline void z_vmh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4); inline void z_vmlh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4); inline void z_vme( VectorRegister 
v1, VectorRegister v2, VectorRegister v3, int64_t m4); @@ -2734,6 +2748,9 @@ class Assembler : public AbstractAssembler { // NOR inline void z_vno( VectorRegister v1, VectorRegister v2, VectorRegister v3); + //NOT-XOR + inline void z_vnx( VectorRegister v1, VectorRegister v2, VectorRegister v3); + // OR inline void z_vo( VectorRegister v1, VectorRegister v2, VectorRegister v3); @@ -2890,6 +2907,30 @@ class Assembler : public AbstractAssembler { // Floatingpoint instructions // ========================== + // Add + inline void z_vfa(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4); + inline void z_vfasb(VectorRegister v1, VectorRegister v2, VectorRegister v3); + inline void z_vfadb(VectorRegister v1, VectorRegister v2, VectorRegister v3); + + //SUB + inline void z_vfs(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4); + inline void z_vfssb(VectorRegister v1, VectorRegister v2, VectorRegister v3); + inline void z_vfsdb(VectorRegister v1, VectorRegister v2, VectorRegister v3); + + //MUL + inline void z_vfm(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4); + inline void z_vfmsb(VectorRegister v1, VectorRegister v2, VectorRegister v3); + inline void z_vfmdb(VectorRegister v1, VectorRegister v2, VectorRegister v3); + + //DIV + inline void z_vfd(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4); + inline void z_vfdsb(VectorRegister v1, VectorRegister v2, VectorRegister v3); + inline void z_vfddb(VectorRegister v1, VectorRegister v2, VectorRegister v3); + + //square root + inline void z_vfsq(VectorRegister v1, VectorRegister v2, int64_t m3); + inline void z_vfsqsb(VectorRegister v1, VectorRegister v2); + inline void z_vfsqdb(VectorRegister v1, VectorRegister v2); // compare instructions inline void z_cebr(FloatRegister r1, FloatRegister r2); // compare (r1, r2) ; float diff --git a/src/hotspot/cpu/s390/assembler_s390.inline.hpp b/src/hotspot/cpu/s390/assembler_s390.inline.hpp index 
126dd83ee22f4..ddd4ec8fa367b 100644 --- a/src/hotspot/cpu/s390/assembler_s390.inline.hpp +++ b/src/hotspot/cpu/s390/assembler_s390.inline.hpp @@ -778,6 +778,7 @@ inline void Assembler::z_vleb( VectorRegister v1, int64_t d2, Register x2, Reg inline void Assembler::z_vleh( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t ix3){emit_48(VLEH_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | uimm4(ix3, 32, 48)); } inline void Assembler::z_vlef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t ix3){emit_48(VLEF_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | uimm4(ix3, 32, 48)); } inline void Assembler::z_vleg( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t ix3){emit_48(VLEG_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | uimm4(ix3, 32, 48)); } +inline void Assembler::z_vl(VectorRegister v1, const Address& a) { z_vl(v1, a.disp(), a.indexOrR0(), a.baseOrR0()); } // Gather/Scatter inline void Assembler::z_vgef( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t ix3) {emit_48(VGEF_ZOPC | vreg(v1, 8) | rvmask_48(d2, vx2, b2) | uimm4(ix3, 32, 48)); } @@ -811,7 +812,7 @@ inline void Assembler::z_vlgvh( Register r1, VectorRegister v3, int64_t d2, Reg inline void Assembler::z_vlgvf( Register r1, VectorRegister v3, int64_t d2, Register b2) {z_vlgv(r1, v3, d2, b2, VRET_FW); } // load FW from VR element (index d2(b2)) into GR (logical) inline void Assembler::z_vlgvg( Register r1, VectorRegister v3, int64_t d2, Register b2) {z_vlgv(r1, v3, d2, b2, VRET_DW); } // load DW from VR element (index d2(b2)) into GR. 
-inline void Assembler::z_vlvg( VectorRegister v1, Register r3, int64_t d2, Register b2, int64_t m4) {emit_48(VLVG_ZOPC | vreg(v1, 8) | reg(r3, 12, 48) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); } +inline void Assembler::z_vlvg( VectorRegister v1, Register r3, int64_t d2, Register b2, int64_t m4) {emit_48(VLVG_ZOPC | vreg(v1, 8) | reg(r3, 12, 48) | rsmaskt_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); } // D2(B2) mask uses the 48-bit layout to match emit_48 inline void Assembler::z_vlvgb( VectorRegister v1, Register r3, int64_t d2, Register b2) {z_vlvg(v1, r3, d2, b2, VRET_BYTE); } inline void Assembler::z_vlvgh( VectorRegister v1, Register r3, int64_t d2, Register b2) {z_vlvg(v1, r3, d2, b2, VRET_HW); } inline void Assembler::z_vlvgf( VectorRegister v1, Register r3, int64_t d2, Register b2) {z_vlvg(v1, r3, d2, b2, VRET_FW); } @@ -907,6 +908,7 @@ inline void Assembler::z_vsteh( VectorRegister v1, int64_t d2, Register x2, Reg inline void Assembler::z_vstef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t ix3){emit_48(VSTEF_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | uimm4(ix3, 32, 48)); } inline void Assembler::z_vsteg( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t ix3){emit_48(VSTEG_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | uimm4(ix3, 32, 48)); } inline void Assembler::z_vstl( VectorRegister v1, Register r3, int64_t d2, Register b2) {emit_48(VSTL_ZOPC | vreg(v1, 8) | reg(r3, 12, 48) | rsmask_48(d2, b2)); } +inline void Assembler::z_vst(VectorRegister v1, const Address& a) { z_vst(v1, a.disp(), a.indexOrR0(), a.baseOrR0()); } // Misc inline void Assembler::z_vgm( VectorRegister v1, int64_t imm2, int64_t imm3, int64_t m4) {emit_48(VGM_ZOPC | vreg(v1, 8) | uimm8( imm2, 16, 48) | uimm8(imm3, 24, 48) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); } @@ -946,6 +948,8 @@ inline void Assembler::z_vacch( VectorRegister v1, VectorRegister v2, VectorReg inline void Assembler::z_vaccf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vacc(v1, v2, v3, VRET_FW); 
} // vector element type 'F' inline void Assembler::z_vaccg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vacc(v1, v2, v3, VRET_DW); } // vector element type 'G' inline void Assembler::z_vaccq( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vacc(v1, v2, v3, VRET_QW); } // vector element type 'Q' + // + // SUB inline void Assembler::z_vs( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VS_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_QW, 32)); } @@ -964,6 +968,9 @@ inline void Assembler::z_vscbiq( VectorRegister v1, VectorRegister v2, VectorReg // MULTIPLY inline void Assembler::z_vml( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VML_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); } inline void Assembler::z_vmh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); } +inline void Assembler::z_vmlb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vml(v1, v2, v3, VRET_BYTE);} // vector element type 'B' +inline void Assembler::z_vmlhw( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vml(v1, v2, v3, VRET_HW);} // vector element type 'H' +inline void Assembler::z_vmlf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vml(v1, v2, v3, VRET_FW);} // vector element type 'F' inline void Assembler::z_vmlh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMLH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); } inline void Assembler::z_vme( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VME_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); } inline void Assembler::z_vmle( VectorRegister v1, VectorRegister v2, 
VectorRegister v3, int64_t m4) {emit_48(VMLE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); } @@ -1026,6 +1033,9 @@ inline void Assembler::z_vx( VectorRegister v1, VectorRegister v2, VectorReg // NOR inline void Assembler::z_vno( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VNO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); } +//NOT-XOR +inline void Assembler::z_vnx( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VNX_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); } + // OR inline void Assembler::z_vo( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); } @@ -1179,12 +1189,39 @@ inline void Assembler::z_vistrbs(VectorRegister v1, VectorRegister v2) inline void Assembler::z_vistrhs(VectorRegister v1, VectorRegister v2) {z_vistr(v1, v2, VRET_HW, VOPRC_CCSET); } inline void Assembler::z_vistrfs(VectorRegister v1, VectorRegister v2) {z_vistr(v1, v2, VRET_FW, VOPRC_CCSET); } - //------------------------------- -// FLOAT INSTRUCTIONS +// Vector FLOAT INSTRUCTIONS //------------------------------- +//Add +inline void Assembler::z_vfa( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VFA_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_FW, VRET_QW, 32)); } +inline void Assembler::z_vfasb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vfa(v1, v2, v3, VRET_FW); } // vector element type 'F' +inline void Assembler::z_vfadb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vfa(v1, v2, v3, VRET_DW); } // vector element type 'G' +//SUB //---------------- +inline void Assembler::z_vfs( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VFS_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_FW, VRET_QW, 32)); } +inline void Assembler::z_vfssb( VectorRegister v1, VectorRegister v2, VectorRegister v3) 
{z_vfs(v1, v2, v3, VRET_FW); } // vector element type 'F' +inline void Assembler::z_vfsdb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vfs(v1, v2, v3, VRET_DW); } // vector element type 'G' + // +//MUL +inline void Assembler::z_vfm( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VFM_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_FW, VRET_QW, 32)); } +inline void Assembler::z_vfmsb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vfm(v1, v2, v3, VRET_FW); } // vector element type 'F' +inline void Assembler::z_vfmdb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vfm(v1, v2, v3, VRET_DW); } // vector element type 'G' + +//DIV +inline void Assembler::z_vfd( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VFD_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_FW, VRET_QW, 32)); } +inline void Assembler::z_vfdsb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vfd(v1, v2, v3, VRET_FW); } // vector element type 'F' +inline void Assembler::z_vfddb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vfd(v1, v2, v3, VRET_DW); } // vector element type 'G' + +// square root +//--------------- +inline void Assembler::z_vfsq( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VFSQ_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_FW, VRET_QW, 32)); } +inline void Assembler::z_vfsqsb( VectorRegister v1, VectorRegister v2) {z_vfsq(v1, v2, VRET_FW); } +inline void Assembler::z_vfsqdb( VectorRegister v1, VectorRegister v2) {z_vfsq(v1, v2, VRET_DW); } + +//------------------------------- +// FLOAT INSTRUCTIONS +//------------------------------- // LOAD //---------------- inline void Assembler::z_ler( FloatRegister r1, FloatRegister r2) { emit_16( LER_ZOPC | fregt(r1,8,16) | freg(r2,12,16)); } diff --git a/src/hotspot/cpu/s390/c2_globals_s390.hpp b/src/hotspot/cpu/s390/c2_globals_s390.hpp index 
0192cb716baab..1de38f100f627 100644 --- a/src/hotspot/cpu/s390/c2_globals_s390.hpp +++ b/src/hotspot/cpu/s390/c2_globals_s390.hpp @@ -60,7 +60,7 @@ define_pd_global(bool, UseCISCSpill, true); define_pd_global(bool, OptoBundling, false); define_pd_global(bool, OptoScheduling, false); define_pd_global(bool, OptoRegScheduling, false); -define_pd_global(bool, SuperWordLoopUnrollAnalysis, false); +define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); // On s390x, we can clear the array with a single instruction, // so don't idealize it. define_pd_global(bool, IdealizeClearArrayNode, false); diff --git a/src/hotspot/cpu/s390/globals_s390.hpp b/src/hotspot/cpu/s390/globals_s390.hpp index df38f3133d715..8542cf4ddbddb 100644 --- a/src/hotspot/cpu/s390/globals_s390.hpp +++ b/src/hotspot/cpu/s390/globals_s390.hpp @@ -108,6 +108,9 @@ define_pd_global(intx, InitArrayShortSize, 1*BytesPerLong); /* Seems to pay off with 2 pages already. */ \ product(size_t, MVCLEThreshold, +2*(4*K), DIAGNOSTIC, \ "Threshold above which page-aligned MVCLE copy/init is used.") \ + /* special instructions */ \ + product(bool, SuperwordUseVX, false, \ + "Use Z15 Vector instructions for superword optimization.") \ \ product(bool, PreferLAoverADD, false, DIAGNOSTIC, \ "Use LA/LAY instructions over ADD instructions (z/Architecture).") \ diff --git a/src/hotspot/cpu/s390/registerSaver_s390.hpp b/src/hotspot/cpu/s390/registerSaver_s390.hpp index 97883685384ca..5c8e13daacea2 100644 --- a/src/hotspot/cpu/s390/registerSaver_s390.hpp +++ b/src/hotspot/cpu/s390/registerSaver_s390.hpp @@ -47,10 +47,10 @@ class RegisterSaver { // Boolean flags to force only argument registers to be saved. static int live_reg_save_size(RegisterSet reg_set); - static int live_reg_frame_size(RegisterSet reg_set); + static int live_reg_frame_size(RegisterSet reg_set, bool save_vectors = false); // Specify the register that should be stored as the return pc in the current frame. 
- static OopMap* save_live_registers(MacroAssembler* masm, RegisterSet reg_set, Register return_pc = Z_R14); - static void restore_live_registers(MacroAssembler* masm, RegisterSet reg_set); + static OopMap* save_live_registers(MacroAssembler* masm, RegisterSet reg_set, Register return_pc = Z_R14, bool save_vectors = false); + static void restore_live_registers(MacroAssembler* masm, RegisterSet reg_set, bool save_vectors = false); // Generate the OopMap (again, regs where saved before). static OopMap* generate_oop_map(MacroAssembler* masm, RegisterSet reg_set); @@ -65,11 +65,13 @@ class RegisterSaver { int_reg = 0, float_reg = 1, excluded_reg = 2, // Not saved/restored. + v_reg = 3 } RegisterType; typedef enum { reg_size = 8, half_reg_size = reg_size / 2, + v_reg_size = 16 } RegisterConstants; // Remember type, number, and VMReg. diff --git a/src/hotspot/cpu/s390/register_s390.cpp b/src/hotspot/cpu/s390/register_s390.cpp index f055a1c013441..c0840add5d6e4 100644 --- a/src/hotspot/cpu/s390/register_s390.cpp +++ b/src/hotspot/cpu/s390/register_s390.cpp @@ -26,11 +26,6 @@ #include "precompiled.hpp" #include "register_s390.hpp" - -const int ConcreteRegisterImpl::max_gpr = Register::number_of_registers * 2; -const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + - FloatRegister::number_of_registers * 2; - const char* Register::name() const { const char* names[number_of_registers] = { "Z_R0", "Z_R1", "Z_R2", "Z_R3", "Z_R4", "Z_R5", "Z_R6", "Z_R7", @@ -54,5 +49,11 @@ const char* VectorRegister::name() const { "Z_V16", "Z_V17", "Z_V18", "Z_V19", "Z_V20", "Z_V21", "Z_V22", "Z_V23", "Z_V24", "Z_V25", "Z_V26", "Z_V27", "Z_V28", "Z_V29", "Z_V30", "Z_V31" }; - return is_valid() ? names[encoding()] : "fnoreg"; + return is_valid() ? 
names[encoding()] : "vnoreg"; +} + +// Convert a FloatRegister to the VectorRegister with the same encoding (fnoreg maps to vnoreg). +VectorRegister FloatRegister::to_vr() const { + if (*this == fnoreg) { return vnoreg; } + return as_VectorRegister(encoding()); } diff --git a/src/hotspot/cpu/s390/register_s390.hpp b/src/hotspot/cpu/s390/register_s390.hpp index 931e899257e92..11a5a4b4cf3a0 100644 --- a/src/hotspot/cpu/s390/register_s390.hpp +++ b/src/hotspot/cpu/s390/register_s390.hpp @@ -64,6 +64,7 @@ class Register { public: enum { number_of_registers = 16, + max_slots_per_register = 2, number_of_arg_registers = 5 }; @@ -164,12 +165,13 @@ constexpr ConditionRegister Z_CR = as_ConditionRegister(0); //========================= // The implementation of float registers for the z/Architecture. - +class VectorRegister; class FloatRegister { int _encoding; public: enum { number_of_registers = 16, + max_slots_per_register = 2, number_of_arg_registers = 4 }; @@ -192,6 +194,8 @@ class FloatRegister { constexpr bool is_nonvolatile() const { return (8 <= _encoding && _encoding <= 15); } const char* name() const; + // Convert to the VectorRegister with the same encoding. + VectorRegister to_vr() const; }; inline constexpr FloatRegister as_FloatRegister(int encoding) { @@ -285,6 +289,7 @@ class VectorRegister { public: enum { number_of_registers = 32, + max_slots_per_register = 4, number_of_arg_registers = 0 }; @@ -379,21 +384,21 @@ constexpr VectorRegister Z_V31 = as_VectorRegister(31); // Need to know the total number of registers of all sorts for SharedInfo. // Define a class that exports it. 
- class ConcreteRegisterImpl : public AbstractRegisterImpl { public: enum { - number_of_registers = - (Register::number_of_registers + - FloatRegister::number_of_registers) - * 2 // register halves - + 1 // condition code register + max_gpr = Register::number_of_registers * Register::max_slots_per_register, + max_fpr = max_gpr + FloatRegister::number_of_registers * FloatRegister::max_slots_per_register, + max_vr = max_fpr + VectorRegister::number_of_registers * VectorRegister::max_slots_per_register, + + // A big enough number for C2: all the registers plus flags. + // This number must be large enough to cover REG_COUNT (defined by c2) registers. + // There is no requirement that any ordering here matches any ordering c2 gives + // its optoregs. + number_of_registers = max_vr + 1 // gpr/fpr/vr + flags }; - static const int max_gpr; - static const int max_fpr; }; - // Common register declarations used in assembler code. constexpr Register Z_EXC_OOP = Z_R2; constexpr Register Z_EXC_PC = Z_R3; diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad index 9f4e182a9e4b7..b535e20c323d7 100644 --- a/src/hotspot/cpu/s390/s390.ad +++ b/src/hotspot/cpu/s390/s390.ad @@ -182,6 +182,165 @@ register %{ reg_def Z_F15 (SOC, SOE, Op_RegF, 15, Z_F15->as_VMReg()); reg_def Z_F15_H(SOC, SOE, Op_RegF, 99, Z_F15->as_VMReg()->next()); + reg_def Z_VR0 ( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad()); + reg_def Z_VR0_H ( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad()); + reg_def Z_VR0_J ( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad()); + reg_def Z_VR0_K ( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad()); + + reg_def Z_VR1 ( SOC, SOC, Op_RegF, 1, VMRegImpl::Bad()); + reg_def Z_VR1_H ( SOC, SOC, Op_RegF, 1, VMRegImpl::Bad()); + reg_def Z_VR1_J ( SOC, SOC, Op_RegF, 1, VMRegImpl::Bad()); + reg_def Z_VR1_K ( SOC, SOC, Op_RegF, 1, VMRegImpl::Bad()); + + reg_def Z_VR2 ( SOC, SOC, Op_RegF, 2, VMRegImpl::Bad()); + reg_def Z_VR2_H ( SOC, SOC, Op_RegF, 2, VMRegImpl::Bad()); + reg_def Z_VR2_J ( SOC, SOC, Op_RegF, 2, 
VMRegImpl::Bad()); + reg_def Z_VR2_K ( SOC, SOC, Op_RegF, 2, VMRegImpl::Bad()); + + reg_def Z_VR3 ( SOC, SOC, Op_RegF, 3, VMRegImpl::Bad()); + reg_def Z_VR3_H ( SOC, SOC, Op_RegF, 3, VMRegImpl::Bad()); + reg_def Z_VR3_J ( SOC, SOC, Op_RegF, 3, VMRegImpl::Bad()); + reg_def Z_VR3_K ( SOC, SOC, Op_RegF, 3, VMRegImpl::Bad()); + + reg_def Z_VR4 ( SOC, SOC, Op_RegF, 4, VMRegImpl::Bad()); + reg_def Z_VR4_H ( SOC, SOC, Op_RegF, 4, VMRegImpl::Bad()); + reg_def Z_VR4_J ( SOC, SOC, Op_RegF, 4, VMRegImpl::Bad()); + reg_def Z_VR4_K ( SOC, SOC, Op_RegF, 4, VMRegImpl::Bad()); + + reg_def Z_VR5 ( SOC, SOC, Op_RegF, 5, VMRegImpl::Bad()); + reg_def Z_VR5_H ( SOC, SOC, Op_RegF, 5, VMRegImpl::Bad()); + reg_def Z_VR5_J ( SOC, SOC, Op_RegF, 5, VMRegImpl::Bad()); + reg_def Z_VR5_K ( SOC, SOC, Op_RegF, 5, VMRegImpl::Bad()); + + reg_def Z_VR6 ( SOC, SOC, Op_RegF, 6, VMRegImpl::Bad()); + reg_def Z_VR6_H ( SOC, SOC, Op_RegF, 6, VMRegImpl::Bad()); + reg_def Z_VR6_J ( SOC, SOC, Op_RegF, 6, VMRegImpl::Bad()); + reg_def Z_VR6_K ( SOC, SOC, Op_RegF, 6, VMRegImpl::Bad()); + + reg_def Z_VR7 ( SOC, SOC, Op_RegF, 7, VMRegImpl::Bad()); + reg_def Z_VR7_H ( SOC, SOC, Op_RegF, 7, VMRegImpl::Bad()); + reg_def Z_VR7_J ( SOC, SOC, Op_RegF, 7, VMRegImpl::Bad()); + reg_def Z_VR7_K ( SOC, SOC, Op_RegF, 7, VMRegImpl::Bad()); + + reg_def Z_VR8 ( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad()); + reg_def Z_VR8_H ( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad()); + reg_def Z_VR8_J ( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad()); + reg_def Z_VR8_K ( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad()); + + reg_def Z_VR9 ( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad()); + reg_def Z_VR9_H ( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad()); + reg_def Z_VR9_J ( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad()); + reg_def Z_VR9_K ( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad()); + + reg_def Z_VR10 ( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad()); + reg_def Z_VR10_H ( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad()); + reg_def Z_VR10_J ( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad()); + reg_def Z_VR10_K ( 
SOC, SOC, Op_RegF, 10, VMRegImpl::Bad()); + + reg_def Z_VR11 ( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad()); + reg_def Z_VR11_H ( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad()); + reg_def Z_VR11_J ( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad()); + reg_def Z_VR11_K ( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad()); + + reg_def Z_VR12 ( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad()); + reg_def Z_VR12_H ( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad()); + reg_def Z_VR12_J ( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad()); + reg_def Z_VR12_K ( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad()); + + reg_def Z_VR13 ( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad()); + reg_def Z_VR13_H ( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad()); + reg_def Z_VR13_J ( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad()); + reg_def Z_VR13_K ( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad()); + + reg_def Z_VR14 ( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad()); + reg_def Z_VR14_H ( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad()); + reg_def Z_VR14_J ( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad()); + reg_def Z_VR14_K ( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad()); + + reg_def Z_VR15 ( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad()); + reg_def Z_VR15_H ( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad()); + reg_def Z_VR15_J ( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad()); + reg_def Z_VR15_K ( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad()); + + reg_def Z_VR16 ( SOC, SOC, Op_RegF, 16, Z_V16->as_VMReg() ); + reg_def Z_VR16_H ( SOC, SOC, Op_RegF, 16, Z_V16->as_VMReg()->next() ); + reg_def Z_VR16_J ( SOC, SOC, Op_RegF, 16, Z_V16->as_VMReg()->next(2) ); + reg_def Z_VR16_K ( SOC, SOC, Op_RegF, 16, Z_V16->as_VMReg()->next(3) ); + + reg_def Z_VR17 ( SOC, SOC, Op_RegF, 17, Z_V17->as_VMReg() ); + reg_def Z_VR17_H ( SOC, SOC, Op_RegF, 17, Z_V17->as_VMReg()->next() ); + reg_def Z_VR17_J ( SOC, SOC, Op_RegF, 17, Z_V17->as_VMReg()->next(2) ); + reg_def Z_VR17_K ( SOC, SOC, Op_RegF, 17, Z_V17->as_VMReg()->next(3) ); + + reg_def Z_VR18 ( SOC, SOC, Op_RegF, 18, Z_V18->as_VMReg() ); + reg_def Z_VR18_H ( SOC, SOC, Op_RegF, 18, 
Z_V18->as_VMReg()->next() ); + reg_def Z_VR18_J ( SOC, SOC, Op_RegF, 18, Z_V18->as_VMReg()->next(2) ); + reg_def Z_VR18_K ( SOC, SOC, Op_RegF, 18, Z_V18->as_VMReg()->next(3) ); + + reg_def Z_VR19 ( SOC, SOC, Op_RegF, 19, Z_V19->as_VMReg() ); + reg_def Z_VR19_H ( SOC, SOC, Op_RegF, 19, Z_V19->as_VMReg()->next() ); + reg_def Z_VR19_J ( SOC, SOC, Op_RegF, 19, Z_V19->as_VMReg()->next(2) ); + reg_def Z_VR19_K ( SOC, SOC, Op_RegF, 19, Z_V19->as_VMReg()->next(3) ); + + reg_def Z_VR20 ( SOC, SOC, Op_RegF, 20, Z_V20->as_VMReg() ); + reg_def Z_VR20_H ( SOC, SOC, Op_RegF, 20, Z_V20->as_VMReg()->next() ); + reg_def Z_VR20_J ( SOC, SOC, Op_RegF, 20, Z_V20->as_VMReg()->next(2) ); + reg_def Z_VR20_K ( SOC, SOC, Op_RegF, 20, Z_V20->as_VMReg()->next(3) ); + + reg_def Z_VR21 ( SOC, SOC, Op_RegF, 21, Z_V21->as_VMReg() ); + reg_def Z_VR21_H ( SOC, SOC, Op_RegF, 21, Z_V21->as_VMReg()->next() ); + reg_def Z_VR21_J ( SOC, SOC, Op_RegF, 21, Z_V21->as_VMReg()->next(2) ); + reg_def Z_VR21_K ( SOC, SOC, Op_RegF, 21, Z_V21->as_VMReg()->next(3) ); + + reg_def Z_VR22 ( SOC, SOC, Op_RegF, 22, Z_V22->as_VMReg() ); + reg_def Z_VR22_H ( SOC, SOC, Op_RegF, 22, Z_V22->as_VMReg()->next() ); + reg_def Z_VR22_J ( SOC, SOC, Op_RegF, 22, Z_V22->as_VMReg()->next(2) ); + reg_def Z_VR22_K ( SOC, SOC, Op_RegF, 22, Z_V22->as_VMReg()->next(3) ); + + reg_def Z_VR23 ( SOC, SOC, Op_RegF, 23, Z_V23->as_VMReg() ); + reg_def Z_VR23_H ( SOC, SOC, Op_RegF, 23, Z_V23->as_VMReg()->next() ); + reg_def Z_VR23_J ( SOC, SOC, Op_RegF, 23, Z_V23->as_VMReg()->next(2) ); + reg_def Z_VR23_K ( SOC, SOC, Op_RegF, 23, Z_V23->as_VMReg()->next(3) ); + + reg_def Z_VR24 ( SOC, SOC, Op_RegF, 24, Z_V24->as_VMReg() ); + reg_def Z_VR24_H ( SOC, SOC, Op_RegF, 24, Z_V24->as_VMReg()->next() ); + reg_def Z_VR24_J ( SOC, SOC, Op_RegF, 24, Z_V24->as_VMReg()->next(2) ); + reg_def Z_VR24_K ( SOC, SOC, Op_RegF, 24, Z_V24->as_VMReg()->next(3) ); + + reg_def Z_VR25 ( SOC, SOC, Op_RegF, 25, Z_V25->as_VMReg() ); + reg_def Z_VR25_H ( SOC, SOC, Op_RegF, 
25, Z_V25->as_VMReg()->next() ); + reg_def Z_VR25_J ( SOC, SOC, Op_RegF, 25, Z_V25->as_VMReg()->next(2) ); + reg_def Z_VR25_K ( SOC, SOC, Op_RegF, 25, Z_V25->as_VMReg()->next(3) ); + + reg_def Z_VR26 ( SOC, SOC, Op_RegF, 26, Z_V26->as_VMReg() ); + reg_def Z_VR26_H ( SOC, SOC, Op_RegF, 26, Z_V26->as_VMReg()->next() ); + reg_def Z_VR26_J ( SOC, SOC, Op_RegF, 26, Z_V26->as_VMReg()->next(2) ); + reg_def Z_VR26_K ( SOC, SOC, Op_RegF, 26, Z_V26->as_VMReg()->next(3) ); + + reg_def Z_VR27 ( SOC, SOC, Op_RegF, 27, Z_V27->as_VMReg() ); + reg_def Z_VR27_H ( SOC, SOC, Op_RegF, 27, Z_V27->as_VMReg()->next() ); + reg_def Z_VR27_J ( SOC, SOC, Op_RegF, 27, Z_V27->as_VMReg()->next(2) ); + reg_def Z_VR27_K ( SOC, SOC, Op_RegF, 27, Z_V27->as_VMReg()->next(3) ); + + reg_def Z_VR28 ( SOC, SOC, Op_RegF, 28, Z_V28->as_VMReg() ); + reg_def Z_VR28_H ( SOC, SOC, Op_RegF, 28, Z_V28->as_VMReg()->next() ); + reg_def Z_VR28_J ( SOC, SOC, Op_RegF, 28, Z_V28->as_VMReg()->next(2) ); + reg_def Z_VR28_K ( SOC, SOC, Op_RegF, 28, Z_V28->as_VMReg()->next(3) ); + + reg_def Z_VR29 ( SOC, SOC, Op_RegF, 29, Z_V29->as_VMReg() ); + reg_def Z_VR29_H ( SOC, SOC, Op_RegF, 29, Z_V29->as_VMReg()->next() ); + reg_def Z_VR29_J ( SOC, SOC, Op_RegF, 29, Z_V29->as_VMReg()->next(2) ); + reg_def Z_VR29_K ( SOC, SOC, Op_RegF, 29, Z_V29->as_VMReg()->next(3) ); + + reg_def Z_VR30 ( SOC, SOC, Op_RegF, 30, Z_V30->as_VMReg() ); + reg_def Z_VR30_H ( SOC, SOC, Op_RegF, 30, Z_V30->as_VMReg()->next() ); + reg_def Z_VR30_J ( SOC, SOC, Op_RegF, 30, Z_V30->as_VMReg()->next(2) ); + reg_def Z_VR30_K ( SOC, SOC, Op_RegF, 30, Z_V30->as_VMReg()->next(3) ); + + reg_def Z_VR31 ( SOC, SOC, Op_RegF, 31, Z_V31->as_VMReg() ); + reg_def Z_VR31_H ( SOC, SOC, Op_RegF, 31, Z_V31->as_VMReg()->next() ); + reg_def Z_VR31_J ( SOC, SOC, Op_RegF, 31, Z_V31->as_VMReg()->next(2) ); + reg_def Z_VR31_K ( SOC, SOC, Op_RegF, 31, Z_V31->as_VMReg()->next(3) ); // Special Registers @@ -195,6 +354,7 @@ register %{ reg_def Z_CR(SOC, SOC, Op_RegFlags, 0, 
Z_CR->as_VMReg()); // volatile + // Specify priority of register selection within phases of register // allocation. Highest priority is first. A useful heuristic is to // give registers a low priority when they are required by machine @@ -268,6 +428,41 @@ alloc_class chunk1( ); alloc_class chunk2( + Z_VR0, Z_VR0_H, Z_VR0_J, Z_VR0_K, + Z_VR1, Z_VR1_H, Z_VR1_J, Z_VR1_K, + Z_VR2, Z_VR2_H, Z_VR2_J, Z_VR2_K, + Z_VR3, Z_VR3_H, Z_VR3_J, Z_VR3_K, + Z_VR4, Z_VR4_H, Z_VR4_J, Z_VR4_K, + Z_VR5, Z_VR5_H, Z_VR5_J, Z_VR5_K, + Z_VR6, Z_VR6_H, Z_VR6_J, Z_VR6_K, + Z_VR7, Z_VR7_H, Z_VR7_J, Z_VR7_K, + Z_VR8, Z_VR8_H, Z_VR8_J, Z_VR8_K, + Z_VR9, Z_VR9_H, Z_VR9_J, Z_VR9_K, + Z_VR10, Z_VR10_H, Z_VR10_J, Z_VR10_K, + Z_VR11, Z_VR11_H, Z_VR11_J, Z_VR11_K, + Z_VR12, Z_VR12_H, Z_VR12_J, Z_VR12_K, + Z_VR13, Z_VR13_H, Z_VR13_J, Z_VR13_K, + Z_VR14, Z_VR14_H, Z_VR14_J, Z_VR14_K, + Z_VR15, Z_VR15_H, Z_VR15_J, Z_VR15_K, + Z_VR16, Z_VR16_H, Z_VR16_J, Z_VR16_K, + Z_VR17, Z_VR17_H, Z_VR17_J, Z_VR17_K, + Z_VR18, Z_VR18_H, Z_VR18_J, Z_VR18_K, + Z_VR19, Z_VR19_H, Z_VR19_J, Z_VR19_K, + Z_VR20, Z_VR20_H, Z_VR20_J, Z_VR20_K, + Z_VR21, Z_VR21_H, Z_VR21_J, Z_VR21_K, + Z_VR22, Z_VR22_H, Z_VR22_J, Z_VR22_K, + Z_VR23, Z_VR23_H, Z_VR23_J, Z_VR23_K, + Z_VR24, Z_VR24_H, Z_VR24_J, Z_VR24_K, + Z_VR25, Z_VR25_H, Z_VR25_J, Z_VR25_K, + Z_VR26, Z_VR26_H, Z_VR26_J, Z_VR26_K, + Z_VR27, Z_VR27_H, Z_VR27_J, Z_VR27_K, + Z_VR28, Z_VR28_H, Z_VR28_J, Z_VR28_K, + Z_VR29, Z_VR29_H, Z_VR29_J, Z_VR29_K, + Z_VR30, Z_VR30_H, Z_VR30_J, Z_VR30_K, + Z_VR31, Z_VR31_H, Z_VR31_J, Z_VR31_K, +); + +alloc_class chunk3( Z_CR ); @@ -540,8 +735,29 @@ reg_class z_dbl_reg( ); reg_class z_rscratch1_dbl_reg(Z_F1,Z_F1_H); +reg_class z_v_reg( + Z_VR16, Z_VR16_H, Z_VR16_J, Z_VR16_K, + Z_VR17, Z_VR17_H, Z_VR17_J, Z_VR17_K, + Z_VR18, Z_VR18_H, Z_VR18_J, Z_VR18_K, + Z_VR19, Z_VR19_H, Z_VR19_J, Z_VR19_K, + Z_VR20, Z_VR20_H, Z_VR20_J, Z_VR20_K, + Z_VR21, Z_VR21_H, Z_VR21_J, Z_VR21_K, + Z_VR22, Z_VR22_H, Z_VR22_J, Z_VR22_K, + Z_VR23, Z_VR23_H, Z_VR23_J, 
Z_VR23_K, + Z_VR24, Z_VR24_H, Z_VR24_J, Z_VR24_K, + Z_VR25, Z_VR25_H, Z_VR25_J, Z_VR25_K, + Z_VR26, Z_VR26_H, Z_VR26_J, Z_VR26_K, + Z_VR27, Z_VR27_H, Z_VR27_J, Z_VR27_K, + Z_VR28, Z_VR28_H, Z_VR28_J, Z_VR28_K, + Z_VR29, Z_VR29_H, Z_VR29_J, Z_VR29_K, + Z_VR30, Z_VR30_H, Z_VR30_J, Z_VR30_K, + Z_VR31, Z_VR31_H, Z_VR31_J, Z_VR31_K, +); + %} + + //----------DEFINITION BLOCK--------------------------------------------------- // Define 'name --> value' mappings to inform the ADLC of an integer valued name. // Current support includes integer values in the range [0, 0x7FFFFFFF]. @@ -962,8 +1178,8 @@ const Pipeline * MachEpilogNode::pipeline() const { //============================================================================= -// Figure out which register class each belongs in: rc_int, rc_float, rc_stack. -enum RC { rc_bad, rc_int, rc_float, rc_stack }; +// Figure out which register class each belongs in: rc_int, rc_float, rc_vector, rc_stack. +enum RC { rc_bad, rc_int, rc_float, rc_vector, rc_stack }; static enum RC rc_class(OptoReg::Name reg) { // Return the register class for the given register. The given register @@ -984,7 +1200,13 @@ static enum RC rc_class(OptoReg::Name reg) { return rc_float; } + // we have 32 vector register * 4 halves + if (reg < 32+32+128) { + return rc_vector; + } + // Between float regs & stack are the flags regs. + //assert(OptoReg::is_stack(reg) || reg < 64+64+128, "blow up if spilling flags"); assert(reg >= OptoReg::stack0(), "blow up if spilling flags"); return rc_stack; } @@ -1045,7 +1267,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo "expected aligned-adjacent pairs"); // Generate spill code! - + int size = 0; if (src_lo == dst_lo && src_hi == dst_hi) { return 0; // Self copy, no move. 
} @@ -1059,6 +1281,42 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo const char *mnemo = NULL; unsigned long opc = 0; + if (bottom_type()->isa_vect() != NULL && ideal_reg() == Op_VecX) { + if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { + if (cbuf) { + C2_MacroAssembler _masm(cbuf); + __ z_mvc(Address(Z_SP, 0, dst_offset), Address(Z_SP, 0, src_offset), 16); + } + size += 6; + } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_stack) { + VectorRegister Rsrc = as_VectorRegister(Matcher::_regEncode[src_lo]); + if (cbuf) { + C2_MacroAssembler _masm(cbuf); + __ z_vst(Rsrc, + Address(Z_SP, 0, dst_offset)); + } + size += 6; + } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vector) { + VectorRegister Rdst = as_VectorRegister(Matcher::_regEncode[dst_lo]); + if (cbuf) { + C2_MacroAssembler _masm(cbuf); + __ z_vl(Rdst, + Address(Z_SP, 0, src_offset)); + } + size += 6; + } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_vector) { + VectorRegister Rsrc = as_VectorRegister(Matcher::_regEncode[src_lo]); + VectorRegister Rdst = as_VectorRegister(Matcher::_regEncode[dst_lo]); + if (cbuf) { + C2_MacroAssembler _masm(cbuf); + __ z_vlr(Rdst, Rsrc); + } + size += 6; + } else { + ShouldNotReachHere(); + } + return size; + } // Memory->Memory Spill. Use Z_R0 to hold the value. if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { @@ -1505,6 +1763,28 @@ bool Matcher::match_rule_supported(int opcode) { case Op_PopCountL: // PopCount supported by H/W from z/Architecture G5 (z196) on. 
return (UsePopCountInstruction && VM_Version::has_PopCount()); + case Op_AddVB: + case Op_AddVS: + case Op_AddVI: + case Op_AddVL: + case Op_AddVF: + case Op_AddVD: + case Op_SubVB: + case Op_SubVS: + case Op_SubVI: + case Op_SubVL: + case Op_SubVF: + case Op_SubVD: + case Op_MulVB: + case Op_MulVS: + case Op_MulVI: + case Op_MulVF: + case Op_MulVD: + case Op_DivVF: + case Op_DivVD: + case Op_SqrtVF: + case Op_SqrtVD: + return SuperwordUseVX; case Op_FmaF: case Op_FmaD: return UseFMA; @@ -1554,14 +1834,24 @@ OptoRegPair Matcher::vector_return_value(uint ideal_reg) { // Vector width in bytes. int Matcher::vector_width_in_bytes(BasicType bt) { - assert(MaxVectorSize == 8, ""); - return 8; + if (SuperwordUseVX) { + assert(MaxVectorSize == 16, ""); + return 16; + } else { + assert(MaxVectorSize == 8, ""); + return 8; + } } // Vector ideal reg. uint Matcher::vector_ideal_reg(int size) { - assert(MaxVectorSize == 8 && size == 8, ""); - return Op_RegL; + if (SuperwordUseVX) { + assert(MaxVectorSize == 16 && size == 16, ""); + return Op_VecX; + } else { + assert(MaxVectorSize == 8 && size == 8, ""); + return Op_RegL; + } } // Limits on vector size (number of elements) loaded into vector. @@ -2468,6 +2758,14 @@ ins_attrib ins_should_rematerialize(false); // Immediate Operands // Please note: // Formats are generated automatically for constants and base registers. +operand vecX() %{ + constraint(ALLOC_IN_RC(z_v_reg)); + match(VecX); + + format %{ %} + interface(REG_INTER); +%} + //---------------------------------------------- // SIGNED (shorter than INT) immediate operands @@ -7451,7 +7749,7 @@ instruct negD_reg(regD dst, regD src, flagsReg cr) %{ // Sqrt float precision instruct sqrtF_reg(regF dst, regF src) %{ - match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); + match(Set dst (SqrtF src)); // CC remains unchanged. 
ins_cost(ALU_REG_COST); size(4); @@ -7474,7 +7772,7 @@ instruct sqrtD_reg(regD dst, regD src) %{ %} instruct sqrtF_mem(regF dst, memoryRX src) %{ - match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); + match(Set dst (SqrtF src)); // CC remains unchanged. ins_cost(ALU_MEMORY_COST); size(6); @@ -10554,6 +10852,45 @@ instruct Repl4S_immm1(iRegL dst, immS_minus1 src) %{ ins_pipe(pipe_class_dummy); %} +instruct repl8S_reg_Ex(vecX dst, iRegI src) %{ + match(Set dst (Replicate src)); + predicate(n->as_Vector()->length() == 8 && + Matcher::vector_element_basic_type(n) == T_SHORT); + + size(12); + ins_encode %{ + __ z_vlvgh($dst$$VectorRegister, $src$$Register, 0); + __ z_vreph($dst$$VectorRegister, $dst$$VectorRegister, 0); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct repl8S_immIminus1(vecX dst, immI_minus1 src) %{ + match(Set dst (Replicate src)); + predicate(n->as_Vector()->length() == 8 && + Matcher::vector_element_basic_type(n) == T_SHORT); + + format %{ "VONE $dst, $src \t// replicate8S" %} + size(6); + ins_encode %{ + __ z_vone($dst$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct repl8S_immI0(vecX dst, immI_0 zero) %{ + match(Set dst (Replicate zero)); + predicate(n->as_Vector()->length() == 8 && + Matcher::vector_element_basic_type(n) == T_SHORT); + + format %{ "VZERO $dst, $zero \t// replicate8S" %} + size(6); + ins_encode %{ + __ z_vzero($dst$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} + // Exploit rotate_then_insert, if available. // Replicate scalar int to packed int values (8 Bytes). 
instruct Repl2I_reg_risbg(iRegL dst, iRegI src, flagsReg cr) %{ @@ -10606,7 +10943,44 @@ instruct Repl2I_immm1(iRegL dst, immI_minus1 src) %{ ins_pipe(pipe_class_dummy); %} -// +instruct repl4I_reg_Ex(vecX dst, iRegI src) %{ + match(Set dst (Replicate src)); + predicate(n->as_Vector()->length() == 4 && + Matcher::vector_element_basic_type(n) == T_INT); + + size(12); + ins_encode %{ + __ z_vlvgf($dst$$VectorRegister, $src$$Register, 0); + __ z_vrepf($dst$$VectorRegister, $dst$$VectorRegister, 0); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct repl4I_immI0(vecX dst, immI_0 zero) %{ + match(Set dst (Replicate zero)); + predicate(n->as_Vector()->length() == 4 && + Matcher::vector_element_basic_type(n) == T_INT); + + format %{ "VZERO $dst, $zero \t// replicate4I" %} + size(6); + ins_encode %{ + __ z_vzero($dst$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct repl4I_immIminus1(vecX dst, immI_minus1 src) %{ + match(Set dst (Replicate src)); + predicate(n->as_Vector()->length() == 4 && + Matcher::vector_element_basic_type(n) == T_INT); + + format %{ "VONE $dst, $src \t// replicate4I" %} + size(6); + ins_encode %{ + __ z_vone($dst$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} instruct Repl2F_reg_indirect(iRegL dst, regF src, flagsReg cr) %{ match(Set dst (Replicate src)); @@ -10670,6 +11044,139 @@ instruct Repl2F_imm0(iRegL dst, immFp0 src) %{ ins_pipe(pipe_class_dummy); %} +instruct repl4F_reg_Ex(vecX dst, regF src) %{ + match(Set dst (Replicate src)); + predicate(n->as_Vector()->length() == 4 && + Matcher::vector_element_basic_type(n) == T_FLOAT); + + format %{ "VREP $dst, $src \t// replicate4F" %} + size(6); + + ins_encode %{ + __ z_vrepf($dst$$VectorRegister, $src$$FloatRegister->to_vr(), 0); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct repl4F_immF0(vecX dst, immFp0 zero) %{ + match(Set dst (Replicate zero)); + predicate(n->as_Vector()->length() == 4 && + Matcher::vector_element_basic_type(n) == T_FLOAT); + + format %{
"VZERO $dst, $zero \t// replicate4F" %} + size(6); + ins_encode %{ + __ z_vzero($dst$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct repl2D_reg_Ex(vecX dst, regD src) %{ + match(Set dst (Replicate src)); + predicate(n->as_Vector()->length() == 2 && + Matcher::vector_element_basic_type(n) == T_DOUBLE); + + format %{ "VREP $dst, $src \t// replicate2D" %} + size(6); + + ins_encode %{ + __ z_vrepg($dst$$VectorRegister, $src$$FloatRegister->to_vr(), 0); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct repl2D_immD0(vecX dst, immDp0 zero) %{ + match(Set dst (Replicate zero)); + predicate(n->as_Vector()->length() == 2 && + Matcher::vector_element_basic_type(n) == T_DOUBLE); + + format %{ "VZERO $dst, $zero \t// replicate2D" %} + size(6); + ins_encode %{ + __ z_vzero($dst$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct repl16B_reg_Ex(vecX dst, iRegI src) %{ + match(Set dst (Replicate src)); + predicate(n->as_Vector()->length() == 16 && + Matcher::vector_element_basic_type(n) == T_BYTE); + + size(12); + ins_encode %{ + __ z_vlvgb($dst$$VectorRegister, $src$$Register, 0); + __ z_vrepb($dst$$VectorRegister, $dst$$VectorRegister, 0); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct repl16B_immIminus1(vecX dst, immI_minus1 src) %{ + match(Set dst (Replicate src)); + predicate(n->as_Vector()->length() == 16 && + Matcher::vector_element_basic_type(n) == T_BYTE); + + format %{ "VONE $dst, $src \t// replicate16B" %} + size(6); + ins_encode %{ + __ z_vone($dst$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct repl16B_immI0(vecX dst, immI_0 zero) %{ + match(Set dst (Replicate zero)); + predicate(n->as_Vector()->length() == 16 && + Matcher::vector_element_basic_type(n) == T_BYTE); + + format %{ "VZERO $dst, $zero \t// replicate16B" %} + size(6); + ins_encode %{ + __ z_vzero($dst$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct repl2L_reg_Ex(vecX dst, iRegL src) %{ + match(Set dst (Replicate src)); + 
predicate(n->as_Vector()->length() == 2 && + Matcher::vector_element_basic_type(n) == T_LONG); + + size(12); + ins_encode %{ + __ z_vlvgg($dst$$VectorRegister, $src$$Register, 0); + __ z_vrepg($dst$$VectorRegister, $dst$$VectorRegister, 0); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct repl2L_immIminus1(vecX dst, immI_minus1 src) %{ + match(Set dst (Replicate src)); + predicate(n->as_Vector()->length() == 2 && + Matcher::vector_element_basic_type(n) == T_LONG); + + format %{ "VONE $dst, $src \t// replicate2L" %} + size(6); + ins_encode %{ + __ z_vone($dst$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct repl2L_immI0(vecX dst, immI_0 zero) %{ + match(Set dst (Replicate zero)); + predicate(n->as_Vector()->length() == 2 && + Matcher::vector_element_basic_type(n) == T_LONG); + + format %{ "VZERO $dst, $zero \t// replicate2L" %} + size(6); + ins_encode %{ + __ z_vzero($dst$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} + + // Load/Store vector // Store Aligned Packed Byte register to memory (8 Bytes).
@@ -10684,6 +11191,21 @@ instruct storeA8B(memory mem, iRegL src) %{ ins_pipe(pipe_class_dummy); %} +// Store a 16-byte vector register to memory. +instruct storeV16(memoryRX mem, vecX src) %{ + predicate(n->as_StoreVector()->memory_size() == 16); + match(Set mem (StoreVector mem src)); + ins_cost(MEMORY_REF_COST); + + format %{ "VST $mem, $src \t// store 16-byte Vector" %} + size(6); + ins_encode %{ + __ z_vst($src$$VectorRegister, + Address(reg_to_register_object($mem$$base), $mem$$index$$Register, $mem$$disp)); + %} + ins_pipe(pipe_class_dummy); +%} + instruct loadV8(iRegL dst, memory mem) %{ match(Set dst (LoadVector mem)); predicate(n->as_LoadVector()->memory_size() == 8); @@ -10695,6 +11217,21 @@ instruct loadV8(iRegL dst, memory mem) %{ ins_pipe(pipe_class_dummy); %} +// Load a 16-byte vector from memory into a vector register. +instruct loadV16(vecX dst, memoryRX mem) %{ + predicate(n->as_LoadVector()->memory_size() == 16); + match(Set dst (LoadVector mem)); + ins_cost(MEMORY_REF_COST); + + format %{ "VL $dst, $mem \t// load 16-byte Vector" %} + size(6); + ins_encode %{ + __ z_vl($dst$$VectorRegister, + Address(reg_to_register_object($mem$$base), $mem$$index$$Register, $mem$$disp)); + %} + ins_pipe(pipe_class_dummy); +%} + // Reinterpret: only one vector size used instruct reinterpret(iRegL dst) %{ match(Set dst (VectorReinterpret dst)); @@ -10704,6 +11241,248 @@ instruct reinterpret(iRegL dst) %{ ins_pipe(pipe_class_dummy); %} +instruct reinterpretX(vecX dst) %{ + match(Set dst (VectorReinterpret dst)); + ins_cost(0); + format %{ "reinterpret $dst" %} + ins_encode( /*empty*/ ); + ins_pipe(pipe_class_dummy); +%} +//----------Vector Arithmetic Instructions-------------------------------------- + +// Vector Addition Instructions + +instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{ + match(Set dst (AddVB src1 src2)); + predicate(n->as_Vector()->length() == 16); + format %{ "VAB $dst,$src1,$src2\t// add packed16B" %} + size(6); + ins_encode %{ + __ z_vab($dst$$VectorRegister,
$src1$$VectorRegister, $src2$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{ + match(Set dst (AddVS src1 src2)); + predicate(n->as_Vector()->length() == 8); + format %{ "VAH $dst,$src1,$src2\t// add packed8S" %} + size(6); + ins_encode %{ + __ z_vah($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ + match(Set dst (AddVI src1 src2)); + predicate(n->as_Vector()->length() == 4); + format %{ "VAF $dst,$src1,$src2\t// add packed4I" %} + size(6); + ins_encode %{ + __ z_vaf($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct vmul16B_reg(vecX dst, vecX src1, vecX src2) %{ + match(Set dst (MulVB src1 src2)); + predicate(n->as_Vector()->length() == 16); + format %{ "VMLB $dst,$src1,$src2\t// mul packed16B" %} + size(6); + ins_encode %{ + __ z_vmlb($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ + match(Set dst (MulVS src1 src2)); + predicate(n->as_Vector()->length() == 8); + format %{ "VMLHW $dst,$src1,$src2\t// mul packed8S" %} + size(6); + ins_encode %{ + __ z_vmlhw($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ + match(Set dst (MulVI src1 src2)); + predicate(n->as_Vector()->length() == 4); + format %{ "VMLF $dst,$src1,$src2\t// mul packed4I" %} + size(6); + ins_encode %{ + __ z_vmlf($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{ + match(Set dst (AddVL src1 src2)); + predicate(n->as_Vector()->length() == 2); + format %{ "VAG $dst,$src1,$src2\t// add packed2L" %} + size(6); + 
ins_encode %{ + __ z_vag($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{ + match(Set dst (SubVB src1 src2)); + predicate(n->as_Vector()->length() == 16); + format %{ "VSB $dst,$src1,$src2\t// sub packed16B" %} + size(6); + ins_encode %{ + __ z_vsb($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{ + match(Set dst (SubVS src1 src2)); + predicate(n->as_Vector()->length() == 8); + format %{ "VSH $dst,$src1,$src2\t// sub packed8S" %} + size(6); + ins_encode %{ + __ z_vsh($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} +instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ + match(Set dst (SubVI src1 src2)); + predicate(n->as_Vector()->length() == 4); + format %{ "VSF $dst,$src1,$src2\t// sub packed4I" %} + size(6); + ins_encode %{ + __ z_vsf($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{ + match(Set dst (SubVL src1 src2)); + predicate(n->as_Vector()->length() == 2); + format %{ "VSG $dst,$src1,$src2\t// sub packed2L" %} + size(6); + ins_encode %{ + __ z_vsg($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{ + match(Set dst (AddVF src1 src2)); + predicate(n->as_Vector()->length() == 4); + format %{ "VFASB $dst,$src1,$src2\t// add packed4F" %} + size(6); + ins_encode %{ + __ z_vfasb($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{ + match(Set dst (AddVD src1 src2)); + predicate(n->as_Vector()->length() == 2); + format %{ "VFADB
$dst,$src1,$src2\t// add packed2D" %} + size(6); + ins_encode %{ + __ z_vfadb($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ + match(Set dst (SubVF src1 src2)); + predicate(n->as_Vector()->length() == 4); + format %{ "VFSSB $dst,$src1,$src2\t// sub packed4F" %} + size(6); + ins_encode %{ + __ z_vfssb($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{ + match(Set dst (SubVD src1 src2)); + predicate(n->as_Vector()->length() == 2); + format %{ "VFSDB $dst,$src1,$src2\t// sub packed2D" %} + size(6); + ins_encode %{ + __ z_vfsdb($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ + match(Set dst (MulVF src1 src2)); + predicate(n->as_Vector()->length() == 4); + format %{ "VFMSB $dst,$src1,$src2\t// mul packed4F" %} + size(6); + ins_encode %{ + __ z_vfmsb($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{ + match(Set dst (MulVD src1 src2)); + predicate(n->as_Vector()->length() == 2); + format %{ "VFMDB $dst,$src1,$src2\t// mul packed2D" %} + size(6); + ins_encode %{ + __ z_vfmdb($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{ + match(Set dst (DivVF src1 src2)); + predicate(n->as_Vector()->length() == 4); + format %{ "VFDSB $dst,$src1,$src2\t// div packed4F" %} + size(6); + ins_encode %{ + __ z_vfdsb($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ + match(Set dst (DivVD src1 src2)); + 
predicate(n->as_Vector()->length() == 2); + format %{ "VFDDB $dst,$src1,$src2\t// div packed2D" %} + size(6); + ins_encode %{ + __ z_vfddb($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} + +// Vector Square Root Instructions + +instruct vsqrt4F_reg(vecX dst, vecX src) %{ + match(Set dst (SqrtVF src)); + predicate(n->as_Vector()->length() == 4); + format %{ "VFSQSB $dst,$src\t// sqrt packed4F" %} + size(6); + ins_encode %{ + __ z_vfsqsb($dst$$VectorRegister, $src$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct vsqrt2D_reg(vecX dst, vecX src) %{ + match(Set dst (SqrtVD src)); + predicate(n->as_Vector()->length() == 2); + format %{ "VFSQDB $dst,$src\t// sqrt packed2D" %} + size(6); + ins_encode %{ + __ z_vfsqdb($dst$$VectorRegister, $src$$VectorRegister); + %} + ins_pipe(pipe_class_dummy); +%} //----------POPULATION COUNT RULES-------------------------------------------- // Byte reverse diff --git a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp index ed1795cfa339f..4353159d9f2b8 100644 --- a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp +++ b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp @@ -81,6 +81,9 @@ #define RegisterSaver_ExcludedFloatReg(regname) \ { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() } +#define RegisterSaver_LiveVReg(regname) \ + { RegisterSaver::v_reg, regname->encoding(), regname->as_VMReg() } + static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = { // Live registers which get spilled to the stack. Register positions // in this array correspond directly to the stack layout. 
@@ -258,6 +261,26 @@ static const RegisterSaver::LiveRegType RegisterSaver_LiveVolatileRegs[] = { // RegisterSaver_ExcludedIntReg(Z_R15) // stack pointer }; +static const RegisterSaver::LiveRegType RegisterSaver_LiveVRegs[] = { + // live vector registers (optional, only these ones are used by C2): + RegisterSaver_LiveVReg( Z_V16 ), + RegisterSaver_LiveVReg( Z_V17 ), + RegisterSaver_LiveVReg( Z_V18 ), + RegisterSaver_LiveVReg( Z_V19 ), + RegisterSaver_LiveVReg( Z_V20 ), + RegisterSaver_LiveVReg( Z_V21 ), + RegisterSaver_LiveVReg( Z_V22 ), + RegisterSaver_LiveVReg( Z_V23 ), + RegisterSaver_LiveVReg( Z_V24 ), + RegisterSaver_LiveVReg( Z_V25 ), + RegisterSaver_LiveVReg( Z_V26 ), + RegisterSaver_LiveVReg( Z_V27 ), + RegisterSaver_LiveVReg( Z_V28 ), + RegisterSaver_LiveVReg( Z_V29 ), + RegisterSaver_LiveVReg( Z_V30 ), + RegisterSaver_LiveVReg( Z_V31 ) +}; + int RegisterSaver::live_reg_save_size(RegisterSet reg_set) { int reg_space = -1; switch (reg_set) { @@ -272,22 +295,30 @@ int RegisterSaver::live_reg_save_size(RegisterSet reg_set) { } -int RegisterSaver::live_reg_frame_size(RegisterSet reg_set) { - return live_reg_save_size(reg_set) + frame::z_abi_160_size; +int RegisterSaver::live_reg_frame_size(RegisterSet reg_set, bool save_vectors) { + const int vregstosave_num = save_vectors ? (sizeof(RegisterSaver_LiveVRegs) / + sizeof(RegisterSaver::LiveRegType)) + : 0; + + return live_reg_save_size(reg_set) + vregstosave_num * v_reg_size + frame::z_abi_160_size; } // return_pc: Specify the register that should be stored as the return pc in the current frame. 
-OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, RegisterSet reg_set, Register return_pc) { +OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, RegisterSet reg_set, Register return_pc, bool save_vectors) { // Record volatile registers as callee-save values in an OopMap so // their save locations will be propagated to the caller frame's // RegisterMap during StackFrameStream construction (needed for // deoptimization; see compiledVFrame::create_stack_value). // Calculate frame size. - const int frame_size_in_bytes = live_reg_frame_size(reg_set); + const int vregstosave_num = save_vectors ? (sizeof(RegisterSaver_LiveVRegs) / + sizeof(RegisterSaver::LiveRegType)) + : 0; + const int register_save_size = live_reg_save_size(reg_set) + vregstosave_num * v_reg_size; + const int frame_size_in_bytes = frame::z_abi_160_size + register_save_size; const int frame_size_in_slots = frame_size_in_bytes / sizeof(jint); - const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set); + const int register_save_offset = frame_size_in_bytes - register_save_size; // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words. 
OopMap* map = new OopMap(frame_size_in_slots, 0); @@ -382,6 +413,25 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, RegisterSet reg assert(first != noreg, "Should spill at least one int reg."); __ z_stmg(first, last, first_offset, Z_SP); + for (int i = 0; i < vregstosave_num; i++) { + int reg_num = RegisterSaver_LiveVRegs[i].reg_num; + //int reg_type = RegisterSaver_LiveVRegs[i].reg_type; + + __ z_vst(as_VectorRegister(reg_num), Address(Z_SP, offset)); + + map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), + RegisterSaver_LiveVRegs[i].vmreg); + map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size ) >> 2), + RegisterSaver_LiveVRegs[i].vmreg->next()); + map->set_callee_saved(VMRegImpl::stack2reg((offset + (half_reg_size * 2)) >> 2), + RegisterSaver_LiveVRegs[i].vmreg->next(2)); + map->set_callee_saved(VMRegImpl::stack2reg((offset + (half_reg_size * 3)) >> 2), + RegisterSaver_LiveVRegs[i].vmreg->next(3)); + offset += v_reg_size; + } + + assert(offset == frame_size_in_bytes, "consistency check"); + // And we're done. return map; } @@ -433,14 +483,22 @@ OopMap* RegisterSaver::generate_oop_map(MacroAssembler* masm, RegisterSet reg_se } offset += reg_size; } + assert(offset == frame_size_in_bytes, "consistency check"); return map; } // Pop the current frame and restore all the registers that we saved. -void RegisterSaver::restore_live_registers(MacroAssembler* masm, RegisterSet reg_set) { +void RegisterSaver::restore_live_registers(MacroAssembler* masm, RegisterSet reg_set, bool save_vectors) { int offset; - const int register_save_offset = live_reg_frame_size(reg_set) - live_reg_save_size(reg_set); + //const int register_save_offset = live_reg_frame_size(reg_set) - live_reg_save_size(reg_set); + // Calculate frame size. + const int vregstosave_num = save_vectors ? 
(sizeof(RegisterSaver_LiveVRegs) / + sizeof(RegisterSaver::LiveRegType)) + : 0; + const int register_save_size = live_reg_save_size(reg_set) + vregstosave_num * v_reg_size; + const int frame_size_in_bytes = frame::z_abi_160_size + register_save_size; + const int register_save_offset = frame_size_in_bytes - register_save_size; Register first = noreg; Register last = noreg; @@ -517,6 +575,16 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, RegisterSet reg assert(first != noreg, "Should spill at least one int reg."); __ z_lmg(first, last, first_offset, Z_SP); + for (int i = 0; i < vregstosave_num; i++) { + int reg_num = RegisterSaver_LiveVRegs[i].reg_num; + //int reg_type = RegisterSaver_LiveVRegs[i].reg_type; + + __ z_vl(as_VectorRegister(reg_num), Address(Z_SP, offset)); + + offset += v_reg_size; + } + + assert(offset == frame_size_in_bytes, "consistency check"); // Pop the frame. __ pop_frame(); @@ -527,14 +595,12 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, RegisterSet reg // Pop the current frame and restore the registers that might be holding a result. void RegisterSaver::restore_result_registers(MacroAssembler* masm) { - int i; - int offset; const int regstosave_num = sizeof(RegisterSaver_LiveRegs) / sizeof(RegisterSaver::LiveRegType); const int register_save_offset = live_reg_frame_size(all_registers) - live_reg_save_size(all_registers); // Restore all result registers (ints and floats). 
- offset = register_save_offset; + int offset = register_save_offset; for (int i = 0; i < regstosave_num; i++, offset += reg_size) { int reg_num = RegisterSaver_LiveRegs[i].reg_num; int reg_type = RegisterSaver_LiveRegs[i].reg_type; @@ -557,6 +623,7 @@ void RegisterSaver::restore_result_registers(MacroAssembler* masm) { ShouldNotReachHere(); } } + assert(offset == live_reg_frame_size(all_registers), "consistency check"); } // --------------------------------------------------------------------------- @@ -980,8 +1047,8 @@ static void gen_special_dispatch(MacroAssembler *masm, // Is the size of a vector size (in bytes) bigger than a size saved by default? // 8 bytes registers are saved by default on z/Architecture. bool SharedRuntime::is_wide_vector(int size) { - // Note, MaxVectorSize == 8 on this platform. - assert(size <= 8, "%d bytes vectors are not supported", size); + // Note, MaxVectorSize == 8/16 on this platform. + assert(size <= (SuperwordUseVX ? 16 : 8), "%d bytes vectors are not supported", size); return size > 8; } @@ -2876,8 +2943,9 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t __ z_lg(Z_R14, Address(Z_thread, JavaThread::saved_exception_pc_offset())); } + bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP); // Save registers, fpu state, and flags - map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers); + map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers, Z_R14, save_vectors); if (!cause_return) { // Keep a copy of the return pc to detect if it gets modified. @@ -2909,7 +2977,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t // Pending exception case, used (sporadically) by // api/java_lang/Thread.State/index#ThreadState et al. 
- RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers); + RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers, save_vectors); // Jump to forward_exception_entry, with the issuing PC in Z_R14 // so it looks like the original nmethod called forward_exception_entry. @@ -2922,7 +2990,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t if (!cause_return) { Label no_adjust; // If our stashed return pc was modified by the runtime we avoid touching it - const int offset_of_return_pc = _z_common_abi(return_pc) + RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers); + const int offset_of_return_pc = _z_common_abi(return_pc) + RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers, save_vectors); __ z_cg(Z_R6, offset_of_return_pc, Z_SP); __ z_brne(no_adjust); @@ -2935,7 +3003,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t } // Normal exit, restore registers and exit. 
- RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers); + RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers, save_vectors); __ z_br(Z_R14); @@ -2943,7 +3011,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t masm->flush(); // Fill-out other meta info - return SafepointBlob::create(&buffer, oop_maps, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize); + return SafepointBlob::create(&buffer, oop_maps, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers, save_vectors)/wordSize); } diff --git a/src/hotspot/cpu/s390/vm_version_s390.cpp b/src/hotspot/cpu/s390/vm_version_s390.cpp index af0903884fb4f..05e1b86c816d5 100644 --- a/src/hotspot/cpu/s390/vm_version_s390.cpp +++ b/src/hotspot/cpu/s390/vm_version_s390.cpp @@ -97,7 +97,18 @@ void VM_Version::initialize() { intx cache_line_size = Dcache_lineSize(0); #ifdef COMPILER2 - MaxVectorSize = 8; + if ( get_model_index() >= 7 ) { + if (FLAG_IS_DEFAULT(SuperwordUseVX)) { + FLAG_SET_ERGO(SuperwordUseVX, true); + } + } else { + if (SuperwordUseVX) { + warning("SuperwordUseVX specified, but needs at least Z13."); + FLAG_SET_DEFAULT(SuperwordUseVX, false); + } + } + + MaxVectorSize = SuperwordUseVX ? 
16 : 8; #endif if (has_PrefetchRaw()) { diff --git a/src/hotspot/cpu/s390/vmreg_s390.cpp b/src/hotspot/cpu/s390/vmreg_s390.cpp index 239b68513b96c..5bec8313a48c4 100644 --- a/src/hotspot/cpu/s390/vmreg_s390.cpp +++ b/src/hotspot/cpu/s390/vmreg_s390.cpp @@ -43,6 +43,16 @@ void VMRegImpl::set_regName() { regName[i++] = freg->name(); freg = freg->successor(); } + + VectorRegister vreg = ::as_VectorRegister(0); + for (; i < ConcreteRegisterImpl::max_vr;) { + regName[i++] = vreg->name(); + regName[i++] = vreg->name(); + regName[i++] = vreg->name(); + regName[i++] = vreg->name(); + vreg = vreg->successor(); + } + for (; i < ConcreteRegisterImpl::number_of_registers; i ++) { regName[i] = "NON-GPR-XMM"; } diff --git a/src/hotspot/cpu/s390/vmreg_s390.hpp b/src/hotspot/cpu/s390/vmreg_s390.hpp index 3dd1bd9a16cbd..9b6848a5b2120 100644 --- a/src/hotspot/cpu/s390/vmreg_s390.hpp +++ b/src/hotspot/cpu/s390/vmreg_s390.hpp @@ -35,19 +35,40 @@ inline bool is_FloatRegister() { value() < ConcreteRegisterImpl::max_fpr; } +inline bool is_VectorRegister() { + return value() >= ConcreteRegisterImpl::max_fpr && + value() < ConcreteRegisterImpl::max_vr; +} + inline Register as_Register() { assert(is_Register() && is_even(value()), "even-aligned GPR name"); - return ::as_Register(value() >> 1); + return ::as_Register(value() / Register::max_slots_per_register); } inline FloatRegister as_FloatRegister() { assert(is_FloatRegister() && is_even(value()), "must be"); - return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1); + return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) + / FloatRegister::max_slots_per_register); +} + +inline VectorRegister as_VectorRegister() { + assert(is_VectorRegister() + && (value() % VectorRegister::max_slots_per_register == 0), "must be"); + return ::as_VectorRegister((value() - ConcreteRegisterImpl::max_fpr) + / VectorRegister::max_slots_per_register); } inline bool is_concrete() { assert(is_reg(), "must be"); - return 
is_even(value()); + if (is_FloatRegister()) { + int base = value() - ConcreteRegisterImpl::max_gpr; + return (base % FloatRegister::max_slots_per_register) == 0; + } else if (is_VectorRegister()) { + int base = value() - ConcreteRegisterImpl::max_fpr; + return (base % VectorRegister::max_slots_per_register) == 0; + } else { + return is_even(value()); + } } #endif // CPU_S390_VMREG_S390_HPP diff --git a/src/hotspot/cpu/s390/vmreg_s390.inline.hpp b/src/hotspot/cpu/s390/vmreg_s390.inline.hpp index 593a0d480454e..22a55befcf31e 100644 --- a/src/hotspot/cpu/s390/vmreg_s390.inline.hpp +++ b/src/hotspot/cpu/s390/vmreg_s390.inline.hpp @@ -27,15 +27,21 @@ #define CPU_S390_VMREG_S390_INLINE_HPP inline VMReg Register::as_VMReg() const { - return VMRegImpl::as_VMReg(encoding() << 1); + return VMRegImpl::as_VMReg(encoding() * Register::max_slots_per_register); } inline VMReg FloatRegister::as_VMReg() const { - return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr); + return VMRegImpl::as_VMReg((encoding() * FloatRegister::max_slots_per_register) + + ConcreteRegisterImpl::max_gpr); +} + +inline VMReg VectorRegister::as_VMReg() const { + return VMRegImpl::as_VMReg((encoding() * VectorRegister::max_slots_per_register) + + ConcreteRegisterImpl::max_fpr); } inline VMReg ConditionRegister::as_VMReg() const { - return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_fpr); + return VMRegImpl::as_VMReg(encoding() + ConcreteRegisterImpl::max_vr); } #endif // CPU_S390_VMREG_S390_INLINE_HPP diff --git a/src/hotspot/share/adlc/output_c.cpp b/src/hotspot/share/adlc/output_c.cpp index b54e62663e752..e09f4e965c85a 100644 --- a/src/hotspot/share/adlc/output_c.cpp +++ b/src/hotspot/share/adlc/output_c.cpp @@ -2363,6 +2363,9 @@ class DefineEmitState { if (strcmp(rep_var,"$VectorRegister") == 0) return "as_VectorRegister"; if (strcmp(rep_var,"$VectorSRegister") == 0) return "as_VectorSRegister"; #endif +#if defined(S390) + if 
(strcmp(rep_var,"$VectorRegister") == 0) return "as_VectorRegister"; +#endif #if defined(AARCH64) if (strcmp(rep_var,"$PRegister") == 0) return "as_PRegister"; #endif diff --git a/src/hotspot/share/opto/machnode.hpp b/src/hotspot/share/opto/machnode.hpp index b834f994df45a..ec60c85e8f200 100644 --- a/src/hotspot/share/opto/machnode.hpp +++ b/src/hotspot/share/opto/machnode.hpp @@ -135,6 +135,14 @@ class MachOper : public ResourceObj { return ::as_VectorSRegister(reg(ra_, node, idx)); } #endif +#if defined(S390) + VectorRegister as_VectorRegister(PhaseRegAlloc *ra_, const Node *node) const { + return ::as_VectorRegister(reg(ra_, node)); + } + VectorRegister as_VectorRegister(PhaseRegAlloc *ra_, const Node *node, int idx) const { + return ::as_VectorRegister(reg(ra_, node, idx)); + } +#endif #if defined(AARCH64) PRegister as_PRegister(PhaseRegAlloc* ra_, const Node* node) const { return ::as_PRegister(reg(ra_, node)); diff --git a/src/hotspot/share/opto/type.cpp b/src/hotspot/share/opto/type.cpp index 84d092f2ffd59..847f8dbea44c8 100644 --- a/src/hotspot/share/opto/type.cpp +++ b/src/hotspot/share/opto/type.cpp @@ -77,7 +77,7 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = { { Bad, T_ILLEGAL, "vectora:", false, Op_VecA, relocInfo::none }, // VectorA. { Bad, T_ILLEGAL, "vectors:", false, 0, relocInfo::none }, // VectorS { Bad, T_ILLEGAL, "vectord:", false, Op_RegL, relocInfo::none }, // VectorD - { Bad, T_ILLEGAL, "vectorx:", false, 0, relocInfo::none }, // VectorX + { Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX { Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY { Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ #else // all other