@@ -841,26 +841,54 @@ BEGIN_TEST(to_hw_instr.swap_linear_vgpr)
841
841
finish_to_hw_instr_test ();
842
842
END_TEST
843
843
844
- BEGIN_TEST (to_hw_instr.pack2x16_alignbyte_constant )
844
+ BEGIN_TEST (to_hw_instr.pack2x16_constant )
845
845
PhysReg v0_lo{256 };
846
846
PhysReg v0_hi{256 };
847
+ PhysReg v1_lo{257 };
847
848
PhysReg v1_hi{257 };
848
849
v0_hi.reg_b += 2 ;
849
850
v1_hi.reg_b += 2 ;
850
851
851
- if (!setup_cs(NULL , GFX10))
852
- return ;
852
+ for (amd_gfx_level lvl : {GFX10, GFX11}) {
853
+ if (!setup_cs (NULL , lvl))
854
+ continue ;
853
855
854
- /* prevent usage of v_pack_b32_f16 */
855
- program->blocks[0 ].fp_mode.denorm16_64 = fp_denorm_flush;
856
+ /* prevent usage of v_pack_b32_f16 */
857
+ program->blocks [0 ].fp_mode .denorm16_64 = fp_denorm_flush;
856
858
857
- // >> p_unit_test 0
858
- // ! v1: %_:v[0] = v_alignbyte_b32 0x3800, %_:v[1][16:32], 2
859
- bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
860
- bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
861
- Operand (v1_hi, v2b), Operand::c16(0x3800 ));
859
+ // >> p_unit_test 0
860
+ // ! v1: %_:v[0] = v_alignbyte_b32 0x3800, %_:v[1][16:32], 2
861
+ bld.pseudo (aco_opcode::p_unit_test, Operand::zero ());
862
+ bld.pseudo (aco_opcode::p_parallelcopy, Definition (v0_lo, v2b), Definition (v0_hi, v2b),
863
+ Operand (v1_hi, v2b), Operand::c16 (0x3800 ));
862
864
863
- // ! s_endpgm
865
+ // ! p_unit_test 1
866
+ // ! v2b: %_:v[0][0:16] = v_lshrrev_b32 16, %_:v[1][16:32]
867
+ bld.pseudo (aco_opcode::p_unit_test, Operand::c32 (1 ));
868
+ bld.pseudo (aco_opcode::p_parallelcopy, Definition (v0_lo, v2b), Definition (v0_hi, v2b),
869
+ Operand (v1_hi, v2b), Operand::zero (2 ));
864
870
865
- finish_to_hw_instr_test ();
871
+ // ! p_unit_test 2
872
+ // ~gfx10! v2b: %_:v[0][0:16] = v_and_b32 0xffff, %_:v[1][0:16]
873
+ // ~gfx11! v1: %_:v[0] = v_cvt_u32_u16 %_:v[1][0:16]
874
+ bld.pseudo (aco_opcode::p_unit_test, Operand::c32 (2 ));
875
+ bld.pseudo (aco_opcode::p_parallelcopy, Definition (v0_lo, v2b), Definition (v0_hi, v2b),
876
+ Operand (v1_lo, v2b), Operand::zero (2 ));
877
+
878
+ // ! p_unit_test 3
879
+ // ! v2b: %_:v[0][16:32] = v_and_b32 0xffff0000, %_:v[1][16:32]
880
+ bld.pseudo (aco_opcode::p_unit_test, Operand::c32 (3 ));
881
+ bld.pseudo (aco_opcode::p_parallelcopy, Definition (v0_lo, v2b), Definition (v0_hi, v2b),
882
+ Operand::zero (2 ), Operand (v1_hi, v2b));
883
+
884
+ // ! p_unit_test 4
885
+ // ! v2b: %_:v[0][16:32] = v_lshlrev_b32 16, %_:v[1][0:16]
886
+ bld.pseudo (aco_opcode::p_unit_test, Operand::c32 (4 ));
887
+ bld.pseudo (aco_opcode::p_parallelcopy, Definition (v0_lo, v2b), Definition (v0_hi, v2b),
888
+ Operand::zero (2 ), Operand (v1_lo, v2b));
889
+
890
+ // ! s_endpgm
891
+
892
+ finish_to_hw_instr_test ();
893
+ }
866
894
END_TEST
0 commit comments