Skip to content

Commit 7a27724

Browse files
authored
x64: convert more logical operations (#10753)
* x64: convert more logical operations This uses the new assembler for packed operations like `pand`, `pandn`, `por`, and `pxor`. * review: remove unnecessary `is_xmm_mem`
1 parent 0e05566 commit 7a27724

21 files changed

+55
-81
lines changed

cranelift/assembler-x64/meta/src/instructions/and.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,5 +45,7 @@ pub fn list() -> Vec<Inst> {
4545
inst("andpd", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0x54]).r(), _64b | compat | sse2),
4646
inst("andnps", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x0F, 0x55]).r(), _64b | compat | sse),
4747
inst("andnpd", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0x55]).r(), _64b | compat | sse2),
48+
inst("pand", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0xDB]).r(), _64b | compat | sse2),
49+
inst("pandn", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0xDF]).r(), _64b | compat | sse2),
4850
]
4951
}

cranelift/assembler-x64/meta/src/instructions/or.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,5 +36,6 @@ pub fn list() -> Vec<Inst> {
3636
// Vector instructions.
3737
inst("orps", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x0F, 0x56]).r(), _64b | compat | sse),
3838
inst("orpd", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0x56]).r(), _64b | compat | sse2),
39+
inst("por", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0xEB]).r(), _64b | compat | sse2),
3940
]
4041
}

cranelift/assembler-x64/meta/src/instructions/xor.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,5 +36,6 @@ pub fn list() -> Vec<Inst> {
3636
// Vector instructions.
3737
inst("xorps", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x0F, 0x57]).r(), _64b | compat | sse),
3838
inst("xorpd", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0x57]).r(), _64b | compat | sse2),
39+
inst("pxor", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0xEF]).r(), _64b | compat | sse2),
3940
]
4041
}

cranelift/codegen/src/isa/x64/inst.isle

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -926,8 +926,6 @@
926926
Packusdw
927927
Packuswb
928928
Palignr
929-
Pand
930-
Pandn
931929
Pavgb
932930
Pavgw
933931
Pblendvb
@@ -980,7 +978,6 @@
980978
Pmulld
981979
Pmullw
982980
Pmuludq
983-
Por
984981
Pshufb
985982
Pshufd
986983
Psllw
@@ -996,7 +993,6 @@
996993
Punpckhwd
997994
Punpcklbw
998995
Punpcklwd
999-
Pxor
1000996
Rcpss
1001997
Roundps
1002998
Roundpd
@@ -3575,11 +3571,10 @@
35753571

35763572
;; Helper for creating `pand` instructions.
35773573
(decl x64_pand (Xmm XmmMem) Xmm)
3578-
(rule 0 (x64_pand src1 src2)
3579-
(xmm_rm_r (SseOpcode.Pand) src1 src2))
35803574
(rule 1 (x64_pand src1 src2)
35813575
(if-let true (use_avx))
35823576
(xmm_rmir_vex (AvxOpcode.Vpand) src1 src2))
3577+
(rule 0 (x64_pand src1 src2) (x64_pand_a src1 src2))
35833578

35843579
;; Helper for creating `andps` instructions.
35853580
(decl x64_andps (Xmm XmmMem) Xmm)
@@ -3597,11 +3592,10 @@
35973592

35983593
;; Helper for creating `por` instructions.
35993594
(decl x64_por (Xmm XmmMem) Xmm)
3600-
(rule 0 (x64_por src1 src2)
3601-
(xmm_rm_r (SseOpcode.Por) src1 src2))
36023595
(rule 1 (x64_por src1 src2)
36033596
(if-let true (use_avx))
36043597
(xmm_rmir_vex (AvxOpcode.Vpor) src1 src2))
3598+
(rule 0 (x64_por src1 src2) (x64_por_a src1 src2))
36053599

36063600
;; Helper for creating `orps` instructions.
36073601
(decl x64_orps (Xmm XmmMem) Xmm)
@@ -3619,11 +3613,10 @@
36193613

36203614
;; Helper for creating `pxor` instructions.
36213615
(decl x64_pxor (Xmm XmmMem) Xmm)
3622-
(rule 0 (x64_pxor src1 src2)
3623-
(xmm_rm_r (SseOpcode.Pxor) src1 src2))
36243616
(rule 1 (x64_pxor src1 src2)
36253617
(if-let true (use_avx))
36263618
(xmm_rmir_vex (AvxOpcode.Vpxor) src1 src2))
3619+
(rule 0 (x64_pxor src1 src2) (x64_pxor_a src1 src2))
36273620

36283621
;; Helper for creating `xorps` instructions.
36293622
(decl x64_xorps (Xmm XmmMem) Xmm)
@@ -3783,25 +3776,24 @@
37833776

37843777
;; Helper for creating `pandn` instructions.
37853778
(decl x64_pandn (Xmm XmmMem) Xmm)
3786-
(rule 0 (x64_pandn src1 src2)
3787-
(xmm_rm_r (SseOpcode.Pandn) src1 src2))
37883779
(rule 1 (x64_pandn src1 src2)
37893780
(if-let true (use_avx))
37903781
(xmm_rmir_vex (AvxOpcode.Vpandn) src1 src2))
3782+
(rule 0 (x64_pandn src1 src2) (x64_pandn_a src1 src2))
37913783

37923784
;; Helper for creating `addss` instructions.
37933785
(decl x64_addss (Xmm XmmMem) Xmm)
37943786
(rule 1 (x64_addss src1 src2)
37953787
(if-let true (use_avx))
37963788
(xmm_rmir_vex (AvxOpcode.Vaddss) src1 src2))
3797-
(rule (x64_addss src1 src2) (x64_addss_a src1 src2))
3789+
(rule 0 (x64_addss src1 src2) (x64_addss_a src1 src2))
37983790

37993791
;; Helper for creating `addsd` instructions.
38003792
(decl x64_addsd (Xmm XmmMem) Xmm)
38013793
(rule 1 (x64_addsd src1 src2)
38023794
(if-let true (use_avx))
38033795
(xmm_rmir_vex (AvxOpcode.Vaddsd) src1 src2))
3804-
(rule (x64_addsd src1 src2) (x64_addsd_a src1 src2))
3796+
(rule 0 (x64_addsd src1 src2) (x64_addsd_a src1 src2))
38053797

38063798
;; Helper for creating `addps` instructions.
38073799
(decl x64_addps (Xmm XmmMem) Xmm)
@@ -3822,14 +3814,14 @@
38223814
(rule 1 (x64_subss src1 src2)
38233815
(if-let true (use_avx))
38243816
(xmm_rmir_vex (AvxOpcode.Vsubss) src1 src2))
3825-
(rule (x64_subss src1 src2) (x64_subss_a src1 src2))
3817+
(rule 0 (x64_subss src1 src2) (x64_subss_a src1 src2))
38263818

38273819
;; Helper for creating `subsd` instructions.
38283820
(decl x64_subsd (Xmm XmmMem) Xmm)
38293821
(rule 1 (x64_subsd src1 src2)
38303822
(if-let true (use_avx))
38313823
(xmm_rmir_vex (AvxOpcode.Vsubsd) src1 src2))
3832-
(rule (x64_subsd src1 src2) (x64_subsd_a src1 src2))
3824+
(rule 0 (x64_subsd src1 src2) (x64_subsd_a src1 src2))
38333825

38343826
;; Helper for creating `subps` instructions.
38353827
(decl x64_subps (Xmm XmmMem) Xmm)

cranelift/codegen/src/isa/x64/inst/args.rs

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1001,8 +1001,6 @@ pub enum SseOpcode {
10011001
Packusdw,
10021002
Packuswb,
10031003
Palignr,
1004-
Pand,
1005-
Pandn,
10061004
Pavgb,
10071005
Pavgw,
10081006
Pblendvb,
@@ -1055,7 +1053,6 @@ pub enum SseOpcode {
10551053
Pmulld,
10561054
Pmullw,
10571055
Pmuludq,
1058-
Por,
10591056
Pshufb,
10601057
Pshufd,
10611058
Psllw,
@@ -1071,7 +1068,6 @@ pub enum SseOpcode {
10711068
Punpckhwd,
10721069
Punpcklbw,
10731070
Punpcklwd,
1074-
Pxor,
10751071
Rcpss,
10761072
Roundps,
10771073
Roundpd,
@@ -1164,8 +1160,6 @@ impl SseOpcode {
11641160
| SseOpcode::Packssdw
11651161
| SseOpcode::Packsswb
11661162
| SseOpcode::Packuswb
1167-
| SseOpcode::Pand
1168-
| SseOpcode::Pandn
11691163
| SseOpcode::Pavgb
11701164
| SseOpcode::Pavgw
11711165
| SseOpcode::Pcmpeqb
@@ -1186,7 +1180,6 @@ impl SseOpcode {
11861180
| SseOpcode::Pmulhuw
11871181
| SseOpcode::Pmullw
11881182
| SseOpcode::Pmuludq
1189-
| SseOpcode::Por
11901183
| SseOpcode::Pshufd
11911184
| SseOpcode::Psllw
11921185
| SseOpcode::Pslld
@@ -1200,7 +1193,6 @@ impl SseOpcode {
12001193
| SseOpcode::Punpckhwd
12011194
| SseOpcode::Punpcklbw
12021195
| SseOpcode::Punpcklwd
1203-
| SseOpcode::Pxor
12041196
| SseOpcode::Sqrtpd
12051197
| SseOpcode::Sqrtsd
12061198
| SseOpcode::Ucomisd
@@ -1357,8 +1349,6 @@ impl fmt::Debug for SseOpcode {
13571349
SseOpcode::Packusdw => "packusdw",
13581350
SseOpcode::Packuswb => "packuswb",
13591351
SseOpcode::Palignr => "palignr",
1360-
SseOpcode::Pand => "pand",
1361-
SseOpcode::Pandn => "pandn",
13621352
SseOpcode::Pavgb => "pavgb",
13631353
SseOpcode::Pavgw => "pavgw",
13641354
SseOpcode::Pblendvb => "pblendvb",
@@ -1411,7 +1401,6 @@ impl fmt::Debug for SseOpcode {
14111401
SseOpcode::Pmulld => "pmulld",
14121402
SseOpcode::Pmullw => "pmullw",
14131403
SseOpcode::Pmuludq => "pmuludq",
1414-
SseOpcode::Por => "por",
14151404
SseOpcode::Pshufb => "pshufb",
14161405
SseOpcode::Pshufd => "pshufd",
14171406
SseOpcode::Psllw => "psllw",
@@ -1427,7 +1416,6 @@ impl fmt::Debug for SseOpcode {
14271416
SseOpcode::Punpckhwd => "punpckhwd",
14281417
SseOpcode::Punpcklbw => "punpcklbw",
14291418
SseOpcode::Punpcklwd => "punpcklwd",
1430-
SseOpcode::Pxor => "pxor",
14311419
SseOpcode::Rcpss => "rcpss",
14321420
SseOpcode::Roundps => "roundps",
14331421
SseOpcode::Roundpd => "roundpd",

cranelift/codegen/src/isa/x64/inst/emit.rs

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2194,8 +2194,6 @@ pub(crate) fn emit(
21942194
SseOpcode::Packusdw => (LegacyPrefixes::_66, 0x0F382B, 3),
21952195
SseOpcode::Packuswb => (LegacyPrefixes::_66, 0x0F67, 2),
21962196
SseOpcode::Pmaddubsw => (LegacyPrefixes::_66, 0x0F3804, 3),
2197-
SseOpcode::Pand => (LegacyPrefixes::_66, 0x0FDB, 2),
2198-
SseOpcode::Pandn => (LegacyPrefixes::_66, 0x0FDF, 2),
21992197
SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2),
22002198
SseOpcode::Pavgw => (LegacyPrefixes::_66, 0x0FE3, 2),
22012199
SseOpcode::Pcmpeqb => (LegacyPrefixes::_66, 0x0F74, 2),
@@ -2226,7 +2224,6 @@ pub(crate) fn emit(
22262224
SseOpcode::Pmulld => (LegacyPrefixes::_66, 0x0F3840, 3),
22272225
SseOpcode::Pmullw => (LegacyPrefixes::_66, 0x0FD5, 2),
22282226
SseOpcode::Pmuludq => (LegacyPrefixes::_66, 0x0FF4, 2),
2229-
SseOpcode::Por => (LegacyPrefixes::_66, 0x0FEB, 2),
22302227
SseOpcode::Pshufb => (LegacyPrefixes::_66, 0x0F3800, 3),
22312228
SseOpcode::Punpckhbw => (LegacyPrefixes::_66, 0x0F68, 2),
22322229
SseOpcode::Punpckhwd => (LegacyPrefixes::_66, 0x0F69, 2),
@@ -2236,7 +2233,6 @@ pub(crate) fn emit(
22362233
SseOpcode::Punpcklqdq => (LegacyPrefixes::_66, 0x0F6C, 2),
22372234
SseOpcode::Punpckhdq => (LegacyPrefixes::_66, 0x0F6A, 2),
22382235
SseOpcode::Punpckhqdq => (LegacyPrefixes::_66, 0x0F6D, 2),
2239-
SseOpcode::Pxor => (LegacyPrefixes::_66, 0x0FEF, 2),
22402236
SseOpcode::Unpcklps => (LegacyPrefixes::None, 0x0F14, 2),
22412237
SseOpcode::Unpckhps => (LegacyPrefixes::None, 0x0F15, 2),
22422238
SseOpcode::Movss => (LegacyPrefixes::_F3, 0x0F10, 2),

cranelift/codegen/src/isa/x64/inst/emit_tests.rs

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3348,12 +3348,6 @@ fn test_x64_emit() {
33483348
"pminud %xmm2, %xmm3, %xmm2",
33493349
));
33503350

3351-
insns.push((
3352-
Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::reg(xmm11), w_xmm2),
3353-
"66410FEFD3",
3354-
"pxor %xmm2, %xmm11, %xmm2",
3355-
));
3356-
33573351
insns.push((
33583352
Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::reg(xmm11), w_xmm2),
33593353
"66410F3800D3",

cranelift/filetests/filetests/isa/x64/fcvt.clif

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1074,9 +1074,9 @@ block0(v0: f32x4):
10741074
; subps %xmm7, %xmm0
10751075
; cmpps $2, %xmm7, %xmm0, %xmm7
10761076
; cvttps2dq %xmm0, %xmm0
1077-
; pxor %xmm0, %xmm7, %xmm0
1077+
; pxor %xmm7, %xmm0
10781078
; uninit %xmm1
1079-
; pxor %xmm1, %xmm1, %xmm1
1079+
; pxor %xmm1, %xmm1
10801080
; pmaxsd %xmm0, %xmm1, %xmm0
10811081
; paddd %xmm6, %xmm0
10821082
; movq %rbp, %rsp
@@ -1118,12 +1118,12 @@ block0(v0: f32x4):
11181118
; movdqa %xmm0, %xmm4
11191119
; cmpps $0, %xmm4, %xmm0, %xmm4
11201120
; andps %xmm4, %xmm0
1121-
; pxor %xmm4, %xmm0, %xmm4
1121+
; pxor %xmm0, %xmm4
11221122
; cvttps2dq %xmm0, %xmm1
11231123
; movdqa %xmm1, %xmm0
1124-
; pand %xmm0, %xmm4, %xmm0
1124+
; pand %xmm4, %xmm0
11251125
; psrad %xmm0, $31, %xmm0
1126-
; pxor %xmm0, %xmm1, %xmm0
1126+
; pxor %xmm1, %xmm0
11271127
; movq %rbp, %rsp
11281128
; popq %rbp
11291129
; ret
@@ -1157,12 +1157,12 @@ block0(v0: i64x2):
11571157
; movq %rsp, %rbp
11581158
; block0:
11591159
; movdqa %xmm0, %xmm7
1160-
; pand %xmm7, const(0), %xmm7
1160+
; pand (%rip), %xmm7
11611161
; movdqa %xmm7, %xmm1
1162-
; por %xmm1, const(1), %xmm1
1162+
; por (%rip), %xmm1
11631163
; movdqa %xmm1, %xmm7
11641164
; psrlq %xmm0, $32, %xmm0
1165-
; por %xmm0, const(2), %xmm0
1165+
; por (%rip), %xmm0
11661166
; subpd (%rip), %xmm0
11671167
; movdqa %xmm0, %xmm1
11681168
; movdqa %xmm7, %xmm0

cranelift/filetests/filetests/isa/x64/shuffle.clif

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -626,7 +626,7 @@ block0(v0: i8x16, v1: i8x16):
626626
; movq %rsp, %rbp
627627
; block0:
628628
; uninit %xmm4
629-
; pxor %xmm4, %xmm4, %xmm4
629+
; pxor %xmm4, %xmm4
630630
; pshufb %xmm0, %xmm4, %xmm0
631631
; movq %rbp, %rsp
632632
; popq %rbp

cranelift/filetests/filetests/isa/x64/simd-bitselect.clif

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -82,12 +82,12 @@ block0(v0: i8x16, v1: i8x16, v2: i32x4):
8282
; pushq %rbp
8383
; movq %rsp, %rbp
8484
; block0:
85-
; pand %xmm0, %xmm2, %xmm0
85+
; pand %xmm2, %xmm0
8686
; movdqa %xmm0, %xmm7
87-
; pandn %xmm2, %xmm1, %xmm2
87+
; pandn %xmm1, %xmm2
8888
; movdqa %xmm7, %xmm1
8989
; movdqa %xmm2, %xmm0
90-
; por %xmm0, %xmm1, %xmm0
90+
; por %xmm1, %xmm0
9191
; movq %rbp, %rsp
9292
; popq %rbp
9393
; ret
@@ -216,9 +216,9 @@ block0(v0: i8x16, v1: i8x16):
216216
; movdqa %xmm0, %xmm7
217217
; movdqu const(0), %xmm0
218218
; movdqa %xmm7, %xmm2
219-
; pand %xmm2, %xmm0, %xmm2
220-
; pandn %xmm0, %xmm1, %xmm0
221-
; por %xmm0, %xmm2, %xmm0
219+
; pand %xmm0, %xmm2
220+
; pandn %xmm1, %xmm0
221+
; por %xmm2, %xmm0
222222
; movq %rbp, %rsp
223223
; popq %rbp
224224
; ret

0 commit comments

Comments
 (0)