diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-select.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-select.ll
index 075397afdce791b..9c573c7eb49c790 100644
--- a/llvm/test/Analysis/CostModel/AArch64/shuffle-select.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-select.ll
@@ -1,97 +1,112 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt < %s -mtriple=aarch64--linux-gnu -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=COST
-; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CODE
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
-; COST-LABEL: sel.v8i8
-; COST: Found an estimated cost of 28 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
-; CODE-LABEL: sel.v8i8
-; CODE: tbl v0.8b, { v0.16b }, v1.8b
-define <8 x i8> @sel.v8i8(<8 x i8> %v0, <8 x i8> %v1) {
+define <8 x i8> @sel_v8i8(<8 x i8> %v0, <8 x i8> %v1) {
+; COST-LABEL: 'sel_v8i8'
+; COST-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %tmp0
+;
   %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
   ret <8 x i8> %tmp0
 }
 
-; COST-LABEL: sel.v16i8
-; COST: Found an estimated cost of 60 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
-; CODE-LABEL: sel.v16i8
-; CODE: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-define <16 x i8> @sel.v16i8(<16 x i8> %v0, <16 x i8> %v1) {
+define <16 x i8> @sel_v16i8(<16 x i8> %v0, <16 x i8> %v1) {
+; COST-LABEL: 'sel_v16i8'
+; COST-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
+; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %tmp0
+;
   %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
   ret <16 x i8> %tmp0
 }
 
-; COST-LABEL: sel.v4i16
-; COST: Found an estimated cost of 2 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CODE-LABEL: sel.v4i16
-; CODE: rev32 v0.4h, v0.4h
-; CODE: trn2 v0.4h, v0.4h, v1.4h
-define <4 x i16> @sel.v4i16(<4 x i16> %v0, <4 x i16> %v1) {
+define <4 x i16> @sel_v4i16(<4 x i16> %v0, <4 x i16> %v1) {
+; COST-LABEL: 'sel_v4i16'
+; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %tmp0
+;
   %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x i16> %tmp0
 }
 
-; COST-LABEL: sel.v8i16
-; COST: Found an estimated cost of 28 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
-; CODE-LABEL: sel.v8i16
-; CODE: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-define <8 x i16> @sel.v8i16(<8 x i16> %v0, <8 x i16> %v1) {
+define <8 x i16> @sel_v8i16(<8 x i16> %v0, <8 x i16> %v1) {
+; COST-LABEL: 'sel_v8i16'
+; COST-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %tmp0
+;
   %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
   ret <8 x i16> %tmp0
 }
 
-; COST-LABEL: sel.v2i32
-; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 0, i32 3>
-; CODE-LABEL: sel.v2i32
-; CODE: mov v0.s[1], v1.s[1]
-define <2 x i32> @sel.v2i32(<2 x i32> %v0, <2 x i32> %v1) {
+define <2 x i32> @sel_v2i32(<2 x i32> %v0, <2 x i32> %v1) {
+; COST-LABEL: 'sel_v2i32'
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 0, i32 3>
+; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %tmp0
+;
   %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 0, i32 3>
   ret <2 x i32> %tmp0
 }
 
-; COST-LABEL: sel.v4i32
-; COST: Found an estimated cost of 2 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CODE-LABEL: sel.v4i32
-; CODE: rev64 v0.4s, v0.4s
-; CODE: trn2 v0.4s, v0.4s, v1.4s
-define <4 x i32> @sel.v4i32(<4 x i32> %v0, <4 x i32> %v1) {
+define <4 x i32> @sel_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
+; COST-LABEL: 'sel_v4i32'
+; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %tmp0
+;
   %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x i32> %tmp0
 }
 
-; COST-LABEL: sel.v2i64
-; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 0, i32 3>
-; CODE-LABEL: sel.v2i64
-; CODE: mov v0.d[1], v1.d[1]
-define <2 x i64> @sel.v2i64(<2 x i64> %v0, <2 x i64> %v1) {
+define <2 x i64> @sel_v2i64(<2 x i64> %v0, <2 x i64> %v1) {
+; COST-LABEL: 'sel_v2i64'
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 0, i32 3>
+; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %tmp0
+;
   %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 0, i32 3>
   ret <2 x i64> %tmp0
 }
 
-; COST-LABEL: sel.v2f32
-; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 0, i32 3>
-; CODE-LABEL: sel.v2f32
-; CODE: mov v0.s[1], v1.s[1]
-define <2 x float> @sel.v2f32(<2 x float> %v0, <2 x float> %v1) {
+define <4 x half> @sel_v4f16(<4 x half> %v0, <4 x half> %v1) {
+; COST-LABEL: 'sel_v4f16'
+; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x half> %tmp0
+;
+  %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x half> %tmp0
+}
+
+define <8 x half> @sel_v8f16(<8 x half> %v0, <8 x half> %v1) {
+; COST-LABEL: 'sel_v8f16'
+; COST-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x half> %tmp0
+;
+  %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+  ret <8 x half> %tmp0
+}
+
+define <2 x float> @sel_v2f32(<2 x float> %v0, <2 x float> %v1) {
+; COST-LABEL: 'sel_v2f32'
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 0, i32 3>
+; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %tmp0
+;
   %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 0, i32 3>
   ret <2 x float> %tmp0
 }
 
-; COST-LABEL: sel.v4f32
-; COST: Found an estimated cost of 2 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CODE-LABEL: sel.v4f32
-; CODE: rev64 v0.4s, v0.4s
-; CODE: trn2 v0.4s, v0.4s, v1.4s
-define <4 x float> @sel.v4f32(<4 x float> %v0, <4 x float> %v1) {
+define <4 x float> @sel_v4f32(<4 x float> %v0, <4 x float> %v1) {
+; COST-LABEL: 'sel_v4f32'
+; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %tmp0
+;
   %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x float> %tmp0
 }
 
-; COST-LABEL: sel.v2f64
-; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 0, i32 3>
-; CODE-LABEL: sel.v2f64
-; CODE: mov v0.d[1], v1.d[1]
-define <2 x double> @sel.v2f64(<2 x double> %v0, <2 x double> %v1) {
+define <2 x double> @sel_v2f64(<2 x double> %v0, <2 x double> %v1) {
+; COST-LABEL: 'sel_v2f64'
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 0, i32 3>
+; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %tmp0
+;
   %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 0, i32 3>
   ret <2 x double> %tmp0
 }
diff --git a/llvm/test/CodeGen/AArch64/shuffle-select.ll b/llvm/test/CodeGen/AArch64/shuffle-select.ll
new file mode 100644
index 000000000000000..25a935f067bd6c1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/shuffle-select.ll
@@ -0,0 +1,137 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s
+
+define <8 x i8> @sel_v8i8(<8 x i8> %v0, <8 x i8> %v1) {
+; CHECK-LABEL: sel_v8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: adrp x8, .LCPI0_0
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0]
+; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b
+; CHECK-NEXT: ret
+  %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+  ret <8 x i8> %tmp0
+}
+
+define <16 x i8> @sel_v16i8(<16 x i8> %v0, <16 x i8> %v1) {
+; CHECK-LABEL: sel_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI1_0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI1_0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-NEXT: ret
+  %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
+  ret <16 x i8> %tmp0
+}
+
+define <4 x i16> @sel_v4i16(<4 x i16> %v0, <4 x i16> %v1) {
+; CHECK-LABEL: sel_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev32 v0.4h, v0.4h
+; CHECK-NEXT: trn2 v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
+  %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x i16> %tmp0
+}
+
+define <8 x i16> @sel_v8i16(<8 x i16> %v0, <8 x i16> %v1) {
+; CHECK-LABEL: sel_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI3_0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-NEXT: ret
+  %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+  ret <8 x i16> %tmp0
+}
+
+define <2 x i32> @sel_v2i32(<2 x i32> %v0, <2 x i32> %v1) {
+; CHECK-LABEL: sel_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: mov v0.s[1], v1.s[1]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
+  %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 0, i32 3>
+  ret <2 x i32> %tmp0
+}
+
+define <4 x i32> @sel_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: sel_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev64 v0.4s, v0.4s
+; CHECK-NEXT: trn2 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+  %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x i32> %tmp0
+}
+
+define <2 x i64> @sel_v2i64(<2 x i64> %v0, <2 x i64> %v1) {
+; CHECK-LABEL: sel_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.d[1], v1.d[1]
+; CHECK-NEXT: ret
+  %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 0, i32 3>
+  ret <2 x i64> %tmp0
+}
+
+define <4 x half> @sel_v4f16(<4 x half> %v0, <4 x half> %v1) {
+; CHECK-LABEL: sel_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev32 v0.4h, v0.4h
+; CHECK-NEXT: trn2 v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
+  %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x half> %tmp0
+}
+
+define <8 x half> @sel_v8f16(<8 x half> %v0, <8 x half> %v1) {
+; CHECK-LABEL: sel_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI8_0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-NEXT: ret
+  %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+  ret <8 x half> %tmp0
+}
+
+define <2 x float> @sel_v2f32(<2 x float> %v0, <2 x float> %v1) {
+; CHECK-LABEL: sel_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: mov v0.s[1], v1.s[1]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
+  %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 0, i32 3>
+  ret <2 x float> %tmp0
+}
+
+define <4 x float> @sel_v4f32(<4 x float> %v0, <4 x float> %v1) {
+; CHECK-LABEL: sel_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev64 v0.4s, v0.4s
+; CHECK-NEXT: trn2 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+  %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x float> %tmp0
+}
+
+define <2 x double> @sel_v2f64(<2 x double> %v0, <2 x double> %v1) {
+; CHECK-LABEL: sel_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.d[1], v1.d[1]
+; CHECK-NEXT: ret
+  %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 0, i32 3>
+  ret <2 x double> %tmp0
+}