diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 46021c9c17feac..d7b47cd2e2ee26 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -624,6 +624,9 @@ X86 Support
 - All intrinsics in tbmintrin.h can now be used in constant expressions.
 
+- Supported intrinsics for ``MOVRS`` and ``AVX10.2``.
+  * Supported intrinsics of ``_mm(256|512)_(mask(z))_loadrs_epi(8|16|32|64)``.
+
 Arm and AArch64 Support
 ^^^^^^^^^^^^^^^^^^^^^^^
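A quick usage sketch of the intrinsics named in this note, assuming a compiler carrying this patch and the flags `-mmovrs -mavx10.2-512`; the function below is illustrative, not part of the patch:

```c
#include <immintrin.h>

// Plain, merge-masked, and zero-masked forms of the byte variant.
__m512i demo(const void *p, __m512i src, __mmask64 k) {
  __m512i a = _mm512_loadrs_epi8(p);              // 64-byte load, read-shared hint
  __m512i b = _mm512_mask_loadrs_epi8(src, k, p); // keeps src lanes where k bit is 0
  __m512i c = _mm512_maskz_loadrs_epi8(k, p);     // zeroes lanes where k bit is 0
  return _mm512_xor_si512(_mm512_xor_si512(a, b), c);
}
```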
diff --git a/clang/include/clang/Basic/BuiltinsX86_64.def b/clang/include/clang/Basic/BuiltinsX86_64.def
index 2c591edb2835cd..e1e613560167ac 100644
--- a/clang/include/clang/Basic/BuiltinsX86_64.def
+++ b/clang/include/clang/Basic/BuiltinsX86_64.def
@@ -161,6 +161,20 @@ TARGET_BUILTIN(__builtin_ia32_aand64, "vv*SOi", "n", "raoint")
 TARGET_BUILTIN(__builtin_ia32_aor64, "vv*SOi", "n", "raoint")
 TARGET_BUILTIN(__builtin_ia32_axor64, "vv*SOi", "n", "raoint")
 
+// MOVRS and AVX10.2
+TARGET_BUILTIN(__builtin_ia32_vmovrsb128, "V16cV16cC*", "nV:128:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsb256, "V32cV32cC*", "nV:256:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsb512, "V64cV64cC*", "nV:512:", "movrs,avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vmovrsd128, "V4iV4iC*", "nV:128:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsd256, "V8iV8iC*", "nV:256:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsd512, "V16iV16iC*", "nV:512:", "movrs,avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vmovrsq128, "V2OiV2OiC*", "nV:128:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsq256, "V4OiV4OiC*", "nV:256:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsq512, "V8OiV8OiC*", "nV:512:", "movrs,avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vmovrsw128, "V8sV8sC*", "nV:128:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsw256, "V16sV16sC*", "nV:256:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsw512, "V32sV32sC*", "nV:512:", "movrs,avx10.2-512")
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
 #undef TARGET_HEADER_BUILTIN
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 2ddb2f5312148e..62603969eaf8c3 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6424,6 +6424,8 @@ def mmovdiri : Flag<["-"], "mmovdiri">, Group<m_x86_Features_Group>;
 def mno_movdiri : Flag<["-"], "mno-movdiri">, Group<m_x86_Features_Group>;
 def mmovdir64b : Flag<["-"], "mmovdir64b">, Group<m_x86_Features_Group>;
 def mno_movdir64b : Flag<["-"], "mno-movdir64b">, Group<m_x86_Features_Group>;
+def mmovrs : Flag<["-"], "mmovrs">, Group<m_x86_Features_Group>;
+def mno_movrs : Flag<["-"], "mno-movrs">, Group<m_x86_Features_Group>;
 def mmwaitx : Flag<["-"], "mmwaitx">, Group<m_x86_Features_Group>;
 def mno_mwaitx : Flag<["-"], "mno-mwaitx">, Group<m_x86_Features_Group>;
 def mpku : Flag<["-"], "mpku">, Group<m_x86_Features_Group>;
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 5448bd841959f4..d067ec218b5270 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -348,6 +348,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasSM4 = true;
     } else if (Feature == "+movbe") {
       HasMOVBE = true;
+    } else if (Feature == "+movrs") {
+      HasMOVRS = true;
     } else if (Feature == "+sgx") {
       HasSGX = true;
     } else if (Feature == "+cx8") {
@@ -915,6 +917,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__MOVDIRI__");
   if (HasMOVDIR64B)
     Builder.defineMacro("__MOVDIR64B__");
+  if (HasMOVRS)
+    Builder.defineMacro("__MOVRS__");
   if (HasPCONFIG)
     Builder.defineMacro("__PCONFIG__");
   if (HasPTWRITE)
@@ -1116,6 +1120,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
       .Case("lzcnt", true)
       .Case("mmx", true)
       .Case("movbe", true)
+      .Case("movrs", true)
      .Case("movdiri", true)
      .Case("movdir64b", true)
      .Case("mwaitx", true)
@@ -1233,6 +1238,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
      .Case("lzcnt", HasLZCNT)
      .Case("mmx", HasMMX)
      .Case("movbe", HasMOVBE)
+      .Case("movrs", HasMOVRS)
      .Case("movdiri", HasMOVDIRI)
      .Case("movdir64b", HasMOVDIR64B)
      .Case("mwaitx", HasMWAITX)
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index 2affb659a160b6..e8aad3ec5a74b1 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -130,6 +130,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
   bool HasCLFLUSHOPT = false;
   bool HasCLWB = false;
   bool HasMOVBE = false;
+  bool HasMOVRS = false;
   bool HasPREFETCHI = false;
   bool HasRDPID = false;
   bool HasRDPRU = false;
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index ff392e7122a448..e97953d87a2ff9 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -221,6 +221,8 @@ set(x86_files
   mm3dnow.h
   mmintrin.h
   movdirintrin.h
+  movrs_avx10_2_512intrin.h
+  movrs_avx10_2intrin.h
   mwaitxintrin.h
   nmmintrin.h
   pconfigintrin.h
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 3fbabffa98df20..5f296d0a3324d0 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -605,6 +605,16 @@ _storebe_i64(void * __P, long long __D) {
 #include <movdirintrin.h>
 #endif
 
+#if !defined(__SCE__) || __has_feature(modules) || \
+    (defined(__AVX10_2__) && defined(__MOVRS__))
+#include <movrs_avx10_2intrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || \
+    (defined(__AVX10_2_512__) && defined(__MOVRS__))
+#include <movrs_avx10_2_512intrin.h>
+#endif
+
 #if !defined(__SCE__) || __has_feature(modules) || defined(__PCONFIG__)
 #include <pconfigintrin.h>
 #endif
diff --git a/clang/lib/Headers/movrs_avx10_2_512intrin.h b/clang/lib/Headers/movrs_avx10_2_512intrin.h
new file mode 100644
index 00000000000000..5cd907a5973494
--- /dev/null
+++ b/clang/lib/Headers/movrs_avx10_2_512intrin.h
@@ -0,0 +1,98 @@
+/*===----- movrs_avx10_2_512intrin.h - AVX10.2-512-MOVRS intrinsics --------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error \
+    "Never use <movrs_avx10_2_512intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __MOVRS_AVX10_2_512INTRIN_H
+#define __MOVRS_AVX10_2_512INTRIN_H
+#ifdef __x86_64__
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS512 \
+  __attribute__((__always_inline__, __nodebug__, \
+                 __target__("movrs,avx10.2-512"), __min_vector_width__(512)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_loadrs_epi8(void const *__A) {
+  return (__m512i)__builtin_ia32_vmovrsb512((const __v64qi *)(__A));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_loadrs_epi8(__m512i __W, __mmask64 __U, void const *__A) {
+  return (__m512i)__builtin_ia32_selectb_512(
+      (__mmask64)__U, (__v64qi)_mm512_loadrs_epi8(__A), (__v64qi)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_loadrs_epi8(__mmask64 __U, void const *__A) {
+  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
+                                             (__v64qi)_mm512_loadrs_epi8(__A),
+                                             (__v64qi)_mm512_setzero_si512());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_loadrs_epi32(void const *__A) {
+  return (__m512i)__builtin_ia32_vmovrsd512((const __v16si *)(__A));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_loadrs_epi32(__m512i __W, __mmask16 __U, void const *__A) {
+  return (__m512i)__builtin_ia32_selectd_512(
+      (__mmask16)__U, (__v16si)_mm512_loadrs_epi32(__A), (__v16si)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_loadrs_epi32(__mmask16 __U, void const *__A) {
+  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
+                                             (__v16si)_mm512_loadrs_epi32(__A),
+                                             (__v16si)_mm512_setzero_si512());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_loadrs_epi64(void const *__A) {
+  return (__m512i)__builtin_ia32_vmovrsq512((const __v8di *)(__A));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_loadrs_epi64(__m512i __W, __mmask8 __U, void const *__A) {
+  return (__m512i)__builtin_ia32_selectq_512(
+      (__mmask8)__U, (__v8di)_mm512_loadrs_epi64(__A), (__v8di)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_loadrs_epi64(__mmask8 __U, void const *__A) {
+  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
+                                             (__v8di)_mm512_loadrs_epi64(__A),
+                                             (__v8di)_mm512_setzero_si512());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_loadrs_epi16(void const *__A) {
+  return (__m512i)__builtin_ia32_vmovrsw512((const __v32hi *)(__A));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_loadrs_epi16(__m512i __W, __mmask32 __U, void const *__A) {
+  return (__m512i)__builtin_ia32_selectw_512(
+      (__mmask32)__U, (__v32hi)_mm512_loadrs_epi16(__A), (__v32hi)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_loadrs_epi16(__mmask32 __U, void const *__A) {
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
+                                             (__v32hi)_mm512_loadrs_epi16(__A),
+                                             (__v32hi)_mm512_setzero_si512());
+}
+
+#undef __DEFAULT_FN_ATTRS512
+
+#endif /* __x86_64__ */
+#endif /* __MOVRS_AVX10_2_512INTRIN_H */
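For reference, a scalar model of the masked forms defined above: the `__builtin_ia32_select*` calls keep a loaded lane where the corresponding mask bit is set and the pass-through (or zero) lane otherwise. The MOVRS read-shared hint only changes how the cache line is requested, never the value produced. Illustrative sketch, not part of the patch:

```c
#include <stdint.h>
#include <string.h>

/* Reference semantics of _mm512_mask_loadrs_epi32: load 16 dwords, then for
   each lane i keep the loaded value if mask bit i is set, else src[i]. */
static void mask_loadrs_epi32_ref(int32_t dst[16], const int32_t src[16],
                                  uint16_t mask, const void *p) {
  int32_t tmp[16];
  memcpy(tmp, p, sizeof(tmp));
  for (int i = 0; i < 16; ++i)
    dst[i] = ((mask >> i) & 1) ? tmp[i] : src[i];
}
```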
diff --git a/clang/lib/Headers/movrs_avx10_2intrin.h b/clang/lib/Headers/movrs_avx10_2intrin.h
new file mode 100644
index 00000000000000..27b625b6b43139
--- /dev/null
+++ b/clang/lib/Headers/movrs_avx10_2intrin.h
@@ -0,0 +1,174 @@
+/*===--------- movrs_avx10_2intrin.h - AVX10.2-MOVRS intrinsics ------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error \
+    "Never use <movrs_avx10_2intrin.h> directly; include <immintrin.h> instead."
+#endif + +#ifndef __MOVRS_AVX10_2INTRIN_H +#define __MOVRS_AVX10_2INTRIN_H +#ifdef __x86_64__ + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("movrs,avx10.2-256"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("movrs,avx10.2-256"), __min_vector_width__(256))) + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_loadrs_epi8(void const *__A) { + return (__m128i)__builtin_ia32_vmovrsb128((const __v16qi *)(__A)); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_loadrs_epi8(__m128i __W, __mmask16 __U, void const *__A) { + return (__m128i)__builtin_ia32_selectb_128( + (__mmask16)__U, (__v16qi)_mm_loadrs_epi8(__A), (__v16qi)__W); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_loadrs_epi8(__mmask16 __U, void const *__A) { + return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, + (__v16qi)_mm_loadrs_epi8(__A), + (__v16qi)_mm_setzero_si128()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_loadrs_epi8(void const *__A) { + return (__m256i)__builtin_ia32_vmovrsb256((const __v32qi *)(__A)); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_loadrs_epi8(__m256i __W, __mmask32 __U, void const *__A) { + return (__m256i)__builtin_ia32_selectb_256( + (__mmask32)__U, (__v32qi)_mm256_loadrs_epi8(__A), (__v32qi)__W); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_loadrs_epi8(__mmask32 __U, void const *__A) { + return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, + (__v32qi)_mm256_loadrs_epi8(__A), + (__v32qi)_mm256_setzero_si256()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_loadrs_epi32(void const *__A) { + return (__m128i)__builtin_ia32_vmovrsd128((const __v4si *)(__A)); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_loadrs_epi32(__m128i __W, __mmask8 __U, void const *__A) { + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, (__v4si)_mm_loadrs_epi32(__A), (__v4si)__W); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_loadrs_epi32(__mmask8 __U, void const *__A) { + return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, + (__v4si)_mm_loadrs_epi32(__A), + (__v4si)_mm_setzero_si128()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_loadrs_epi32(void const *__A) { + return (__m256i)__builtin_ia32_vmovrsd256((const __v8si *)(__A)); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_loadrs_epi32(__m256i __W, __mmask8 __U, void const *__A) { + return (__m256i)__builtin_ia32_selectd_256( + (__mmask8)__U, (__v8si)_mm256_loadrs_epi32(__A), (__v8si)__W); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_loadrs_epi32(__mmask8 __U, void const *__A) { + return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, + (__v8si)_mm256_loadrs_epi32(__A), + (__v8si)_mm256_setzero_si256()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_loadrs_epi64(void const *__A) { + return (__m128i)__builtin_ia32_vmovrsq128((const __v2di *)(__A)); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_loadrs_epi64(__m128i __W, __mmask8 __U, void const *__A) { + return (__m128i)__builtin_ia32_selectq_128( + (__mmask8)__U, (__v2di)_mm_loadrs_epi64(__A), (__v2di)__W); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_loadrs_epi64(__mmask8 __U, void const *__A) { + return 
(__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
+                                             (__v2di)_mm_loadrs_epi64(__A),
+                                             (__v2di)_mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_loadrs_epi64(void const *__A) {
+  return (__m256i)__builtin_ia32_vmovrsq256((const __v4di *)(__A));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_loadrs_epi64(__m256i __W, __mmask8 __U, void const *__A) {
+  return (__m256i)__builtin_ia32_selectq_256(
+      (__mmask8)__U, (__v4di)_mm256_loadrs_epi64(__A), (__v4di)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_loadrs_epi64(__mmask8 __U, void const *__A) {
+  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
+                                             (__v4di)_mm256_loadrs_epi64(__A),
+                                             (__v4di)_mm256_setzero_si256());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_loadrs_epi16(void const *__A) {
+  return (__m128i)__builtin_ia32_vmovrsw128((const __v8hi *)(__A));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_loadrs_epi16(__m128i __W, __mmask8 __U, void const *__A) {
+  return (__m128i)__builtin_ia32_selectw_128(
+      (__mmask8)__U, (__v8hi)_mm_loadrs_epi16(__A), (__v8hi)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_loadrs_epi16(__mmask8 __U, void const *__A) {
+  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
+                                             (__v8hi)_mm_loadrs_epi16(__A),
+                                             (__v8hi)_mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_loadrs_epi16(void const *__A) {
+  return (__m256i)__builtin_ia32_vmovrsw256((const __v16hi *)(__A));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_loadrs_epi16(__m256i __W, __mmask16 __U, void const *__A) {
+  return (__m256i)__builtin_ia32_selectw_256(
+      (__mmask16)__U, (__v16hi)_mm256_loadrs_epi16(__A), (__v16hi)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_loadrs_epi16(__mmask16 __U, void const *__A) {
+  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
+                                             (__v16hi)_mm256_loadrs_epi16(__A),
+                                             (__v16hi)_mm256_setzero_si256());
+}
+
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
+
+#endif /* __x86_64__ */
+#endif /* __MOVRS_AVX10_2INTRIN_H */
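The 128/256-bit header is gated the same way as the 512-bit one. A hedged sketch of per-function targeting plus runtime dispatch; the `load16*` helpers are hypothetical, and the `__builtin_cpu_supports` strings rely on the TargetParser and compiler-rt changes later in this patch:

```c
#include <immintrin.h>

__attribute__((target("movrs,avx10.2-256")))
static __m128i load16_rs(const void *p) { return _mm_loadrs_epi8(p); }

static __m128i load16_plain(const void *p) {
  return _mm_loadu_si128((const __m128i *)p); // SSE2 baseline fallback
}

// Hypothetical dispatcher: use the read-shared form only when both
// feature bits are present on the running CPU.
__m128i load16(const void *p) {
  if (__builtin_cpu_supports("movrs") && __builtin_cpu_supports("avx10.2-256"))
    return load16_rs(p);
  return load16_plain(p);
}
```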
diff --git a/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins-error-32.c b/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins-error-32.c
new file mode 100644
index 00000000000000..944033724a6a2b
--- /dev/null
+++ b/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins-error-32.c
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 -ffreestanding %s -Wno-implicit-function-declaration -triple=i386-- -target-feature +movrs -target-feature +avx10.2-512 -emit-llvm -verify
+
+#include <immintrin.h>
+__m512i test_mm512_loadrs_epi8(const __m512i * __A) {
+  return _mm512_loadrs_epi8(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_mask_loadrs_epi8(__m512i __A, __mmask64 __B, const __m512i * __C) {
+  return _mm512_mask_loadrs_epi8(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_maskz_loadrs_epi8(__mmask64 __A, const __m512i * __B) {
+  return _mm512_maskz_loadrs_epi8(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_loadrs_epi32(const __m512i * __A) {
+  return _mm512_loadrs_epi32(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_mask_loadrs_epi32(__m512i __A, __mmask16 __B, const __m512i * __C) {
+  return _mm512_mask_loadrs_epi32(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_maskz_loadrs_epi32(__mmask16 __A, const __m512i * __B) {
+  return _mm512_maskz_loadrs_epi32(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_loadrs_epi64(const __m512i * __A) {
+  return _mm512_loadrs_epi64(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_mask_loadrs_epi64(__m512i __A, __mmask8 __B, const __m512i * __C) {
+  return _mm512_mask_loadrs_epi64(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_maskz_loadrs_epi64(__mmask8 __A, const __m512i * __B) {
+  return _mm512_maskz_loadrs_epi64(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_loadrs_epi16(const __m512i * __A) {
+  return _mm512_loadrs_epi16(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_mask_loadrs_epi16(__m512i __A, __mmask32 __B, const __m512i * __C) {
+  return _mm512_mask_loadrs_epi16(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_maskz_loadrs_epi16(__mmask32 __A, const __m512i * __B) {
+  return _mm512_maskz_loadrs_epi16(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
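The test above pins down the 32-bit behaviour: the header bodies are compiled out under `#ifdef __x86_64__`, so on i386 the calls degrade to implicit declarations returning `int`. A caller that must stay portable can guard on the predefined macros; sketch only:

```c
#if defined(__x86_64__) && defined(__MOVRS__) && defined(__AVX10_2_512__)
#include <immintrin.h>
#endif

void touch(const void *p) {
#if defined(__x86_64__) && defined(__MOVRS__) && defined(__AVX10_2_512__)
  (void)_mm512_loadrs_epi8(p);     /* real intrinsic on 64-bit targets */
#else
  (void)*(const volatile char *)p; /* fallback: plain demand load */
#endif
}
```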
diff --git a/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins.c b/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins.c
new file mode 100644
index 00000000000000..997d6dbc53a8b0
--- /dev/null
+++ b/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins.c
@@ -0,0 +1,87 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-- -target-feature +movrs -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+__m512i test_mm512_loadrs_epi8(const __m512i * __A) {
+  // CHECK-LABEL: @test_mm512_loadrs_epi8(
+  // CHECK: call <64 x i8> @llvm.x86.avx10.vmovrsb512(
+  return _mm512_loadrs_epi8(__A);
+}
+
+__m512i test_mm512_mask_loadrs_epi8(__m512i __A, __mmask64 __B, const __m512i * __C) {
+  // CHECK-LABEL: @test_mm512_mask_loadrs_epi8(
+  // CHECK: call <64 x i8> @llvm.x86.avx10.vmovrsb512(
+  // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
+  return _mm512_mask_loadrs_epi8(__A, __B, __C);
+}
+
+__m512i test_mm512_maskz_loadrs_epi8(__mmask64 __A, const __m512i * __B) {
+  // CHECK-LABEL: @test_mm512_maskz_loadrs_epi8(
+  // CHECK: call <64 x i8> @llvm.x86.avx10.vmovrsb512(
+  // CHECK: store <8 x i64> zeroinitializer
+  // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
+  return _mm512_maskz_loadrs_epi8(__A, __B);
+}
+
+__m512i test_mm512_loadrs_epi32(const __m512i * __A) {
+  // CHECK-LABEL: @test_mm512_loadrs_epi32(
+  // CHECK: call <16 x i32> @llvm.x86.avx10.vmovrsd512(
+  return _mm512_loadrs_epi32(__A);
+}
+
+__m512i test_mm512_mask_loadrs_epi32(__m512i __A, __mmask16 __B, const __m512i * __C) {
+  // CHECK-LABEL: @test_mm512_mask_loadrs_epi32(
+  // CHECK: call <16 x i32> @llvm.x86.avx10.vmovrsd512(
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+  return _mm512_mask_loadrs_epi32(__A, __B, __C);
+}
+
+__m512i test_mm512_maskz_loadrs_epi32(__mmask16 __A, const __m512i * __B) {
+  // CHECK-LABEL: @test_mm512_maskz_loadrs_epi32(
+  // CHECK: call <16 x i32> @llvm.x86.avx10.vmovrsd512(
+  // CHECK: store <8 x i64> zeroinitializer
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+  return _mm512_maskz_loadrs_epi32(__A, __B);
+}
+
+__m512i test_mm512_loadrs_epi64(const __m512i * __A) {
+  // CHECK-LABEL: @test_mm512_loadrs_epi64(
+  // CHECK: call <8 x i64> @llvm.x86.avx10.vmovrsq512(
+  return _mm512_loadrs_epi64(__A);
+}
+
+__m512i test_mm512_mask_loadrs_epi64(__m512i __A, __mmask8 __B, const __m512i * __C) {
+  // CHECK-LABEL: @test_mm512_mask_loadrs_epi64(
+  // CHECK: call <8 x i64> @llvm.x86.avx10.vmovrsq512(
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+  return _mm512_mask_loadrs_epi64(__A, __B, __C);
+}
+
+__m512i test_mm512_maskz_loadrs_epi64(__mmask8 __A, const __m512i * __B) {
+  // CHECK-LABEL: @test_mm512_maskz_loadrs_epi64(
+  // CHECK: call <8 x i64> @llvm.x86.avx10.vmovrsq512(
+  // CHECK: store <8 x i64> zeroinitializer
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+  return _mm512_maskz_loadrs_epi64(__A, __B);
+}
+
+__m512i test_mm512_loadrs_epi16(const __m512i * __A) {
+  // CHECK-LABEL: @test_mm512_loadrs_epi16(
+  // CHECK: call <32 x i16> @llvm.x86.avx10.vmovrsw512(
+  return _mm512_loadrs_epi16(__A);
+}
+
+__m512i test_mm512_mask_loadrs_epi16(__m512i __A, __mmask32 __B, const __m512i * __C) {
+  // CHECK-LABEL: @test_mm512_mask_loadrs_epi16(
+  // CHECK: call <32 x i16> @llvm.x86.avx10.vmovrsw512(
+  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+  return _mm512_mask_loadrs_epi16(__A, __B, __C);
+}
+
+__m512i test_mm512_maskz_loadrs_epi16(__mmask32 __A, const __m512i * __B) {
+  // CHECK-LABEL: @test_mm512_maskz_loadrs_epi16(
+  // CHECK: call <32 x i16> @llvm.x86.avx10.vmovrsw512(
+  // CHECK: store <8 x i64> zeroinitializer
+  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+  return _mm512_maskz_loadrs_epi16(__A, __B);
+}
diff --git a/clang/test/CodeGen/X86/movrs-avx10.2-builtins-error-32.c b/clang/test/CodeGen/X86/movrs-avx10.2-builtins-error-32.c
new file mode 100644
index 00000000000000..68608b0cbff09f
--- /dev/null
+++ b/clang/test/CodeGen/X86/movrs-avx10.2-builtins-error-32.c
@@ -0,0 +1,98 @@
+// RUN: %clang_cc1 -ffreestanding %s -Wno-implicit-function-declaration -triple=i386-unknown-unknown -target-feature +movrs -target-feature +avx10.2-256 -emit-llvm -verify
+
+#include <immintrin.h>
+__m128i test_mm_loadrs_epi8(const __m128i * __A) {
+  return _mm_loadrs_epi8(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
+}
+
+__m128i test_mm_mask_loadrs_epi8(__m128i __A, __mmask16 __B, const __m128i * __C) {
+  return _mm_mask_loadrs_epi8(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
+}
+
+__m128i test_mm_maskz_loadrs_epi8(__mmask16 __A, const __m128i * __B) {
+  return _mm_maskz_loadrs_epi8(__A, __B); // expected-error {{returning 'int' from a function with
incompatible result type '__m128i' (vector of 2 'long long' values)}} +} + +__m256i test_mm256_loadrs_epi8(const __m256i * __A) { + return _mm256_loadrs_epi8(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}} +} + +__m256i test_mm256_mask_loadrs_epi8(__m256i __A, __mmask32 __B, const __m256i * __C) { + return _mm256_mask_loadrs_epi8(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}} +} + +__m256i test_mm256_maskz_loadrs_epi8(__mmask32 __A, const __m256i * __B) { + return _mm256_maskz_loadrs_epi8(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}} +} + +__m128i test_mm_loadrs_epi32(const __m128i * __A) { + return _mm_loadrs_epi32(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}} +} + +__m128i test_mm_mask_loadrs_epi32(__m128i __A, __mmask8 __B, const __m128i * __C) { + return _mm_mask_loadrs_epi32(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}} +} + +__m128i test_mm_maskz_loadrs_epi32(__mmask8 __A, const __m128i * __B) { + return _mm_maskz_loadrs_epi32(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}} +} + +__m256i test_mm256_loadrs_epi32(const __m256i * __A) { + return _mm256_loadrs_epi32(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}} +} + +__m256i test_mm256_mask_loadrs_epi32(__m256i __A, __mmask8 __B, const __m256i * __C) { + return _mm256_mask_loadrs_epi32(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}} +} + +__m256i test_mm256_maskz_loadrs_epi32(__mmask8 __A, const __m256i * __B) { + return _mm256_maskz_loadrs_epi32(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}} +} + +__m128i test_mm_loadrs_epi64(const __m128i * __A) { + return _mm_loadrs_epi64(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}} +} + +__m128i test_mm_mask_loadrs_epi64(__m128i __A, __mmask8 __B, const __m128i * __C) { + return _mm_mask_loadrs_epi64(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}} +} + +__m128i test_mm_maskz_loadrs_epi64(__mmask8 __A, const __m128i * __B) { + return _mm_maskz_loadrs_epi64(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}} +} + +__m256i test_mm256_loadrs_epi64(const __m256i * __A) { + return _mm256_loadrs_epi64(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}} +} + +__m256i test_mm256_mask_loadrs_epi64(__m256i __A, __mmask8 __B, const __m256i * __C) { + return _mm256_mask_loadrs_epi64(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}} +} + +__m256i 
test_mm256_maskz_loadrs_epi64(__mmask8 __A, const __m256i * __B) {
+  return _mm256_maskz_loadrs_epi64(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
+}
+
+__m128i test_mm_loadrs_epi16(const __m128i * __A) {
+  return _mm_loadrs_epi16(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
+}
+
+__m128i test_mm_mask_loadrs_epi16(__m128i __A, __mmask8 __B, const __m128i * __C) {
+  return _mm_mask_loadrs_epi16(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
+}
+
+__m128i test_mm_maskz_loadrs_epi16(__mmask8 __A, const __m128i * __B) {
+  return _mm_maskz_loadrs_epi16(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
+}
+
+__m256i test_mm256_loadrs_epi16(const __m256i * __A) {
+  return _mm256_loadrs_epi16(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
+}
+
+__m256i test_mm256_mask_loadrs_epi16(__m256i __A, __mmask16 __B, const __m256i * __C) {
+  return _mm256_mask_loadrs_epi16(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
+}
+
+__m256i test_mm256_maskz_loadrs_epi16(__mmask16 __A, const __m256i * __B) {
+  return _mm256_maskz_loadrs_epi16(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
+}
diff --git a/clang/test/CodeGen/X86/movrs-avx10.2-builtins.c b/clang/test/CodeGen/X86/movrs-avx10.2-builtins.c
new file mode 100644
index 00000000000000..2011b2a8624738
--- /dev/null
+++ b/clang/test/CodeGen/X86/movrs-avx10.2-builtins.c
@@ -0,0 +1,171 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-- -target-feature +movrs -target-feature +avx10.2-256 -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+__m128i test_mm_loadrs_epi8(const __m128i * __A) {
+  // CHECK-LABEL: @test_mm_loadrs_epi8(
+  // CHECK: call <16 x i8> @llvm.x86.avx10.vmovrsb128(
+  return _mm_loadrs_epi8(__A);
+}
+
+__m128i test_mm_mask_loadrs_epi8(__m128i __A, __mmask16 __B, const __m128i * __C) {
+  // CHECK-LABEL: @test_mm_mask_loadrs_epi8(
+  // CHECK: call <16 x i8> @llvm.x86.avx10.vmovrsb128(
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
+  return _mm_mask_loadrs_epi8(__A, __B, __C);
+}
+
+__m128i test_mm_maskz_loadrs_epi8(__mmask16 __A, const __m128i * __B) {
+  // CHECK-LABEL: @test_mm_maskz_loadrs_epi8(
+  // CHECK: call <16 x i8> @llvm.x86.avx10.vmovrsb128(
+  // CHECK: store <2 x i64> zeroinitializer
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
+  return _mm_maskz_loadrs_epi8(__A, __B);
+}
+
+__m256i test_mm256_loadrs_epi8(const __m256i * __A) {
+  // CHECK-LABEL: @test_mm256_loadrs_epi8(
+  // CHECK: call <32 x i8> @llvm.x86.avx10.vmovrsb256(
+  return _mm256_loadrs_epi8(__A);
+}
+
+__m256i test_mm256_mask_loadrs_epi8(__m256i __A, __mmask32 __B, const __m256i * __C) {
+  // CHECK-LABEL: @test_mm256_mask_loadrs_epi8(
+  // CHECK: call <32 x i8> @llvm.x86.avx10.vmovrsb256(
+  // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
+  return _mm256_mask_loadrs_epi8(__A, __B, __C);
+}
+
+__m256i test_mm256_maskz_loadrs_epi8(__mmask32 __A, const __m256i
* __B) { + // CHECK-LABEL: @test_mm256_maskz_loadrs_epi8( + // CHECK: call <32 x i8> @llvm.x86.avx10.vmovrsb256( + // CHECK: store <4 x i64> zeroinitializer + // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + return _mm256_maskz_loadrs_epi8(__A, __B); +} + +__m128i test_mm_loadrs_epi32(const __m128i * __A) { + // CHECK-LABEL: @test_mm_loadrs_epi32( + // CHECK: call <4 x i32> @llvm.x86.avx10.vmovrsd128( + return _mm_loadrs_epi32(__A); +} + +__m128i test_mm_mask_loadrs_epi32(__m128i __A, __mmask8 __B, const __m128i * __C) { + // CHECK-LABEL: @test_mm_mask_loadrs_epi32( + // CHECK: call <4 x i32> @llvm.x86.avx10.vmovrsd128( + // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + return _mm_mask_loadrs_epi32(__A, __B, __C); +} + +__m128i test_mm_maskz_loadrs_epi32(__mmask8 __A, const __m128i * __B) { + // CHECK-LABEL: @test_mm_maskz_loadrs_epi32( + // CHECK: call <4 x i32> @llvm.x86.avx10.vmovrsd128( + // CHECK: store <2 x i64> zeroinitializer + // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + return _mm_maskz_loadrs_epi32(__A, __B); +} + +__m256i test_mm256_loadrs_epi32(const __m256i * __A) { + // CHECK-LABEL: @test_mm256_loadrs_epi32( + // CHECK: call <8 x i32> @llvm.x86.avx10.vmovrsd256( + return _mm256_loadrs_epi32(__A); +} + +__m256i test_mm256_mask_loadrs_epi32(__m256i __A, __mmask8 __B, const __m256i * __C) { + // CHECK-LABEL: @test_mm256_mask_loadrs_epi32( + // CHECK: call <8 x i32> @llvm.x86.avx10.vmovrsd256( + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + return _mm256_mask_loadrs_epi32(__A, __B, __C); +} + +__m256i test_mm256_maskz_loadrs_epi32(__mmask8 __A, const __m256i * __B) { + // CHECK-LABEL: @test_mm256_maskz_loadrs_epi32( + // CHECK: call <8 x i32> @llvm.x86.avx10.vmovrsd256( + // CHECK: store <4 x i64> zeroinitializer + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + return _mm256_maskz_loadrs_epi32(__A, __B); +} + +__m128i test_mm_loadrs_epi64(const __m128i * __A) { + // CHECK-LABEL: @test_mm_loadrs_epi64( + // CHECK: call <2 x i64> @llvm.x86.avx10.vmovrsq128( + return _mm_loadrs_epi64(__A); +} + +__m128i test_mm_mask_loadrs_epi64(__m128i __A, __mmask8 __B, const __m128i * __C) { + // CHECK-LABEL: @test_mm_mask_loadrs_epi64( + // CHECK: call <2 x i64> @llvm.x86.avx10.vmovrsq128( + // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + return _mm_mask_loadrs_epi64(__A, __B, __C); +} + +__m128i test_mm_maskz_loadrs_epi64(__mmask8 __A, const __m128i * __B) { + // CHECK-LABEL: @test_mm_maskz_loadrs_epi64( + // CHECK: call <2 x i64> @llvm.x86.avx10.vmovrsq128( + // CHECK: store <2 x i64> zeroinitializer + // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + return _mm_maskz_loadrs_epi64(__A, __B); +} + +__m256i test_mm256_loadrs_epi64(const __m256i * __A) { + // CHECK-LABEL: @test_mm256_loadrs_epi64( + // CHECK: call <4 x i64> @llvm.x86.avx10.vmovrsq256( + return _mm256_loadrs_epi64(__A); +} + +__m256i test_mm256_mask_loadrs_epi64(__m256i __A, __mmask8 __B, const __m256i * __C) { + // CHECK-LABEL: @test_mm256_mask_loadrs_epi64( + // CHECK: call <4 x i64> @llvm.x86.avx10.vmovrsq256( + // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + return _mm256_mask_loadrs_epi64(__A, __B, __C); +} + +__m256i test_mm256_maskz_loadrs_epi64(__mmask8 __A, const __m256i * __B) { + // CHECK-LABEL: @test_mm256_maskz_loadrs_epi64( + // CHECK: call <4 x i64> @llvm.x86.avx10.vmovrsq256( + // CHECK: store <4 x i64> 
zeroinitializer + // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + return _mm256_maskz_loadrs_epi64(__A, __B); +} + +__m128i test_mm_loadrs_epi16(const __m128i * __A) { + // CHECK-LABEL: @test_mm_loadrs_epi16( + // CHECK: call <8 x i16> @llvm.x86.avx10.vmovrsw128( + return _mm_loadrs_epi16(__A); +} + +__m128i test_mm_mask_loadrs_epi16(__m128i __A, __mmask8 __B, const __m128i * __C) { + // CHECK-LABEL: @test_mm_mask_loadrs_epi16( + // CHECK: call <8 x i16> @llvm.x86.avx10.vmovrsw128( + // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + return _mm_mask_loadrs_epi16(__A, __B, __C); +} + +__m128i test_mm_maskz_loadrs_epi16(__mmask8 __A, const __m128i * __B) { + // CHECK-LABEL: @test_mm_maskz_loadrs_epi16( + // CHECK: call <8 x i16> @llvm.x86.avx10.vmovrsw128( + // CHECK: store <2 x i64> zeroinitializer + // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + return _mm_maskz_loadrs_epi16(__A, __B); +} + +__m256i test_mm256_loadrs_epi16(const __m256i * __A) { + // CHECK-LABEL: @test_mm256_loadrs_epi16( + // CHECK: call <16 x i16> @llvm.x86.avx10.vmovrsw256( + return _mm256_loadrs_epi16(__A); +} + +__m256i test_mm256_mask_loadrs_epi16(__m256i __A, __mmask16 __B, const __m256i * __C) { + // CHECK-LABEL: @test_mm256_mask_loadrs_epi16( + // CHECK: call <16 x i16> @llvm.x86.avx10.vmovrsw256( + // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + return _mm256_mask_loadrs_epi16(__A, __B, __C); +} + +__m256i test_mm256_maskz_loadrs_epi16(__mmask16 __A, const __m256i * __B) { + // CHECK-LABEL: @test_mm256_maskz_loadrs_epi16( + // CHECK: call <16 x i16> @llvm.x86.avx10.vmovrsw256( + // CHECK: store <4 x i64> zeroinitializer + // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + return _mm256_maskz_loadrs_epi16(__A, __B); +} diff --git a/clang/test/CodeGen/target-builtin-noerror.c b/clang/test/CodeGen/target-builtin-noerror.c index 2a05074d7c2b68..1e53621bc6b5ae 100644 --- a/clang/test/CodeGen/target-builtin-noerror.c +++ b/clang/test/CodeGen/target-builtin-noerror.c @@ -145,6 +145,7 @@ void verifyfeaturestrings(void) { (void)__builtin_cpu_supports("avx10.1-512"); (void)__builtin_cpu_supports("avx10.2-256"); (void)__builtin_cpu_supports("avx10.2-512"); + (void)__builtin_cpu_supports("movrs"); } void verifycpustrings(void) { diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c index ddfbb29a48f8d5..02370ef60b7feb 100644 --- a/clang/test/Driver/x86-target-features.c +++ b/clang/test/Driver/x86-target-features.c @@ -404,6 +404,11 @@ // USERMSR: "-target-feature" "+usermsr" // NO-USERMSR: "-target-feature" "-usermsr" +// RUN: %clang --target=i386 -mmovrs %s -### -o %t.o 2>&1 | FileCheck -check-prefix=MOVRS %s +// RUN: %clang --target=i386 -mno-movrs %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-MOVRS %s +// MOVRS: "-target-feature" "+movrs" +// NO-MOVRS: "-target-feature" "-movrs" + // RUN: %clang --target=i386 -march=i386 -mcrc32 %s -### 2>&1 | FileCheck -check-prefix=CRC32 %s // RUN: %clang --target=i386 -march=i386 -mno-crc32 %s -### 2>&1 | FileCheck -check-prefix=NO-CRC32 %s // CRC32: "-target-feature" "+crc32" diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c index 8b4e6bdc09226a..2d1d2e57bdc772 100644 --- a/clang/test/Preprocessor/x86_target_features.c +++ b/clang/test/Preprocessor/x86_target_features.c @@ -740,6 +740,12 @@ // RUN: %clang -target i686-unknown-linux-gnu -march=atom 
-mno-usermsr -x c -E -dM -o - %s | FileCheck -check-prefix=NO-USERMSR %s
 // NO-USERMSR-NOT: #define __USERMSR__ 1
 
+// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mmovrs -x c -E -dM -o - %s | FileCheck -check-prefix=MOVRS %s
+// MOVRS: #define __MOVRS__ 1
+
+// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mno-movrs -x c -E -dM -o - %s | FileCheck -check-prefix=NO-MOVRS %s
+// NO-MOVRS-NOT: #define __MOVRS__ 1
+
 // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mcrc32 -x c -E -dM -o - %s | FileCheck -check-prefix=CRC32 %s
 // CRC32: #define __CRC32__ 1
diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c
index 7fa4b9e2b66082..bfa478c4427a5b 100644
--- a/compiler-rt/lib/builtins/cpu_model/x86.c
+++ b/compiler-rt/lib/builtins/cpu_model/x86.c
@@ -229,6 +229,7 @@ enum ProcessorFeatures {
   FEATURE_AVX10_1_512,
   FEATURE_AVX10_2_256,
   FEATURE_AVX10_2_512,
+  FEATURE_MOVRS,
   CPU_FEATURE_MAX
 };
 
@@ -972,6 +973,8 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
     setFeature(FEATURE_HRESET);
   if (HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave)
     setFeature(FEATURE_AVXIFMA);
+  if (HasLeaf7Subleaf1 && ((EAX >> 31) & 1))
+    setFeature(FEATURE_MOVRS);
   if (HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave)
     setFeature(FEATURE_AVXVNNIINT8);
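For comparison, user code can perform the same detection this runtime hunk does, assuming the same CPUID location (leaf 7, subleaf 1, EAX bit 31); sketch only:

```c
#include <cpuid.h>

static int has_movrs(void) {
  unsigned eax, ebx, ecx, edx;
  if (!__get_cpuid_count(7, 1, &eax, &ebx, &ecx, &edx))
    return 0;
  return (eax >> 31) & 1; /* CPUID.(EAX=7,ECX=1):EAX[31] = MOVRS */
}
```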
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index 504fce4160a583..706546980cf671 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -196,6 +196,8 @@ Changes to the X86 Backend
 
 * Support ISA of `AVX10.2-256` and `AVX10.2-512`.
 
+* Support instructions of `MOVRS` and `AVX10.2`.
+
 Changes to the OCaml bindings
 -----------------------------
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 5262e3154ff721..d0083017fb9383 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -7572,3 +7572,42 @@ def int_x86_avx10_vfnmsub231nepbf16128 : ClangBuiltin<"__builtin_ia32_vfnmsub231nepbf16128">,
       DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ], [IntrNoMem]>;
 }
+
+let TargetPrefix = "x86" in {
+def int_x86_avx10_vmovrsb128 : ClangBuiltin<"__builtin_ia32_vmovrsb128">,
+    DefaultAttrsIntrinsic<[llvm_v16i8_ty], [llvm_ptr_ty],
+                          [IntrReadMem]>;
+def int_x86_avx10_vmovrsb256 : ClangBuiltin<"__builtin_ia32_vmovrsb256">,
+    DefaultAttrsIntrinsic<[llvm_v32i8_ty], [llvm_ptr_ty],
+                          [IntrReadMem]>;
+def int_x86_avx10_vmovrsb512 : ClangBuiltin<"__builtin_ia32_vmovrsb512">,
+    DefaultAttrsIntrinsic<[llvm_v64i8_ty], [llvm_ptr_ty],
+                          [IntrReadMem]>;
+def int_x86_avx10_vmovrsd128 : ClangBuiltin<"__builtin_ia32_vmovrsd128">,
+    DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_ptr_ty],
+                          [IntrReadMem]>;
+def int_x86_avx10_vmovrsd256 : ClangBuiltin<"__builtin_ia32_vmovrsd256">,
+    DefaultAttrsIntrinsic<[llvm_v8i32_ty], [llvm_ptr_ty],
+                          [IntrReadMem]>;
+def int_x86_avx10_vmovrsd512 : ClangBuiltin<"__builtin_ia32_vmovrsd512">,
+    DefaultAttrsIntrinsic<[llvm_v16i32_ty], [llvm_ptr_ty],
+                          [IntrReadMem]>;
+def int_x86_avx10_vmovrsq128 : ClangBuiltin<"__builtin_ia32_vmovrsq128">,
+    DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_ptr_ty],
+                          [IntrReadMem]>;
+def int_x86_avx10_vmovrsq256 : ClangBuiltin<"__builtin_ia32_vmovrsq256">,
+    DefaultAttrsIntrinsic<[llvm_v4i64_ty], [llvm_ptr_ty],
+                          [IntrReadMem]>;
+def int_x86_avx10_vmovrsq512 : ClangBuiltin<"__builtin_ia32_vmovrsq512">,
+    DefaultAttrsIntrinsic<[llvm_v8i64_ty], [llvm_ptr_ty],
+                          [IntrReadMem]>;
+def int_x86_avx10_vmovrsw128 : ClangBuiltin<"__builtin_ia32_vmovrsw128">,
+    DefaultAttrsIntrinsic<[llvm_v8i16_ty], [llvm_ptr_ty],
+                          [IntrReadMem]>;
+def int_x86_avx10_vmovrsw256 : ClangBuiltin<"__builtin_ia32_vmovrsw256">,
+    DefaultAttrsIntrinsic<[llvm_v16i16_ty], [llvm_ptr_ty],
+                          [IntrReadMem]>;
+def int_x86_avx10_vmovrsw512 : ClangBuiltin<"__builtin_ia32_vmovrsw512">,
+    DefaultAttrsIntrinsic<[llvm_v32i16_ty], [llvm_ptr_ty],
+                          [IntrReadMem]>;
+}
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def
index e5bf196559ba63..073e19f8187c65 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -261,6 +261,8 @@ X86_FEATURE_COMPAT(AVX10_1, "avx10.1-256", 36)
 X86_FEATURE_COMPAT(AVX10_1_512, "avx10.1-512", 37)
 X86_FEATURE_COMPAT(AVX10_2, "avx10.2-256", 0)
 X86_FEATURE_COMPAT(AVX10_2_512, "avx10.2-512", 0)
+// FIXME: Make MOVRS X86_FEATURE_COMPAT once GCC lands the corresponding patch.
+X86_FEATURE     (MOVRS, "movrs")
 X86_FEATURE     (ZU, "zu")
 // These features aren't really CPU features, but the frontend can set them.
 X86_FEATURE     (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk")
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index d57450d91ea2dd..6bedf9e1d13ac3 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -351,6 +351,8 @@ def FeatureZU : SubtargetFeature<"zu", "HasZU", "true",
 def FeatureUseGPR32InInlineAsm
     : SubtargetFeature<"inline-asm-use-gpr32", "UseInlineAsmGPR32", "true",
                        "Enable use of GPR32 in inline assembly for APX">;
+def FeatureMOVRS : SubtargetFeature<"movrs", "HasMOVRS", "true",
+                                    "Enable MOVRS", []>;
 
 // Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
 // "string operations"). See "REP String Enhancement" in the Intel Software
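With the subtarget feature and predicate in place, the TableGen patterns added below select the EVEX-encoded load directly from the intrinsic call. Roughly, under `clang -O2 -mmovrs -mavx10.2-256` (sketch; hypothetical function name):

```c
#include <immintrin.h>

/* Expected to lower to a single `vmovrsd (%rdi), %xmm0` plus ret, as the
   llc tests later in this patch check byte-for-byte. */
__attribute__((target("movrs,avx10.2-256")))
__m128i four_dwords_rs(const void *p) { return _mm_loadrs_epi32(p); }
```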
See "REP String Enhancement" in the Intel Software diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td index 625f2e01d47218..9ef2debb57fa00 100644 --- a/llvm/lib/Target/X86/X86InstrAVX10.td +++ b/llvm/lib/Target/X86/X86InstrAVX10.td @@ -1647,3 +1647,31 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}", (VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>; } + +// MOVRS +multiclass vmovrs_p opc, string OpStr, X86VectorVTInfo _> { + let ExeDomain = _.ExeDomain in { + defm m: AVX512_maskable("int_x86_avx10_"#OpStr#_.Size) + addr:$src))>, EVEX; + } +} + +multiclass vmovrs_p_vl opc, string OpStr, AVX512VLVectorVTInfo _Vec> { + let Predicates = [HasMOVRS, HasAVX10_2_512] in + defm Z : vmovrs_p, EVEX_V512; + let Predicates = [HasMOVRS, HasAVX10_2] in { + defm Z128 : vmovrs_p, EVEX_V128; + defm Z256 : vmovrs_p, EVEX_V256; + } +} + +defm VMOVRSB : vmovrs_p_vl<0x6f, "vmovrsb", avx512vl_i8_info>, + T_MAP5, XD, EVEX_CD8<8, CD8VF>, Sched<[WriteVecLoad]>; +defm VMOVRSW : vmovrs_p_vl<0x6f, "vmovrsw", avx512vl_i16_info>, + T_MAP5, XD, REX_W, EVEX_CD8<16, CD8VF>, Sched<[WriteVecLoad]>; +defm VMOVRSD : vmovrs_p_vl<0x6f, "vmovrsd", avx512vl_i32_info>, + T_MAP5, XS, EVEX_CD8<32, CD8VF>, Sched<[WriteVecLoad]>; +defm VMOVRSQ : vmovrs_p_vl<0x6f, "vmovrsq", avx512vl_i64_info>, + T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VF>, Sched<[WriteVecLoad]>; diff --git a/llvm/lib/Target/X86/X86InstrPredicates.td b/llvm/lib/Target/X86/X86InstrPredicates.td index a815ddc9714f0c..7fb566fba51818 100644 --- a/llvm/lib/Target/X86/X86InstrPredicates.td +++ b/llvm/lib/Target/X86/X86InstrPredicates.td @@ -152,6 +152,7 @@ def HasCLZERO : Predicate<"Subtarget->hasCLZERO()">; def HasCLDEMOTE : Predicate<"Subtarget->hasCLDEMOTE()">; def HasMOVDIRI : Predicate<"Subtarget->hasMOVDIRI()">; def HasMOVDIR64B : Predicate<"Subtarget->hasMOVDIR64B()">; +def HasMOVRS : Predicate<"Subtarget->hasMOVRS()">; def HasPTWRITE : Predicate<"Subtarget->hasPTWRITE()">; def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 1f608f47ef79f4..5c4e3a9dc52b0f 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -1841,6 +1841,7 @@ const StringMap sys::getHostCPUFeatures() { Features["cmpccxadd"] = HasLeaf7Subleaf1 && ((EAX >> 7) & 1); Features["hreset"] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1); Features["avxifma"] = HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave; + Features["movrs"] = HasLeaf7Subleaf1 && ((EAX >> 31) & 1); Features["avxvnniint8"] = HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave; Features["avxneconvert"] = HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave; Features["amx-complex"] = HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave; diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp index 09d4312918acfe..586df5748aa822 100644 --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ -639,6 +639,8 @@ constexpr FeatureBitset ImpliedFeaturesNF = {}; constexpr FeatureBitset ImpliedFeaturesCF = {}; constexpr FeatureBitset ImpliedFeaturesZU = {}; +constexpr FeatureBitset ImpliedFeaturesMOVRS = {}; + constexpr FeatureInfo FeatureInfos[X86::CPU_FEATURE_MAX] = { #define X86_FEATURE(ENUM, STR) {{"+" STR}, ImpliedFeatures##ENUM}, #include "llvm/TargetParser/X86TargetParser.def" diff --git 
a/llvm/test/CodeGen/X86/movrs-avx10.2-512-intrinsics.ll b/llvm/test/CodeGen/X86/movrs-avx10.2-512-intrinsics.ll new file mode 100644 index 00000000000000..a730ef519c015e --- /dev/null +++ b/llvm/test/CodeGen/X86/movrs-avx10.2-512-intrinsics.ll @@ -0,0 +1,163 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- -mattr=+movrs,+avx10.2-512 -verify-machineinstrs --show-mc-encoding | FileCheck %s --check-prefixes=CHECK + +declare <64 x i8> @llvm.x86.avx10.vmovrsb512(ptr) +declare <16 x i32> @llvm.x86.avx10.vmovrsd512(ptr) +declare <8 x i64> @llvm.x86.avx10.vmovrsq512(ptr) +declare <32 x i16> @llvm.x86.avx10.vmovrsw512(ptr) + +define <8 x i64> @test_mm512_movrsb_epi8(ptr %__A) { +; CHECK-LABEL: test_mm512_movrsb_epi8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmovrsb (%rdi), %zmm0 # encoding: [0x62,0xf5,0x7f,0x48,0x6f,0x07] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <64 x i8> @llvm.x86.avx10.vmovrsb512(ptr %__A) + %1 = bitcast <64 x i8> %0 to <8 x i64> + ret <8 x i64> %1 +} + +define <8 x i64> @test_mm512_mask_movrsb_epi8(<8 x i64> %__A, i64 %__B, ptr %__C) { +; CHECK-LABEL: test_mm512_mask_movrsb_epi8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] +; CHECK-NEXT: vmovrsb (%rsi), %zmm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x49,0x6f,0x06] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <64 x i8> @llvm.x86.avx10.vmovrsb512(ptr %__C) + %1 = bitcast <8 x i64> %__A to <64 x i8> + %2 = bitcast i64 %__B to <64 x i1> + %3 = select <64 x i1> %2, <64 x i8> %0, <64 x i8> %1 + %4 = bitcast <64 x i8> %3 to <8 x i64> + ret <8 x i64> %4 +} + +define dso_local <8 x i64> @test_mm512_maskz_movrsb_epi8(i64 %__A, ptr %__B) { +; CHECK-LABEL: test_mm512_maskz_movrsb_epi8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] +; CHECK-NEXT: vmovrsb (%rsi), %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0xc9,0x6f,0x06] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <64 x i8> @llvm.x86.avx10.vmovrsb512(ptr %__B) + %1 = bitcast i64 %__A to <64 x i1> + %2 = select <64 x i1> %1, <64 x i8> %0, <64 x i8> zeroinitializer + %3 = bitcast <64 x i8> %2 to <8 x i64> + ret <8 x i64> %3 +} + +define <8 x i64> @test_mm512_movrsd_epi32(ptr %__A) { +; CHECK-LABEL: test_mm512_movrsd_epi32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmovrsd (%rdi), %zmm0 # encoding: [0x62,0xf5,0x7e,0x48,0x6f,0x07] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <16 x i32> @llvm.x86.avx10.vmovrsd512(ptr %__A) + %1 = bitcast <16 x i32> %0 to <8 x i64> + ret <8 x i64> %1 +} + +define <8 x i64> @test_mm512_mask_movrsd_epi32(<8 x i64> %__A, i16 zeroext %__B, ptr %__C) { +; CHECK-LABEL: test_mm512_mask_movrsd_epi32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vmovrsd (%rsi), %zmm0 {%k1} # encoding: [0x62,0xf5,0x7e,0x49,0x6f,0x06] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <16 x i32> @llvm.x86.avx10.vmovrsd512(ptr %__C) + %1 = bitcast <8 x i64> %__A to <16 x i32> + %2 = bitcast i16 %__B to <16 x i1> + %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 + %4 = bitcast <16 x i32> %3 to <8 x i64> + ret <8 x i64> %4 +} + +define <8 x i64> @test_mm512_maskz_movrsd_epi32(i16 zeroext %__A, ptr %__B) { +; CHECK-LABEL: test_mm512_maskz_movrsd_epi32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vmovrsd 
(%rsi), %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7e,0xc9,0x6f,0x06] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <16 x i32> @llvm.x86.avx10.vmovrsd512(ptr %__B) + %1 = bitcast i16 %__A to <16 x i1> + %2 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> zeroinitializer + %3 = bitcast <16 x i32> %2 to <8 x i64> + ret <8 x i64> %3 +} + +define <8 x i64> @test_mm512_movrsq_epi64(ptr %__A) { +; CHECK-LABEL: test_mm512_movrsq_epi64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmovrsq (%rdi), %zmm0 # encoding: [0x62,0xf5,0xfe,0x48,0x6f,0x07] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <8 x i64> @llvm.x86.avx10.vmovrsq512(ptr %__A) + ret <8 x i64> %0 +} + +define <8 x i64> @test_mm512_mask_movrsq_epi64(<8 x i64> %__A, i8 zeroext %__B, ptr %__C) { +; CHECK-LABEL: test_mm512_mask_movrsq_epi64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vmovrsq (%rsi), %zmm0 {%k1} # encoding: [0x62,0xf5,0xfe,0x49,0x6f,0x06] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <8 x i64> @llvm.x86.avx10.vmovrsq512(ptr %__C) + %1 = bitcast i8 %__B to <8 x i1> + %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__A + ret <8 x i64> %2 +} + +define <8 x i64> @test_mm512_maskz_movrsq_epi64(i8 zeroext %__A, ptr %__B) { +; CHECK-LABEL: test_mm512_maskz_movrsq_epi64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vmovrsq (%rsi), %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfe,0xc9,0x6f,0x06] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <8 x i64> @llvm.x86.avx10.vmovrsq512(ptr %__B) + %1 = bitcast i8 %__A to <8 x i1> + %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer + ret <8 x i64> %2 +} + +define <8 x i64> @test_mm512_movrsw_epi16(ptr %__A) { +; CHECK-LABEL: test_mm512_movrsw_epi16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmovrsw (%rdi), %zmm0 # encoding: [0x62,0xf5,0xff,0x48,0x6f,0x07] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <32 x i16> @llvm.x86.avx10.vmovrsw512(ptr %__A) + %1 = bitcast <32 x i16> %0 to <8 x i64> + ret <8 x i64> %1 +} + +define <8 x i64> @test_mm512_mask_movrsw_epi16(<8 x i64> %__A, i32 %__B, ptr %__C) { +; CHECK-LABEL: test_mm512_mask_movrsw_epi16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vmovrsw (%rsi), %zmm0 {%k1} # encoding: [0x62,0xf5,0xff,0x49,0x6f,0x06] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <32 x i16> @llvm.x86.avx10.vmovrsw512(ptr %__C) + %1 = bitcast <8 x i64> %__A to <32 x i16> + %2 = bitcast i32 %__B to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %0, <32 x i16> %1 + %4 = bitcast <32 x i16> %3 to <8 x i64> + ret <8 x i64> %4 +} + +define <8 x i64> @test_mm512_maskz_movrsw_epi16(i32 %__A, ptr %__B) { +; CHECK-LABEL: test_mm512_maskz_movrsw_epi16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vmovrsw (%rsi), %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0xff,0xc9,0x6f,0x06] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <32 x i16> @llvm.x86.avx10.vmovrsw512(ptr %__B) + %1 = bitcast i32 %__A to <32 x i1> + %2 = select <32 x i1> %1, <32 x i16> %0, <32 x i16> zeroinitializer + %3 = bitcast <32 x i16> %2 to <8 x i64> + ret <8 x i64> %3 +} diff --git a/llvm/test/CodeGen/X86/movrs-avx10.2-intrinsics.ll b/llvm/test/CodeGen/X86/movrs-avx10.2-intrinsics.ll new file mode 100644 index 00000000000000..583e16351652b2 
--- /dev/null +++ b/llvm/test/CodeGen/X86/movrs-avx10.2-intrinsics.ll @@ -0,0 +1,329 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- -mattr=+movrs,+avx10.2-256 -verify-machineinstrs --show-mc-encoding | FileCheck %s --check-prefixes=CHECK + +define <2 x i64> @test_mm_movrsb_epu8(ptr %__A) { +; CHECK-LABEL: test_mm_movrsb_epu8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmovrsb (%rdi), %xmm0 # encoding: [0x62,0xf5,0x7f,0x08,0x6f,0x07] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <16 x i8> @llvm.x86.avx10.vmovrsb128(ptr %__A) + %1 = bitcast <16 x i8> %0 to <2 x i64> + ret <2 x i64> %1 +} + +define <2 x i64> @test_mm_mask_movrsb_epu8(<2 x i64> %__A, i16 zeroext %__B, ptr %__C) { +; CHECK-LABEL: test_mm_mask_movrsb_epu8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vmovrsb (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x09,0x6f,0x06] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <16 x i8> @llvm.x86.avx10.vmovrsb128(ptr %__C) + %1 = bitcast <2 x i64> %__A to <16 x i8> + %2 = bitcast i16 %__B to <16 x i1> + %3 = select <16 x i1> %2, <16 x i8> %0, <16 x i8> %1 + %4 = bitcast <16 x i8> %3 to <2 x i64> + ret <2 x i64> %4 +} + +define <2 x i64> @test_mm_maskz_movrsb_epu8(i16 zeroext %__A, ptr %__B) { +; CHECK-LABEL: test_mm_maskz_movrsb_epu8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vmovrsb (%rsi), %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0x89,0x6f,0x06] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <16 x i8> @llvm.x86.avx10.vmovrsb128(ptr %__B ) + %1 = bitcast i16 %__A to <16 x i1> + %2 = select <16 x i1> %1, <16 x i8> %0, <16 x i8> zeroinitializer + %3 = bitcast <16 x i8> %2 to <2 x i64> + ret <2 x i64> %3 +} + +define <4 x i64> @test_mm256_movrsb_epu8(ptr %__A) { +; CHECK-LABEL: test_mm256_movrsb_epu8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmovrsb (%rdi), %ymm0 # encoding: [0x62,0xf5,0x7f,0x28,0x6f,0x07] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <32 x i8> @llvm.x86.avx10.vmovrsb256(ptr %__A) + %1 = bitcast <32 x i8> %0 to <4 x i64> + ret <4 x i64> %1 +} + +define <4 x i64> @test_mm256_mask_movrsb_epu8(<4 x i64> %__A, i32 %__B, ptr %__C) { +; CHECK-LABEL: test_mm256_mask_movrsb_epu8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vmovrsb (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x29,0x6f,0x06] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <32 x i8> @llvm.x86.avx10.vmovrsb256(ptr %__C) + %1 = bitcast <4 x i64> %__A to <32 x i8> + %2 = bitcast i32 %__B to <32 x i1> + %3 = select <32 x i1> %2, <32 x i8> %0, <32 x i8> %1 + %4 = bitcast <32 x i8> %3 to <4 x i64> + ret <4 x i64> %4 +} + +define <4 x i64> @test_mm256_maskz_movrsb_epu8(i32 %__A, ptr %__B) { +; CHECK-LABEL: test_mm256_maskz_movrsb_epu8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vmovrsb (%rsi), %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0xa9,0x6f,0x06] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <32 x i8> @llvm.x86.avx10.vmovrsb256(ptr %__B) + %1 = bitcast i32 %__A to <32 x i1> + %2 = select <32 x i1> %1, <32 x i8> %0, <32 x i8> zeroinitializer + %3 = bitcast <32 x i8> %2 to <4 x i64> + ret <4 x i64> %3 +} + +define <2 x i64> @test_mm_movrsd_epu32(ptr %__A) { +; CHECK-LABEL: 
+define <2 x i64> @test_mm_movrsd_epu32(ptr %__A) {
+; CHECK-LABEL: test_mm_movrsd_epu32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovrsd (%rdi), %xmm0 # encoding: [0x62,0xf5,0x7e,0x08,0x6f,0x07]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i32> @llvm.x86.avx10.vmovrsd128(ptr %__A)
+ %1 = bitcast <4 x i32> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_mm_mask_movrsd_epu32(<2 x i64> %__A, i8 zeroext %__B, ptr %__C) {
+; CHECK-LABEL: test_mm_mask_movrsd_epu32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsd (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf5,0x7e,0x09,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i32> @llvm.x86.avx10.vmovrsd128(ptr %__C)
+ %1 = bitcast <2 x i64> %__A to <4 x i32>
+ %2 = bitcast i8 %__B to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %2, <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract.i, <4 x i32> %0, <4 x i32> %1
+ %4 = bitcast <4 x i32> %3 to <2 x i64>
+ ret <2 x i64> %4
+}
+
+define <2 x i64> @test_mm_maskz_movrsd_epu32(i8 zeroext %__A, ptr %__B) {
+; CHECK-LABEL: test_mm_maskz_movrsd_epu32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsd (%rsi), %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7e,0x89,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i32> @llvm.x86.avx10.vmovrsd128(ptr %__B)
+ %1 = bitcast i8 %__A to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %1, <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = select <4 x i1> %extract.i, <4 x i32> %0, <4 x i32> zeroinitializer
+ %3 = bitcast <4 x i32> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <4 x i64> @test_mm256_movrsd_epu32(ptr %__A) {
+; CHECK-LABEL: test_mm256_movrsd_epu32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovrsd (%rdi), %ymm0 # encoding: [0x62,0xf5,0x7e,0x28,0x6f,0x07]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i32> @llvm.x86.avx10.vmovrsd256(ptr %__A)
+ %1 = bitcast <8 x i32> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @test_mm256_mask_movrsd_epu32(<4 x i64> %__A, i8 zeroext %__B, ptr %__C) {
+; CHECK-LABEL: test_mm256_mask_movrsd_epu32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsd (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf5,0x7e,0x29,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i32> @llvm.x86.avx10.vmovrsd256(ptr %__C)
+ %1 = bitcast <4 x i64> %__A to <8 x i32>
+ %2 = bitcast i8 %__B to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i32> %0, <8 x i32> %1
+ %4 = bitcast <8 x i32> %3 to <4 x i64>
+ ret <4 x i64> %4
+}
+
+define <4 x i64> @test_mm256_maskz_movrsd_epu32(i8 zeroext %__A, ptr %__B) {
+; CHECK-LABEL: test_mm256_maskz_movrsd_epu32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsd (%rsi), %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7e,0xa9,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i32> @llvm.x86.avx10.vmovrsd256(ptr %__B)
+ %1 = bitcast i8 %__A to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x i32> %0, <8 x i32> zeroinitializer
+ %3 = bitcast <8 x i32> %2 to <4 x i64>
+ ret <4 x i64> %3
+}
+
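+; The qword forms use only the low 2 (xmm) or 4 (ymm) bits of the i8 mask.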
+define <2 x i64> @test_mm_movrsq_epu64(ptr %__A) {
+; CHECK-LABEL: test_mm_movrsq_epu64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovrsq (%rdi), %xmm0 # encoding: [0x62,0xf5,0xfe,0x08,0x6f,0x07]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <2 x i64> @llvm.x86.avx10.vmovrsq128(ptr %__A)
+ ret <2 x i64> %0
+}
+
+define <2 x i64> @test_mm_mask_movrsq_epu64(<2 x i64> %__A, i8 zeroext %__B, ptr %__C) {
+; CHECK-LABEL: test_mm_mask_movrsq_epu64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsq (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf5,0xfe,0x09,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <2 x i64> @llvm.x86.avx10.vmovrsq128(ptr %__C)
+ %1 = bitcast i8 %__B to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %1, <8 x i1> poison, <2 x i32> <i32 0, i32 1>
+ %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__A
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_mm_maskz_movrsq_epu64(i8 zeroext %__A, ptr %__B) {
+; CHECK-LABEL: test_mm_maskz_movrsq_epu64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsq (%rsi), %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfe,0x89,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <2 x i64> @llvm.x86.avx10.vmovrsq128(ptr %__B)
+ %1 = bitcast i8 %__A to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %1, <8 x i1> poison, <2 x i32> <i32 0, i32 1>
+ %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer
+ ret <2 x i64> %2
+}
+
+define <4 x i64> @test_mm256_movrsq_epu64(ptr %__A) {
+; CHECK-LABEL: test_mm256_movrsq_epu64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovrsq (%rdi), %ymm0 # encoding: [0x62,0xf5,0xfe,0x28,0x6f,0x07]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i64> @llvm.x86.avx10.vmovrsq256(ptr %__A)
+ ret <4 x i64> %0
+}
+
+define <4 x i64> @test_mm256_mask_movrsq_epu64(<4 x i64> %__A, i8 zeroext %__B, ptr %__C) {
+; CHECK-LABEL: test_mm256_mask_movrsq_epu64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsq (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf5,0xfe,0x29,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i64> @llvm.x86.avx10.vmovrsq256(ptr %__C)
+ %1 = bitcast i8 %__B to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %1, <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__A
+ ret <4 x i64> %2
+}
+
+define <4 x i64> @test_mm256_maskz_movrsq_epu64(i8 zeroext %__A, ptr %__B) {
+; CHECK-LABEL: test_mm256_maskz_movrsq_epu64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsq (%rsi), %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xfe,0xa9,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i64> @llvm.x86.avx10.vmovrsq256(ptr %__B)
+ %1 = bitcast i8 %__A to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %1, <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer
+ ret <4 x i64> %2
+}
+
+define <2 x i64> @test_mm_movrsw_epu16(ptr %__A) {
+; CHECK-LABEL: test_mm_movrsw_epu16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovrsw (%rdi), %xmm0 # encoding: [0x62,0xf5,0xff,0x08,0x6f,0x07]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx10.vmovrsw128(ptr %__A)
+ %1 = bitcast <8 x i16> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_mm_mask_movrsw_epu16(<2 x i64> %__A, i8 zeroext %__B, ptr %__C) {
+; CHECK-LABEL: test_mm_mask_movrsw_epu16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsw (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf5,0xff,0x09,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx10.vmovrsw128(ptr %__C)
+ %1 = bitcast <2 x i64> %__A to <8 x i16>
+ %2 = bitcast i8 %__B to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1
+ %4 = bitcast <8 x i16> %3 to <2 x i64>
+ ret <2 x i64> %4
+}
+
+define <2 x i64> @test_mm_maskz_movrsw_epu16(i8 zeroext %__A, ptr %__B) {
+; CHECK-LABEL: test_mm_maskz_movrsw_epu16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsw (%rsi), %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xff,0x89,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx10.vmovrsw128(ptr %__B)
+ %1 = bitcast i8 %__A to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x i16> %0, <8 x i16> zeroinitializer
+ %3 = bitcast <8 x i16> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <4 x i64> @test_mm256_movrsw_epu16(ptr %__A) {
+; CHECK-LABEL: test_mm256_movrsw_epu16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovrsw (%rdi), %ymm0 # encoding: [0x62,0xf5,0xff,0x28,0x6f,0x07]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.vmovrsw256(ptr %__A)
+ %1 = bitcast <16 x i16> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @test_mm256_mask_movrsw_epu16(<4 x i64> %__A, i16 zeroext %__B, ptr %__C) {
+; CHECK-LABEL: test_mm256_mask_movrsw_epu16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsw (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf5,0xff,0x29,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.vmovrsw256(ptr %__C)
+ %1 = bitcast <4 x i64> %__A to <16 x i16>
+ %2 = bitcast i16 %__B to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i16> %0, <16 x i16> %1
+ %4 = bitcast <16 x i16> %3 to <4 x i64>
+ ret <4 x i64> %4
+}
+
+define <4 x i64> @test_mm256_maskz_movrsw_epu16(i16 zeroext %__A, ptr %__B) {
+; CHECK-LABEL: test_mm256_maskz_movrsw_epu16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsw (%rsi), %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xff,0xa9,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.vmovrsw256(ptr %__B)
+ %1 = bitcast i16 %__A to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x i16> %0, <16 x i16> zeroinitializer
+ %3 = bitcast <16 x i16> %2 to <4 x i64>
+ ret <4 x i64> %3
+}
+
+declare <16 x i8> @llvm.x86.avx10.vmovrsb128(ptr)
+declare <32 x i8> @llvm.x86.avx10.vmovrsb256(ptr)
+declare <4 x i32> @llvm.x86.avx10.vmovrsd128(ptr)
+declare <8 x i32> @llvm.x86.avx10.vmovrsd256(ptr)
+declare <2 x i64> @llvm.x86.avx10.vmovrsq128(ptr)
+declare <4 x i64> @llvm.x86.avx10.vmovrsq256(ptr)
+declare <8 x i16> @llvm.x86.avx10.vmovrsw128(ptr)
+declare <16 x i16> @llvm.x86.avx10.vmovrsw256(ptr)
diff --git a/llvm/test/MC/Disassembler/X86/movrs-avx10-64.txt b/llvm/test/MC/Disassembler/X86/movrs-avx10-64.txt
new file mode 100644
index 00000000000000..e25e66ae577438
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/movrs-avx10-64.txt
@@ -0,0 +1,98 @@
+# RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding --disassemble < %s | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
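+# Round-trip the 512-bit VMOVRS encodings through the disassembler and check
+# both ATT and Intel printing, with and without {k7}/{z} masking, across the
+# common addressing modes.
+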
+# ATT: vmovrsb 268435456(%rbp,%r14,8), %zmm22
+# INTEL: vmovrsb zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7f,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vmovrsb 291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vmovrsb zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7f,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vmovrsb (%rip), %zmm22
+# INTEL: vmovrsb zmm22, zmmword ptr [rip]
+0x62,0xe5,0x7f,0x48,0x6f,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vmovrsb -2048(,%rbp,2), %zmm22
+# INTEL: vmovrsb zmm22, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0x7f,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vmovrsb 8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vmovrsb zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+0x62,0xe5,0x7f,0xcf,0x6f,0x71,0x7f
+
+# ATT: vmovrsb -8192(%rdx), %zmm22 {%k7} {z}
+# INTEL: vmovrsb zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+0x62,0xe5,0x7f,0xcf,0x6f,0x72,0x80
+
+# ATT: vmovrsd 268435456(%rbp,%r14,8), %zmm22
+# INTEL: vmovrsd zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7e,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vmovrsd 291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vmovrsd zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7e,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vmovrsd (%rip), %zmm22
+# INTEL: vmovrsd zmm22, zmmword ptr [rip]
+0x62,0xe5,0x7e,0x48,0x6f,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vmovrsd -2048(,%rbp,2), %zmm22
+# INTEL: vmovrsd zmm22, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0x7e,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vmovrsd 8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vmovrsd zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+0x62,0xe5,0x7e,0xcf,0x6f,0x71,0x7f
+
+# ATT: vmovrsd -8192(%rdx), %zmm22 {%k7} {z}
+# INTEL: vmovrsd zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+0x62,0xe5,0x7e,0xcf,0x6f,0x72,0x80
+
+# ATT: vmovrsq 268435456(%rbp,%r14,8), %zmm22
+# INTEL: vmovrsq zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0xfe,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vmovrsq 291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vmovrsq zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0xfe,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vmovrsq (%rip), %zmm22
+# INTEL: vmovrsq zmm22, zmmword ptr [rip]
+0x62,0xe5,0xfe,0x48,0x6f,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vmovrsq -2048(,%rbp,2), %zmm22
+# INTEL: vmovrsq zmm22, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0xfe,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vmovrsq 8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vmovrsq zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+0x62,0xe5,0xfe,0xcf,0x6f,0x71,0x7f
+
+# ATT: vmovrsq -8192(%rdx), %zmm22 {%k7} {z}
+# INTEL: vmovrsq zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+0x62,0xe5,0xfe,0xcf,0x6f,0x72,0x80
+
+# ATT: vmovrsw 268435456(%rbp,%r14,8), %zmm22
+# INTEL: vmovrsw zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0xff,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vmovrsw 291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vmovrsw zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0xff,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vmovrsw (%rip), %zmm22
+# INTEL: vmovrsw zmm22, zmmword ptr [rip]
+0x62,0xe5,0xff,0x48,0x6f,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vmovrsw -2048(,%rbp,2), %zmm22
+# INTEL: vmovrsw zmm22, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0xff,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vmovrsw 8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vmovrsw zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+0x62,0xe5,0xff,0xcf,0x6f,0x71,0x7f
+
+# ATT: vmovrsw -8192(%rdx), %zmm22 {%k7} {z}
+# INTEL: vmovrsw zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+0x62,0xe5,0xff,0xcf,0x6f,0x72,0x80
\ No newline at end of file
diff --git a/llvm/test/MC/X86/movrs-avx10-att-64.s b/llvm/test/MC/X86/movrs-avx10-att-64.s
new file mode 100644
index 00000000000000..982b7a1d41c039
--- /dev/null
+++ b/llvm/test/MC/X86/movrs-avx10-att-64.s
@@ -0,0 +1,98 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding < %s | FileCheck %s
+
+// CHECK: vmovrsb 268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa5,0x7f,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmovrsb 268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vmovrsb 291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7f,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmovrsb 291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vmovrsb (%rip), %zmm22
+// CHECK: encoding: [0x62,0xe5,0x7f,0x48,0x6f,0x35,0x00,0x00,0x00,0x00]
+ vmovrsb (%rip), %zmm22
+
+// CHECK: vmovrsb -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe5,0x7f,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vmovrsb -2048(,%rbp,2), %zmm22
+
+// CHECK: vmovrsb 8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7f,0xcf,0x6f,0x71,0x7f]
+ vmovrsb 8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vmovrsb -8192(%rdx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7f,0xcf,0x6f,0x72,0x80]
+ vmovrsb -8192(%rdx), %zmm22 {%k7} {z}
+
+// CHECK: vmovrsd 268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa5,0x7e,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmovrsd 268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vmovrsd 291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7e,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmovrsd 291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vmovrsd (%rip), %zmm22
+// CHECK: encoding: [0x62,0xe5,0x7e,0x48,0x6f,0x35,0x00,0x00,0x00,0x00]
+ vmovrsd (%rip), %zmm22
+
+// CHECK: vmovrsd -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe5,0x7e,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vmovrsd -2048(,%rbp,2), %zmm22
+
+// CHECK: vmovrsd 8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7e,0xcf,0x6f,0x71,0x7f]
+ vmovrsd 8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vmovrsd -8192(%rdx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7e,0xcf,0x6f,0x72,0x80]
+ vmovrsd -8192(%rdx), %zmm22 {%k7} {z}
+
+// CHECK: vmovrsq 268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa5,0xfe,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmovrsq 268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vmovrsq 291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0xfe,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmovrsq 291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vmovrsq (%rip), %zmm22
+// CHECK: encoding: [0x62,0xe5,0xfe,0x48,0x6f,0x35,0x00,0x00,0x00,0x00]
+ vmovrsq (%rip), %zmm22
+
+// CHECK: vmovrsq -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe5,0xfe,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vmovrsq -2048(,%rbp,2), %zmm22
+
+// CHECK: vmovrsq 8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfe,0xcf,0x6f,0x71,0x7f]
+ vmovrsq 8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vmovrsq -8192(%rdx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfe,0xcf,0x6f,0x72,0x80]
+ vmovrsq -8192(%rdx), %zmm22 {%k7} {z}
+
+// CHECK: vmovrsw 268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa5,0xff,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmovrsw 268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vmovrsw 291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0xff,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmovrsw 291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vmovrsw (%rip), %zmm22
+// CHECK: encoding: [0x62,0xe5,0xff,0x48,0x6f,0x35,0x00,0x00,0x00,0x00]
+ vmovrsw (%rip), %zmm22
+
+// CHECK: vmovrsw -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe5,0xff,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vmovrsw -2048(,%rbp,2), %zmm22
+
+// CHECK: vmovrsw 8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xff,0xcf,0x6f,0x71,0x7f]
+ vmovrsw 8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vmovrsw -8192(%rdx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xff,0xcf,0x6f,0x72,0x80]
+ vmovrsw -8192(%rdx), %zmm22 {%k7} {z}
+
diff --git a/llvm/test/MC/X86/movrs-avx10-intel-64.s b/llvm/test/MC/X86/movrs-avx10-intel-64.s
new file mode 100644
index 00000000000000..d61e41abe7d632
--- /dev/null
+++ b/llvm/test/MC/X86/movrs-avx10-intel-64.s
@@ -0,0 +1,97 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovrsb zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7f,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmovrsb zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vmovrsb zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7f,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmovrsb zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vmovrsb zmm22, zmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe5,0x7f,0x48,0x6f,0x35,0x00,0x00,0x00,0x00]
+ vmovrsb zmm22, zmmword ptr [rip]
+
+// CHECK: vmovrsb zmm22, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0x7f,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vmovrsb zmm22, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vmovrsb zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0x7f,0xcf,0x6f,0x71,0x7f]
+ vmovrsb zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+
+// CHECK: vmovrsb zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+// CHECK: encoding: [0x62,0xe5,0x7f,0xcf,0x6f,0x72,0x80]
+ vmovrsb zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+
+// CHECK: vmovrsd zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7e,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmovrsd zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vmovrsd zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7e,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmovrsd zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vmovrsd zmm22, zmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe5,0x7e,0x48,0x6f,0x35,0x00,0x00,0x00,0x00]
+ vmovrsd zmm22, zmmword ptr [rip]
+
+// CHECK: vmovrsd zmm22, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0x7e,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vmovrsd zmm22, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vmovrsd zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0x7e,0xcf,0x6f,0x71,0x7f]
+ vmovrsd zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+
+// CHECK: vmovrsd zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+// CHECK: encoding: [0x62,0xe5,0x7e,0xcf,0x6f,0x72,0x80]
+ vmovrsd zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+
+// CHECK: vmovrsq zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0xfe,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmovrsq zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vmovrsq zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0xfe,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmovrsq zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vmovrsq zmm22, zmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe5,0xfe,0x48,0x6f,0x35,0x00,0x00,0x00,0x00]
+ vmovrsq zmm22, zmmword ptr [rip]
+
+// CHECK: vmovrsq zmm22, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0xfe,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vmovrsq zmm22, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vmovrsq zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0xfe,0xcf,0x6f,0x71,0x7f]
+ vmovrsq zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+
+// CHECK: vmovrsq zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+// CHECK: encoding: [0x62,0xe5,0xfe,0xcf,0x6f,0x72,0x80]
+ vmovrsq zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+
+// CHECK: vmovrsw zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0xff,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmovrsw zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vmovrsw zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0xff,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmovrsw zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vmovrsw zmm22, zmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe5,0xff,0x48,0x6f,0x35,0x00,0x00,0x00,0x00]
+ vmovrsw zmm22, zmmword ptr [rip]
+
+// CHECK: vmovrsw zmm22, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0xff,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vmovrsw zmm22, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vmovrsw zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0xff,0xcf,0x6f,0x71,0x7f]
+ vmovrsw zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+
+// CHECK: vmovrsw zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+// CHECK: encoding: [0x62,0xe5,0xff,0xcf,0x6f,0x72,0x80]
+ vmovrsw zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
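
For reviewers, a minimal usage sketch of the user-facing side of this patch (not part of the diff). The loadrs spellings match the new intrinsics; the exact mask-form signatures below follow the usual AVX-512 masked-load convention and are an assumption here, so verify against movrs_avx10_2intrin.h before relying on them. Compile with MOVRS and AVX10.2-256 enabled, e.g. -mmovrs -mavx10.2-256:

    // Assumed signatures (AVX-512-style masked loads); illustrative only.
    #include <immintrin.h>

    __m256i sum_low_dwords(const void *p) {
      __mmask8 lo = 0x0F;                              // keep the low four dwords
      __m256i all  = _mm256_loadrs_epi32(p);           // plain VMOVRSD load
      __m256i half = _mm256_maskz_loadrs_epi32(lo, p); // zero-masked VMOVRSD load
      return _mm256_add_epi32(all, half);
    }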