|
| 1 | +/* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */ |
| 2 | +/* { dg-options "-O0" } */ |
| 3 | +/* { dg-final { scan-assembler-times "paddd.+xmm\[0-9]+" 1 } } */ |
| 4 | +/* { dg-final { scan-assembler-times "vfmadd132ps.+ymm\[0-9]+" 1 } } */ |
| 5 | +/* { dg-final { scan-assembler-times "vpaddw.+zmm\[0-9]+" 1 } } */ |
| 6 | +#ifndef CHECK_DEFINES |
| 7 | +#define CHECK_DEFINES 0 |
| 8 | +#endif |
| 9 | + |
| 10 | +#define N 1024 |
| 11 | + |
| 12 | +/* Optimization flags and tree vectorizer shall be disabled at this point */ |
| 13 | +#if CHECK_DEFINES && defined(__OPTIMIZE__) |
| 14 | +#error "__OPTIMIZE__ is defined (not compiled with -O0?)" |
| 15 | +#endif |
| 16 | + |
| 17 | +#pragma GCC push_options |
| 18 | +#pragma GCC optimize ("O2", "tree-vectorize") |
| 19 | + |
| 20 | +/* Optimization flags and tree vectorizer shall be enabled at this point */ |
| 21 | +#if CHECK_DEFINES && !defined(__OPTIMIZE__) |
| 22 | +#error "__OPTIMIZE__ is not defined" |
| 23 | +#endif |
| 24 | + |
| 25 | +#pragma GCC push_options |
| 26 | +#pragma GCC target ("sse4.2") |
| 27 | +#ifdef __cplusplus |
| 28 | +namespace { |
| 29 | +#endif |
| 30 | + |
| 31 | +/* Target flags up to and including SSE4.2 shall be enabled at this point */ |
| 32 | +#if CHECK_DEFINES && !defined(__SSE3__) |
| 33 | +#error "Target flag (SSE3) is not defined" |
| 34 | +#endif |
| 35 | +#if CHECK_DEFINES && !defined(__SSSE3__) |
| 36 | +#error "Target flag (SSSE3) is not defined" |
| 37 | +#endif |
| 38 | +#if CHECK_DEFINES && !defined(__SSE4_1__) |
| 39 | +#error "Target flag (SSE4.1) is not defined" |
| 40 | +#endif |
| 41 | +#if CHECK_DEFINES && !defined(__SSE4_2__) |
| 42 | +#error "Target flag (SSE4.2) is not defined" |
| 43 | +#endif |
| 44 | +/* Integer multiply-accumulate kernel: y[i] += a[i] * x[i] for i in [0, N). Defined inside the "sse4.2" target region with O2 + tree-vectorize pushed; presumably the source of the single paddd/xmm match expected by dg-final (confirm). */ |
| 45 | +void |
| 46 | +__attribute__((__noinline__, __used__)) /* presumably keeps the otherwise-unreferenced function emitted as a standalone body (confirm) */ |
| 47 | +vec_saxpy_i32(int y[N], const int a[N], const int x[N]) |
| 48 | +{ |
| 49 | + int i; |
| 50 | + for (i = 0; i < N; i++) |
| 51 | + y[i] += a[i] * x[i]; |
| 52 | +} |
| 53 | + |
| 54 | +#ifdef __cplusplus |
| 55 | +} |
| 56 | +#endif |
| 57 | +#pragma GCC pop_options |
| 58 | + |
| 59 | +/* Target flags up to and including SSE4.2 shall be disabled at this point */ |
| 60 | +#if CHECK_DEFINES && defined(__SSE3__) |
| 61 | +#error "Target flag (SSE3) is still defined" |
| 62 | +#endif |
| 63 | +#if CHECK_DEFINES && defined(__SSSE3__) |
| 64 | +#error "Target flag (SSSE3) is still defined" |
| 65 | +#endif |
| 66 | +#if CHECK_DEFINES && defined(__SSE4_1__) |
| 67 | +#error "Target flag (SSE4.1) is still defined" |
| 68 | +#endif |
| 69 | +#if CHECK_DEFINES && defined(__SSE4_2__) |
| 70 | +#error "Target flag (SSE4.2) is still defined" |
| 71 | +#endif |
| 72 | + |
| 73 | +#pragma GCC push_options |
| 74 | +#pragma GCC target ("avx2", "fma") |
| 75 | +#ifdef __cplusplus |
| 76 | +struct A { |
| 77 | +#endif |
| 78 | + |
| 79 | +/* Target flags up to and including AVX2+FMA shall be enabled at this point */ |
| 80 | +#if CHECK_DEFINES && !defined(__SSE3__) |
| 81 | +#error "Target flag (SSE3) is not defined" |
| 82 | +#endif |
| 83 | +#if CHECK_DEFINES && !defined(__SSSE3__) |
| 84 | +#error "Target flag (SSSE3) is not defined" |
| 85 | +#endif |
| 86 | +#if CHECK_DEFINES && !defined(__SSE4_1__) |
| 87 | +#error "Target flag (SSE4.1) is not defined" |
| 88 | +#endif |
| 89 | +#if CHECK_DEFINES && !defined(__SSE4_2__) |
| 90 | +#error "Target flag (SSE4.2) is not defined" |
| 91 | +#endif |
| 92 | +#if CHECK_DEFINES && !defined(__AVX__) |
| 93 | +#error "Target flag (AVX) is not defined" |
| 94 | +#endif |
| 95 | +#if CHECK_DEFINES && !defined(__AVX2__) |
| 96 | +#error "Target flag (AVX2) is not defined" |
| 97 | +#endif |
| 98 | +#if CHECK_DEFINES && !defined(__FMA__) |
| 99 | +#error "Target flag (FMA) is not defined" |
| 100 | +#endif |
| 101 | +/* Single-precision multiply-accumulate kernel: y[i] += a[i] * x[i] for i in [0, N). Defined inside the "avx2","fma" target region (a member of struct A when built as C++); presumably the source of the single vfmadd132ps/ymm match expected by dg-final (confirm). */ |
| 102 | +void |
| 103 | +__attribute__((__noinline__, __used__)) /* presumably keeps the otherwise-unreferenced function emitted as a standalone body (confirm) */ |
| 104 | +vec_saxpy_f32(float y[N], const float a[N], const float x[N]) |
| 105 | +{ |
| 106 | + int i; |
| 107 | + for (i = 0; i < N; i++) |
| 108 | + y[i] += a[i] * x[i]; |
| 109 | +} |
| 110 | + |
| 111 | +#ifdef __cplusplus |
| 112 | +}; |
| 113 | +#endif |
| 114 | +#pragma GCC pop_options |
| 115 | + |
| 116 | +/* Target flags up to and including AVX2+FMA shall be disabled at this point */ |
| 117 | +#if CHECK_DEFINES && defined(__SSE3__) |
| 118 | +#error "Target flag (SSE3) is still defined" |
| 119 | +#endif |
| 120 | +#if CHECK_DEFINES && defined(__SSSE3__) |
| 121 | +#error "Target flag (SSSE3) is still defined" |
| 122 | +#endif |
| 123 | +#if CHECK_DEFINES && defined(__SSE4_1__) |
| 124 | +#error "Target flag (SSE4.1) is still defined" |
| 125 | +#endif |
| 126 | +#if CHECK_DEFINES && defined(__SSE4_2__) |
| 127 | +#error "Target flag (SSE4.2) is still defined" |
| 128 | +#endif |
| 129 | +#if CHECK_DEFINES && defined(__AVX__) |
| 130 | +#error "Target flag (AVX) is still defined" |
| 131 | +#endif |
| 132 | +#if CHECK_DEFINES && defined(__AVX2__) |
| 133 | +#error "Target flag (AVX2) is still defined" |
| 134 | +#endif |
| 135 | +#if CHECK_DEFINES && defined(__FMA__) |
| 136 | +#error "Target flag (FMA) is still defined" |
| 137 | +#endif |
| 138 | + |
| 139 | +#pragma GCC push_options |
| 140 | +#pragma GCC target ("arch=x86-64-v4") |
| 141 | +#ifdef __cplusplus |
| 142 | +namespace avx512 { |
| 143 | +struct A { |
| 144 | +#endif |
| 145 | + |
| 146 | +/* Essential AVX512 target flags shall be enabled at this point */ |
| 147 | +#if CHECK_DEFINES && !defined(__AVX512F__) |
| 148 | +#error "Target flag (AVX512F) is not defined" |
| 149 | +#endif |
| 150 | +#if CHECK_DEFINES && !defined(__AVX512VL__) |
| 151 | +#error "Target flag (AVX512VL) is not defined" |
| 152 | +#endif |
| 153 | +#if CHECK_DEFINES && !defined(__AVX512DQ__) |
| 154 | +#error "Target flag (AVX512DQ) is not defined" |
| 155 | +#endif |
| 156 | +#if CHECK_DEFINES && !defined(__AVX512BW__) |
| 157 | +#error "Target flag (AVX512BW) is not defined" |
| 158 | +#endif |
| 159 | +/* 16-bit integer multiply-accumulate kernel: y[i] += a[i] * x[i] for i in [0, N). Defined inside the "arch=x86-64-v4" target region (a member of avx512::A when built as C++); presumably the source of the single vpaddw/zmm match expected by dg-final (confirm). */ |
| 160 | +void |
| 161 | +__attribute__((__noinline__, __used__)) /* presumably keeps the otherwise-unreferenced function emitted as a standalone body (confirm) */ |
| 162 | +vec_saxpy_i16(short y[N], const short a[N], const short x[N]) |
| 163 | +{ |
| 164 | + int i; |
| 165 | + for (i = 0; i < N; i++) |
| 166 | + y[i] += a[i] * x[i]; |
| 167 | +} |
| 168 | + |
| 169 | +#ifdef __cplusplus |
| 170 | +}; |
| 171 | +} |
| 172 | +#endif |
| 173 | +#pragma GCC pop_options |
| 174 | + |
| 175 | +/* Essential AVX512 target flags shall be disabled at this point */ |
| 176 | +#if CHECK_DEFINES && defined(__AVX512F__) |
| 177 | +#error "Target flag (AVX512F) is still defined" |
| 178 | +#endif |
| 179 | +#if CHECK_DEFINES && defined(__AVX512VL__) |
| 180 | +#error "Target flag (AVX512VL) is still defined" |
| 181 | +#endif |
| 182 | +#if CHECK_DEFINES && defined(__AVX512DQ__) |
| 183 | +#error "Target flag (AVX512DQ) is still defined" |
| 184 | +#endif |
| 185 | +#if CHECK_DEFINES && defined(__AVX512BW__) |
| 186 | +#error "Target flag (AVX512BW) is still defined" |
| 187 | +#endif |
| 188 | + |
| 189 | +#pragma GCC pop_options |
| 190 | + |
| 191 | +/* Optimization flags and tree vectorizer shall be disabled at this point */ |
| 192 | +#if CHECK_DEFINES && defined(__OPTIMIZE__) |
| 193 | +#error "__OPTIMIZE__ is still defined" |
| 194 | +#endif |
0 commit comments