clang/test/CodeGen/X86/sse41-builtins.c

   1 // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64
   2 // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64
   3 // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK
   4 // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK
   5
   6
   7 #include <immintrin.h>
   8
   9 // NOTE: This should match the tests in llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
  10
  11 __m128i test_mm_blend_epi16(__m128i V1, __m128i V2) {
       // Immediate 42 is 0b00101010: the expected <8 x i16> shuffle mask takes
       // lanes 1, 3 and 5 from V2 (indices 9, 11, 13) and the rest from V1.
  12   // CHECK-LABEL: test_mm_blend_epi16
  13   // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 7>
  14   return _mm_blend_epi16(V1, V2, 42);
  15 }
  16
  17 __m128d test_mm_blend_pd(__m128d V1, __m128d V2) {
       // Immediate 2 is 0b10: the expected <2 x double> shuffle mask keeps
       // lane 0 of V1 and takes lane 1 from V2 (index 3).
  18   // CHECK-LABEL: test_mm_blend_pd
  19   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 3>
  20   return _mm_blend_pd(V1, V2, 2);
  21 }
  22
  23 __m128 test_mm_blend_ps(__m128 V1, __m128 V2) {
       // Immediate 6 is 0b0110: the expected <4 x float> shuffle mask takes
       // lanes 1 and 2 from V2 (indices 5, 6) and lanes 0 and 3 from V1.
  24   // CHECK-LABEL: test_mm_blend_ps
  25   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  26   return _mm_blend_ps(V1, V2, 6);
  27 }
  28
  29 __m128i test_mm_blendv_epi8(__m128i V1, __m128i V2, __m128i V3) {
       // Expects _mm_blendv_epi8 to be emitted as a call to the
       // llvm.x86.sse41.pblendvb intrinsic with three <16 x i8> operands.
  30   // CHECK-LABEL: test_mm_blendv_epi8
  31   // CHECK: call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  32   return _mm_blendv_epi8(V1, V2, V3);
  33 }
  34
  35 __m128d test_mm_blendv_pd(__m128d V1, __m128d V2, __m128d V3) {
       // Expects _mm_blendv_pd to be emitted as a call to the
       // llvm.x86.sse41.blendvpd intrinsic with three <2 x double> operands.
  36   // CHECK-LABEL: test_mm_blendv_pd
  37   // CHECK: call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
  38   return _mm_blendv_pd(V1, V2, V3);
  39 }
  40
  41 __m128 test_mm_blendv_ps(__m128 V1, __m128 V2, __m128 V3) {
       // Expects _mm_blendv_ps to be emitted as a call to the
       // llvm.x86.sse41.blendvps intrinsic with three <4 x float> operands.
  42   // CHECK-LABEL: test_mm_blendv_ps
  43   // CHECK: call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
  44   return _mm_blendv_ps(V1, V2, V3);
  45 }
  46
  47 __m128d test_mm_ceil_pd(__m128d x) {
       // Expects _mm_ceil_pd to be emitted as llvm.x86.sse41.round.pd with
       // rounding immediate 2.
  48   // CHECK-LABEL: test_mm_ceil_pd
  49   // CHECK: call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %{{.*}}, i32 2)
  50   return _mm_ceil_pd(x);
  51 }
  52
  53 __m128 test_mm_ceil_ps(__m128 x) {
       // Expects _mm_ceil_ps to be emitted as llvm.x86.sse41.round.ps with
       // rounding immediate 2.
  54   // CHECK-LABEL: test_mm_ceil_ps
  55   // CHECK: call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %{{.*}}, i32 2)
  56   return _mm_ceil_ps(x);
  57 }
  58
  59 __m128d test_mm_ceil_sd(__m128d x, __m128d y) {
       // Expects _mm_ceil_sd to be emitted as the two-operand
       // llvm.x86.sse41.round.sd intrinsic with rounding immediate 2.
  60   // CHECK-LABEL: test_mm_ceil_sd
  61   // CHECK: call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 2)
  62   return _mm_ceil_sd(x, y);
  63 }
  64
  65 __m128 test_mm_ceil_ss(__m128 x, __m128 y) {
       // Expects _mm_ceil_ss to be emitted as the two-operand
       // llvm.x86.sse41.round.ss intrinsic with rounding immediate 2.
  66   // CHECK-LABEL: test_mm_ceil_ss
  67   // CHECK: call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 2)
  68   return _mm_ceil_ss(x, y);
  69 }
  70
  71 __m128i test_mm_cmpeq_epi64(__m128i A, __m128i B) {
       // Expects generic IR: an icmp eq on <2 x i64> whose <2 x i1> result is
       // sign-extended back to <2 x i64>.
  72   // CHECK-LABEL: test_mm_cmpeq_epi64
  73   // CHECK: icmp eq <2 x i64>
  74   // CHECK: sext <2 x i1> %{{.*}} to <2 x i64>
  75   return _mm_cmpeq_epi64(A, B);
  76 }
  77
  78 __m128i test_mm_cvtepi8_epi16(__m128i a) {
       // Expects a shufflevector selecting elements 0-7 of the <16 x i8>
       // input, then a sign-extension of those 8 bytes to <8 x i16>.
  79   // CHECK-LABEL: test_mm_cvtepi8_epi16
  80   // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  81   // CHECK: sext <8 x i8> {{.*}} to <8 x i16>
  82   return _mm_cvtepi8_epi16(a);
  83 }
  84
  85 __m128i test_mm_cvtepi8_epi32(__m128i a) {
       // Expects a shufflevector selecting elements 0-3 of the <16 x i8>
       // input, then a sign-extension of those 4 bytes to <4 x i32>.
  86   // CHECK-LABEL: test_mm_cvtepi8_epi32
  87   // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  88   // CHECK: sext <4 x i8> {{.*}} to <4 x i32>
  89   return _mm_cvtepi8_epi32(a);
  90 }
  91
  92 __m128i test_mm_cvtepi8_epi64(__m128i a) {
       // Expects a shufflevector selecting elements 0-1 of the <16 x i8>
       // input, then a sign-extension of those 2 bytes to <2 x i64>.
  93   // CHECK-LABEL: test_mm_cvtepi8_epi64
  94   // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <2 x i32> <i32 0, i32 1>
  95   // CHECK: sext <2 x i8> {{.*}} to <2 x i64>
  96   return _mm_cvtepi8_epi64(a);
  97 }
  98
  99 __m128i test_mm_cvtepi16_epi32(__m128i a) {
       // Expects a shufflevector selecting elements 0-3 of the <8 x i16>
       // input, then a sign-extension of those 4 words to <4 x i32>.
 100   // CHECK-LABEL: test_mm_cvtepi16_epi32
 101   // CHECK: shufflevector <8 x i16> {{.*}}, <8 x i16> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 102   // CHECK: sext <4 x i16> {{.*}} to <4 x i32>
 103   return _mm_cvtepi16_epi32(a);
 104 }
 105
 106 __m128i test_mm_cvtepi16_epi64(__m128i a) {
       // Expects a shufflevector selecting elements 0-1 of the <8 x i16>
       // input, then a sign-extension of those 2 words to <2 x i64>.
 107   // CHECK-LABEL: test_mm_cvtepi16_epi64
 108   // CHECK: shufflevector <8 x i16> {{.*}}, <8 x i16> {{.*}}, <2 x i32> <i32 0, i32 1>
 109   // CHECK: sext <2 x i16> {{.*}} to <2 x i64>
 110   return _mm_cvtepi16_epi64(a);
 111 }
 112
 113 __m128i test_mm_cvtepi32_epi64(__m128i a) {
       // Expects a shufflevector selecting elements 0-1 of the <4 x i32>
       // input, then a sign-extension of those 2 dwords to <2 x i64>.
 114   // CHECK-LABEL: test_mm_cvtepi32_epi64
 115   // CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> {{.*}}, <2 x i32> <i32 0, i32 1>
 116   // CHECK: sext <2 x i32> {{.*}} to <2 x i64>
 117   return _mm_cvtepi32_epi64(a);
 118 }
 119
 120 __m128i test_mm_cvtepu8_epi16(__m128i a) {
       // Expects a shufflevector selecting elements 0-7 of the <16 x i8>
       // input, then a zero-extension of those 8 bytes to <8 x i16>.
 121   // CHECK-LABEL: test_mm_cvtepu8_epi16
 122   // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 123   // CHECK: zext <8 x i8> {{.*}} to <8 x i16>
 124   return _mm_cvtepu8_epi16(a);
 125 }
 126
 127 __m128i test_mm_cvtepu8_epi32(__m128i a) {
       // Expects a shufflevector selecting elements 0-3 of the <16 x i8>
       // input, then a zero-extension of those 4 bytes to <4 x i32>.
 128   // CHECK-LABEL: test_mm_cvtepu8_epi32
 129   // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 130   // CHECK: zext <4 x i8> {{.*}} to <4 x i32>
 131   return _mm_cvtepu8_epi32(a);
 132 }
 133
 134 __m128i test_mm_cvtepu8_epi64(__m128i a) {
       // Expects a shufflevector selecting elements 0-1 of the <16 x i8>
       // input, then a zero-extension of those 2 bytes to <2 x i64>.
 135   // CHECK-LABEL: test_mm_cvtepu8_epi64
 136   // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <2 x i32> <i32 0, i32 1>
 137   // CHECK: zext <2 x i8> {{.*}} to <2 x i64>
 138   return _mm_cvtepu8_epi64(a);
 139 }
 140
 141 __m128i test_mm_cvtepu16_epi32(__m128i a) {
       // Expects a shufflevector selecting elements 0-3 of the <8 x i16>
       // input, then a zero-extension of those 4 words to <4 x i32>.
 142   // CHECK-LABEL: test_mm_cvtepu16_epi32
 143   // CHECK: shufflevector <8 x i16> {{.*}}, <8 x i16> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 144   // CHECK: zext <4 x i16> {{.*}} to <4 x i32>
 145   return _mm_cvtepu16_epi32(a);
 146 }
 147
 148 __m128i test_mm_cvtepu16_epi64(__m128i a) {
       // Expects a shufflevector selecting elements 0-1 of the <8 x i16>
       // input, then a zero-extension of those 2 words to <2 x i64>.
 149   // CHECK-LABEL: test_mm_cvtepu16_epi64
 150   // CHECK: shufflevector <8 x i16> {{.*}}, <8 x i16> {{.*}}, <2 x i32> <i32 0, i32 1>
 151   // CHECK: zext <2 x i16> {{.*}} to <2 x i64>
 152   return _mm_cvtepu16_epi64(a);
 153 }
 154
 155 __m128i test_mm_cvtepu32_epi64(__m128i a) {
       // Expects a shufflevector selecting elements 0-1 of the <4 x i32>
       // input, then a zero-extension of those 2 dwords to <2 x i64>.
 156   // CHECK-LABEL: test_mm_cvtepu32_epi64
 157   // CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> {{.*}}, <2 x i32> <i32 0, i32 1>
 158   // CHECK: zext <2 x i32> {{.*}} to <2 x i64>
 159   return _mm_cvtepu32_epi64(a);
 160 }
 161
 162 __m128d test_mm_dp_pd(__m128d x, __m128d y) {
       // Expects _mm_dp_pd to be emitted as llvm.x86.sse41.dppd with the
       // immediate 7 passed through as an i8.
 163   // CHECK-LABEL: test_mm_dp_pd
 164   // CHECK: call <2 x double> @llvm.x86.sse41.dppd(<2 x double> {{.*}}, <2 x double> {{.*}}, i8 7)
 165   return _mm_dp_pd(x, y, 7);
 166 }
 167
 168 __m128 test_mm_dp_ps(__m128 x, __m128 y) {
       // Expects _mm_dp_ps to be emitted as llvm.x86.sse41.dpps with the
       // immediate 7 passed through as an i8.
 169   // CHECK-LABEL: test_mm_dp_ps
 170   // CHECK: call <4 x float> @llvm.x86.sse41.dpps(<4 x float> {{.*}}, <4 x float> {{.*}}, i8 7)
 171   return _mm_dp_ps(x, y, 7);
 172 }
 173
 174 int test_mm_extract_epi8(__m128i x) {
       // Expects an extractelement of lane 1 from <16 x i8> (index type may be
       // i32 or i64), followed by a zero-extension of the byte to i32.
 175   // CHECK-LABEL: test_mm_extract_epi8
 176   // CHECK: extractelement <16 x i8> %{{.*}}, {{i32|i64}} 1
 177   // CHECK: zext i8 %{{.*}} to i32
 178   return _mm_extract_epi8(x, 1);
 179 }
 180
 181 int test_mm_extract_epi32(__m128i x) {
       // Expects an extractelement of lane 1 from <4 x i32>; the index type
       // may be i32 or i64.
 182   // CHECK-LABEL: test_mm_extract_epi32
 183   // CHECK: extractelement <4 x i32> %{{.*}}, {{i32|i64}} 1
 184   return _mm_extract_epi32(x, 1);
 185 }
 186
 187 long long test_mm_extract_epi64(__m128i x) {
       // Expects an extractelement of lane 1 from <2 x i64>; the index type
       // may be i32 or i64.
 188   // CHECK-LABEL: test_mm_extract_epi64
 189   // CHECK: extractelement <2 x i64> %{{.*}}, {{i32|i64}} 1
 190   return _mm_extract_epi64(x, 1);
 191 }
 192
 193 int test_mm_extract_ps(__m128 x) {
       // Expects an extractelement of lane 1 from <4 x float>. The pattern
       // does not cover how the float becomes the int return value.
 194   // CHECK-LABEL: test_mm_extract_ps
 195   // CHECK: extractelement <4 x float> %{{.*}}, {{i32|i64}} 1
 196   return _mm_extract_ps(x, 1);
 197 }
 198
 199 __m128d test_mm_floor_pd(__m128d x) {
       // Expects _mm_floor_pd to be emitted as llvm.x86.sse41.round.pd with
       // rounding immediate 1.
 200   // CHECK-LABEL: test_mm_floor_pd
 201   // CHECK: call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %{{.*}}, i32 1)
 202   return _mm_floor_pd(x);
 203 }
 204
 205 __m128 test_mm_floor_ps(__m128 x) {
       // Expects _mm_floor_ps to be emitted as llvm.x86.sse41.round.ps with
       // rounding immediate 1.
 206   // CHECK-LABEL: test_mm_floor_ps
 207   // CHECK: call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %{{.*}}, i32 1)
 208   return _mm_floor_ps(x);
 209 }
 210
 211 __m128d test_mm_floor_sd(__m128d x, __m128d y) {
       // Expects _mm_floor_sd to be emitted as the two-operand
       // llvm.x86.sse41.round.sd intrinsic with rounding immediate 1.
 212   // CHECK-LABEL: test_mm_floor_sd
 213   // CHECK: call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 1)
 214   return _mm_floor_sd(x, y);
 215 }
 216
 217 __m128 test_mm_floor_ss(__m128 x, __m128 y) {
       // Expects _mm_floor_ss to be emitted as the two-operand
       // llvm.x86.sse41.round.ss intrinsic with rounding immediate 1.
 218   // CHECK-LABEL: test_mm_floor_ss
 219   // CHECK: call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 1)
 220   return _mm_floor_ss(x, y);
 221 }
 222
 223 __m128i test_mm_insert_epi8(__m128i x, char b) {
       // Expects an insertelement of an i8 value into lane 1 of <16 x i8>;
       // the index type may be i32 or i64.
 224   // CHECK-LABEL: test_mm_insert_epi8
 225   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, {{i32|i64}} 1
 226   return _mm_insert_epi8(x, b, 1);
 227 }
 228
 229 __m128i test_mm_insert_epi32(__m128i x, int b) {
       // Expects an insertelement of an i32 value into lane 1 of <4 x i32>;
       // the index type may be i32 or i64.
 230   // CHECK-LABEL: test_mm_insert_epi32
 231   // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, {{i32|i64}} 1
 232   return _mm_insert_epi32(x, b, 1);
 233 }
 234
 235 #ifdef __x86_64__
     // Compiled only when __x86_64__ is defined. Its patterns use the X64 check
     // prefix, which only the x86_64 RUN lines enable.
 236 __m128i test_mm_insert_epi64(__m128i x, long long b) {
       // Expects an insertelement of an i64 value into lane 1 of <2 x i64>;
       // the index type may be i32 or i64.
 237   // X64-LABEL: test_mm_insert_epi64
 238   // X64: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, {{i32|i64}} 1
 239   return _mm_insert_epi64(x, b, 1);
 240 }
 241 #endif
 242
 243 __m128 test_mm_insert_ps(__m128 x, __m128 y) {
       // Expects _mm_insert_ps to be emitted as llvm.x86.sse41.insertps with
       // the immediate 4 passed through as an i8.
 244   // CHECK-LABEL: test_mm_insert_ps
 245   // CHECK: call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 4)
 246   return _mm_insert_ps(x, y, 4);
 247 }
 248
 249 __m128i test_mm_max_epi8(__m128i x, __m128i y) {
       // Expects the generic signed-max intrinsic llvm.smax.v16i8.
 250   // CHECK-LABEL: test_mm_max_epi8
 251   // CHECK: call <16 x i8> @llvm.smax.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
 252   return _mm_max_epi8(x, y);
 253 }
 254
 255 __m128i test_mm_max_epi32(__m128i x, __m128i y) {
       // Expects the generic signed-max intrinsic llvm.smax.v4i32.
 256   // CHECK-LABEL: test_mm_max_epi32
 257   // CHECK: call <4 x i32> @llvm.smax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
 258   return _mm_max_epi32(x, y);
 259 }
 260
 261 __m128i test_mm_max_epu16(__m128i x, __m128i y) {
       // Expects the generic unsigned-max intrinsic llvm.umax.v8i16.
 262   // CHECK-LABEL: test_mm_max_epu16
 263   // CHECK: call <8 x i16> @llvm.umax.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
 264   return _mm_max_epu16(x, y);
 265 }
 266
 267 __m128i test_mm_max_epu32(__m128i x, __m128i y) {
       // Expects the generic unsigned-max intrinsic llvm.umax.v4i32.
 268   // CHECK-LABEL: test_mm_max_epu32
 269   // CHECK: call <4 x i32> @llvm.umax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
 270   return _mm_max_epu32(x, y);
 271 }
 272
 273 __m128i test_mm_min_epi8(__m128i x, __m128i y) {
       // Expects the generic signed-min intrinsic llvm.smin.v16i8.
 274   // CHECK-LABEL: test_mm_min_epi8
 275   // CHECK: call <16 x i8> @llvm.smin.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
 276   return _mm_min_epi8(x, y);
 277 }
 278
 279 __m128i test_mm_min_epi32(__m128i x, __m128i y) {
       // Expects the generic signed-min intrinsic llvm.smin.v4i32.
 280   // CHECK-LABEL: test_mm_min_epi32
 281   // CHECK: call <4 x i32> @llvm.smin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
 282   return _mm_min_epi32(x, y);
 283 }
 284
 285 __m128i test_mm_min_epu16(__m128i x, __m128i y) {
       // Expects the generic unsigned-min intrinsic llvm.umin.v8i16.
 286   // CHECK-LABEL: test_mm_min_epu16
 287   // CHECK: call <8 x i16> @llvm.umin.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
 288   return _mm_min_epu16(x, y);
 289 }
 290
 291 __m128i test_mm_min_epu32(__m128i x, __m128i y) {
       // Expects the generic unsigned-min intrinsic llvm.umin.v4i32.
 292   // CHECK-LABEL: test_mm_min_epu32
 293   // CHECK: call <4 x i32> @llvm.umin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
 294   return _mm_min_epu32(x, y);
 295 }
 296
 297 __m128i test_mm_minpos_epu16(__m128i x) {
       // Expects _mm_minpos_epu16 to be emitted as a call to the
       // llvm.x86.sse41.phminposuw intrinsic on <8 x i16>.
 298   // CHECK-LABEL: test_mm_minpos_epu16
 299   // CHECK: call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %{{.*}})
 300   return _mm_minpos_epu16(x);
 301 }
 302
 303 __m128i test_mm_mpsadbw_epu8(__m128i x, __m128i y) {
       // Expects llvm.x86.sse41.mpsadbw taking two <16 x i8> operands plus the
       // immediate 1 as an i8, and returning <8 x i16>.
 304   // CHECK-LABEL: test_mm_mpsadbw_epu8
 305   // CHECK: call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 1)
 306   return _mm_mpsadbw_epu8(x, y, 1);
 307 }
 308
 309 __m128i test_mm_mul_epi32(__m128i x, __m128i y) {
       // Expects two shl/ashr-by-32 pairs on <2 x i64> followed by a <2 x i64>
       // multiply. Presumably each pair sign-extends the low 32 bits of a lane
       // of one operand; the patterns only pin the instruction sequence.
 310   // CHECK-LABEL: test_mm_mul_epi32
 311   // CHECK: shl <2 x i64> %{{.*}}, <i64 32, i64 32>
 312   // CHECK: ashr <2 x i64> %{{.*}}, <i64 32, i64 32>
 313   // CHECK: shl <2 x i64> %{{.*}}, <i64 32, i64 32>
 314   // CHECK: ashr <2 x i64> %{{.*}}, <i64 32, i64 32>
 315   // CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
 316   return _mm_mul_epi32(x, y);
 317 }
 318
 319 __m128i test_mm_mullo_epi32(__m128i x, __m128i y) {
       // Expects a plain mul on <4 x i32>; the operands are left unconstrained
       // by the pattern.
 320   // CHECK-LABEL: test_mm_mullo_epi32
 321   // CHECK: mul <4 x i32>
 322   return _mm_mullo_epi32(x, y);
 323 }
 324
 325 __m128i test_mm_packus_epi32(__m128i x, __m128i y) {
       // Expects llvm.x86.sse41.packusdw taking two <4 x i32> operands and
       // returning <8 x i16>.
 326   // CHECK-LABEL: test_mm_packus_epi32
 327   // CHECK: call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
 328   return _mm_packus_epi32(x, y);
 329 }
 330
 331 __m128d test_mm_round_pd(__m128d x) {
       // Expects the rounding immediate 4 to reach llvm.x86.sse41.round.pd
       // unchanged as an i32.
 332   // CHECK-LABEL: test_mm_round_pd
 333   // CHECK: call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %{{.*}}, i32 4)
 334   return _mm_round_pd(x, 4);
 335 }
 336
 337 __m128 test_mm_round_ps(__m128 x) {
       // Expects the rounding immediate 4 to reach llvm.x86.sse41.round.ps
       // unchanged as an i32.
 338   // CHECK-LABEL: test_mm_round_ps
 339   // CHECK: call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %{{.*}}, i32 4)
 340   return _mm_round_ps(x, 4);
 341 }
 342
 343 __m128d test_mm_round_sd(__m128d x, __m128d y) {
       // Expects the rounding immediate 4 to reach the two-operand
       // llvm.x86.sse41.round.sd intrinsic unchanged as an i32.
 344   // CHECK-LABEL: test_mm_round_sd
 345   // CHECK: call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 4)
 346   return _mm_round_sd(x, y, 4);
 347 }
 348
 349 __m128 test_mm_round_ss(__m128 x, __m128 y) {
       // Expects the rounding immediate 4 to reach the two-operand
       // llvm.x86.sse41.round.ss intrinsic unchanged as an i32.
 350   // CHECK-LABEL: test_mm_round_ss
 351   // CHECK: call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 4)
 352   return _mm_round_ss(x, y, 4);
 353 }
 354
 355 __m128i test_mm_stream_load_si128(__m128i const *a) {
       // Expects a 16-byte-aligned <2 x i64> load that carries !nontemporal
       // metadata.
 356   // CHECK-LABEL: test_mm_stream_load_si128
 357   // CHECK: load <2 x i64>, ptr %{{.*}}, align 16, !nontemporal
 358   return _mm_stream_load_si128(a);
 359 }
 360
 361 __m128i test_mm_stream_load_si128_void(const void *a) {
       // Passes a const void * to _mm_stream_load_si128 (the RUN lines build
       // with -Wall -Werror) and expects a 16-byte-aligned <2 x i64> load that
       // carries !nontemporal metadata.
 362   // CHECK-LABEL: test_mm_stream_load_si128_void
 363   // CHECK: load <2 x i64>, ptr %{{.*}}, align 16, !nontemporal
 364   return _mm_stream_load_si128(a);
 365 }
 366
 367 int test_mm_test_all_ones(__m128i x) {
       // Expects a call to llvm.x86.sse41.ptestc with two <2 x i64> operands,
       // although the source passes only x.
 368   // CHECK-LABEL: test_mm_test_all_ones
 369   // CHECK: call i32 @llvm.x86.sse41.ptestc(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
 370   return _mm_test_all_ones(x);
 371 }
 372
 373 int test_mm_test_all_zeros(__m128i x, __m128i y) {
       // Expects _mm_test_all_zeros to be emitted as a call to
       // llvm.x86.sse41.ptestz returning i32.
 374   // CHECK-LABEL: test_mm_test_all_zeros
 375   // CHECK: call i32 @llvm.x86.sse41.ptestz(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
 376   return _mm_test_all_zeros(x, y);
 377 }
 378
 379 int test_mm_test_mix_ones_zeros(__m128i x, __m128i y) {
       // Expects _mm_test_mix_ones_zeros to be emitted as a call to
       // llvm.x86.sse41.ptestnzc returning i32.
 380   // CHECK-LABEL: test_mm_test_mix_ones_zeros
 381   // CHECK: call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
 382   return _mm_test_mix_ones_zeros(x, y);
 383 }
 384
 385 int test_mm_testc_si128(__m128i x, __m128i y) {
       // Expects _mm_testc_si128 to be emitted as a call to
       // llvm.x86.sse41.ptestc returning i32.
 386   // CHECK-LABEL: test_mm_testc_si128
 387   // CHECK: call i32 @llvm.x86.sse41.ptestc(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
 388   return _mm_testc_si128(x, y);
 389 }
 390
 391 int test_mm_testnzc_si128(__m128i x, __m128i y) {
       // Expects _mm_testnzc_si128 to be emitted as a call to
       // llvm.x86.sse41.ptestnzc returning i32.
 392   // CHECK-LABEL: test_mm_testnzc_si128
 393   // CHECK: call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
 394   return _mm_testnzc_si128(x, y);
 395 }
 396
 397 int test_mm_testz_si128(__m128i x, __m128i y) {
       // Expects _mm_testz_si128 to be emitted as a call to
       // llvm.x86.sse41.ptestz returning i32.
 398   // CHECK-LABEL: test_mm_testz_si128
 399   // CHECK: call i32 @llvm.x86.sse41.ptestz(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
 400   return _mm_testz_si128(x, y);
 401 }
 402
 403 // Make sure brackets work after macro intrinsics.
 404 float pr51324(__m128 a) {
       // Subscripts the result of _mm_round_ps(a, 0) directly. Expects the
       // round.ps call with immediate 0, then an extractelement of lane 0.
 405   // CHECK-LABEL: pr51324
 406   // CHECK: call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %{{.*}}, i32 0)
 407   // CHECK: extractelement <4 x float> %{{.*}}, i32 0
 408   return _mm_round_ps(a, 0)[0];
 409 }
 410
 411 // Ensure the _mm_test_all_ones macro evaluates its argument only once
 412 __m128i expensive_call();
 413 int pr60006() {
       // Passes a call expression as the argument. Expects one call to
       // expensive_call and no further call to it before the ptestc call.
 414   // CHECK-LABEL: pr60006
 415   // CHECK: call {{.*}} @expensive_call
 416   // CHECK-NOT: call {{.*}} @expensive_call
 417   // CHECK: call i32 @llvm.x86.sse41.ptestc(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
 418   return _mm_test_all_ones(expensive_call());
 419 }