clang/test/CodeGen/X86/x86-nontemporal.c

   1 // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -target-feature +avx -target-feature +avx2 -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK
   2 // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -target-feature +avx -target-feature +avx2 -target-feature +avx512f -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK
   3
   4 // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -target-feature +avx -target-feature +avx2 -target-feature +avx512f -emit-llvm -o - -Wall -Werror -fmax-type-align=16 | FileCheck %s --check-prefix=CHECK
   5 // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -target-feature +avx -target-feature +avx2 -target-feature +avx512f -fno-signed-char -emit-llvm -o - -Wall -Werror -fmax-type-align=16 | FileCheck %s --check-prefix=CHECK
   6
   7 #include <immintrin.h>
   8
// (PR33830) These tests ensure the correct alignment of non-temporal loads and stores on Darwin targets, where -fmax-type-align is set to 16.
  10
  11 //
  12 // 128-bit vectors
  13 //
  14
  15 void test_mm_stream_pd(double* A, __m128d B) {
  16   // CHECK-LABEL: test_mm_stream_pd
  17   // CHECK: store <2 x double> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal
  18   _mm_stream_pd(A, B);
  19 }
  20
  21 void test_mm_stream_ps(float* A, __m128 B) {
  22   // CHECK16-LABEL: test_mm_stream_ps
  23   // CHECK16: store <4 x float> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal
  24   _mm_stream_ps(A, B);
  25 }
  26
  27 void test_mm_stream_si128(__m128i* A, __m128i B) {
  28   // CHECK-LABEL: test_mm_stream_si128
  29   // CHECK: store <2 x i64> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal
  30   _mm_stream_si128(A, B);
  31 }
  32
  33 __m128i test_mm_stream_load_si128(__m128i const *A) {
  34   // CHECK-LABEL: test_mm_stream_load_si128
  35   // CHECK: load <2 x i64>, ptr %{{.*}}, align 16, !nontemporal
  36   return _mm_stream_load_si128(A);
  37 }
  38
  39 //
  40 // 256-bit vectors
  41 //
  42
  43 void test_mm256_stream_pd(double* A, __m256d B) {
  44   // CHECK-LABEL: test_mm256_stream_pd
  45   // CHECK: store <4 x double> %{{.*}}, ptr %{{.*}}, align 32, !nontemporal
  46   _mm256_stream_pd(A, B);
  47 }
  48
  49 void test_mm256_stream_ps(float* A, __m256 B) {
  50   // CHECK-LABEL: test_mm256_stream_ps
  51   // CHECK: store <8 x float> %{{.*}}, ptr %{{.*}}, align 32, !nontemporal
  52   _mm256_stream_ps(A, B);
  53 }
  54
  55 void test_mm256_stream_si256(__m256i* A, __m256i B) {
  56   // CHECK-LABEL: test_mm256_stream_si256
  57   // CHECK: store <4 x i64> %{{.*}}, ptr %{{.*}}, align 32, !nontemporal
  58   _mm256_stream_si256(A, B);
  59 }
  60
  61 __m256i test_mm256_stream_load_si256(__m256i const *A) {
  62   // CHECK-LABEL: test_mm256_stream_load_si256
  63   // CHECK: load <4 x i64>, ptr %{{.*}}, align 32, !nontemporal
  64   return _mm256_stream_load_si256(A);
  65 }
  66
  67 //
  68 // 512-bit vectors
  69 //
  70
  71 void test_mm512_stream_pd(double* A, __m512d B) {
  72   // CHECK-LABEL: test_mm512_stream_pd
  73   // CHECK: store <8 x double> %{{.*}}, ptr %{{.*}}, align 64, !nontemporal
  74   _mm512_stream_pd(A, B);
  75 }
  76
  77 void test_mm512_stream_ps(float* A, __m512 B) {
  78   // CHECK-LABEL: test_mm512_stream_ps
  79   // CHECK: store <16 x float> %{{.*}}, ptr %{{.*}}, align 64, !nontemporal
  80   _mm512_stream_ps(A, B);
  81 }
  82
  83 void test_mm512_stream_si512(__m512i* A, __m512i B) {
  84   // CHECK-LABEL: test_mm512_stream_si512
  85   // CHECK: store <8 x i64> %{{.*}}, ptr %{{.*}}, align 64, !nontemporal
  86   _mm512_stream_si512(A, B);
  87 }
  88
  89 __m512i test_mm512_stream_load_si512(void *A) {
  90   // CHECK-LABEL: test_mm512_stream_load_si512
  91   // CHECK: load <8 x i64>, ptr %{{.*}}, align 64, !nontemporal
  92   return _mm512_stream_load_si512(A);
  93 }