// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple aarch64 -target-feature +neon \
// RUN:   -target-feature +rcpc3 -disable-O0-optnone -emit-llvm -o - %s \
// RUN:   | opt -S -passes=mem2reg | FileCheck %s

// REQUIRES: aarch64-registered-target

#include <arm_neon.h>

// CHECK-LABEL: @test_vldap1q_lane_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8
// CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1
// CHECK-NEXT: ret <2 x i64> [[VLDAP1_LANE]]
//
uint64x2_t test_vldap1q_lane_u64(uint64_t *a, uint64x2_t b) {
  return vldap1q_lane_u64(a, b, 1);
}

// CHECK-LABEL: @test_vldap1q_lane_s64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8
// CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1
// CHECK-NEXT: ret <2 x i64> [[VLDAP1_LANE]]
//
int64x2_t test_vldap1q_lane_s64(int64_t *a, int64x2_t b) {
  return vldap1q_lane_s64(a, b, 1);
}

// CHECK-LABEL: @test_vldap1q_lane_f64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK-NEXT: [[TMP2:%.*]] = load atomic double, ptr [[A:%.*]] acquire, align 8
// CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <2 x double> [[TMP1]], double [[TMP2]], i32 1
// CHECK-NEXT: ret <2 x double> [[VLDAP1_LANE]]
//
float64x2_t test_vldap1q_lane_f64(float64_t *a, float64x2_t b) {
  return vldap1q_lane_f64(a, b, 1);
}

// CHECK-LABEL: @test_vldap1q_lane_p64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8
// CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1
// CHECK-NEXT: ret <2 x i64> [[VLDAP1_LANE]]
//
poly64x2_t test_vldap1q_lane_p64(poly64_t *a, poly64x2_t b) {
  return vldap1q_lane_p64(a, b, 1);
}

// CHECK-LABEL: @test_vldap1_lane_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8
// CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0
// CHECK-NEXT: ret <1 x i64> [[VLDAP1_LANE]]
//
uint64x1_t test_vldap1_lane_u64(uint64_t *a, uint64x1_t b) {
  return vldap1_lane_u64(a, b, 0);
}

// CHECK-LABEL: @test_vldap1_lane_s64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8
// CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0
// CHECK-NEXT: ret <1 x i64> [[VLDAP1_LANE]]
//
int64x1_t test_vldap1_lane_s64(int64_t *a, int64x1_t b) {
  return vldap1_lane_s64(a, b, 0);
}

// CHECK-LABEL: @test_vldap1_lane_f64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK-NEXT: [[TMP2:%.*]] = load atomic double, ptr [[A:%.*]] acquire, align 8
// CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <1 x double> [[TMP1]], double [[TMP2]], i32 0
// CHECK-NEXT: ret <1 x double> [[VLDAP1_LANE]]
//
float64x1_t test_vldap1_lane_f64(float64_t *a, float64x1_t b) {
  return vldap1_lane_f64(a, b, 0);
}

// CHECK-LABEL: @test_vldap1_lane_p64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8
// CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0
// CHECK-NEXT: ret <1 x i64> [[VLDAP1_LANE]]
//
poly64x1_t test_vldap1_lane_p64(poly64_t *a, poly64x1_t b) {
  return vldap1_lane_p64(a, b, 0);
}

// CHECK-LABEL: @test_vstl1q_lane_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK-NEXT: store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8
// CHECK-NEXT: ret void
//
void test_vstl1q_lane_u64(uint64_t *a, uint64x2_t b) {
  vstl1q_lane_u64(a, b, 1);
}

// CHECK-LABEL: @test_vstl1q_lane_s64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK-NEXT: store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8
// CHECK-NEXT: ret void
//
void test_vstl1q_lane_s64(int64_t *a, int64x2_t b) {
  vstl1q_lane_s64(a, b, 1);
}

// CHECK-LABEL: @test_vstl1q_lane_f64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
// CHECK-NEXT: store atomic double [[TMP2]], ptr [[A:%.*]] release, align 8
// CHECK-NEXT: ret void
//
void test_vstl1q_lane_f64(float64_t *a, float64x2_t b) {
  vstl1q_lane_f64(a, b, 1);
}

// CHECK-LABEL: @test_vstl1q_lane_p64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK-NEXT: store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8
// CHECK-NEXT: ret void
//
void test_vstl1q_lane_p64(poly64_t *a, poly64x2_t b) {
  vstl1q_lane_p64(a, b, 1);
}

// CHECK-LABEL: @test_vstl1_lane_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK-NEXT: store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8
// CHECK-NEXT: ret void
//
void test_vstl1_lane_u64(uint64_t *a, uint64x1_t b) {
  vstl1_lane_u64(a, b, 0);
}

// CHECK-LABEL: @test_vstl1_lane_s64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK-NEXT: store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8
// CHECK-NEXT: ret void
//
void test_vstl1_lane_s64(int64_t *a, int64x1_t b) {
  vstl1_lane_s64(a, b, 0);
}

// CHECK-LABEL: @test_vstl1_lane_f64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK-NEXT: [[TMP2:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
// CHECK-NEXT: store atomic double [[TMP2]], ptr [[A:%.*]] release, align 8
// CHECK-NEXT: ret void
//
void test_vstl1_lane_f64(float64_t *a, float64x1_t b) {
  vstl1_lane_f64(a, b, 0);
}

// CHECK-LABEL: @test_vstl1_lane_p64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK-NEXT: store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8
// CHECK-NEXT: ret void
//
void test_vstl1_lane_p64(poly64_t *a, poly64x1_t b) {
  vstl1_lane_p64(a, b, 0);
}