clang/test/CodeGen/AArch64/v8.1a-neon-intrinsics.c

   1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
   2 // RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \
   3 // RUN:  -target-feature +v8.1a -emit-llvm -disable-O0-optnone -o - %s | opt -passes=mem2reg,dce -S | FileCheck %s
   4
   5 // REQUIRES: aarch64-registered-target
   6
   7  #include <arm_neon.h>
   8
   9 // CHECK-LABEL: @test_vqrdmlah_laneq_s16(
  10 // CHECK-NEXT:  entry:
  11 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8>
  12 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
  13 // CHECK-NEXT:    [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  14 // CHECK-NEXT:    [[VQRDMLAH_V3_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[LANE]])
  15 // CHECK-NEXT:    ret <4 x i16> [[VQRDMLAH_V3_I]]
  16 //
  17 int16x4_t test_vqrdmlah_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
  18   return vqrdmlah_laneq_s16(a, b, v, 7);
  19 }
  20
  21 // CHECK-LABEL: @test_vqrdmlah_laneq_s32(
  22 // CHECK-NEXT:  entry:
  23 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8>
  24 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
  25 // CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3>
  26 // CHECK-NEXT:    [[VQRDMLAH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmlah.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> [[LANE]])
  27 // CHECK-NEXT:    ret <2 x i32> [[VQRDMLAH_V3_I]]
  28 //
  29 int32x2_t test_vqrdmlah_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
  30   return vqrdmlah_laneq_s32(a, b, v, 3);
  31 }
  32
  33 // CHECK-LABEL: @test_vqrdmlahq_laneq_s16(
  34 // CHECK-NEXT:  entry:
  35 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8>
  36 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
  37 // CHECK-NEXT:    [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  38 // CHECK-NEXT:    [[VQRDMLAHQ_V3_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmlah.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[LANE]])
  39 // CHECK-NEXT:    ret <8 x i16> [[VQRDMLAHQ_V3_I]]
  40 //
  41 int16x8_t test_vqrdmlahq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
  42   return vqrdmlahq_laneq_s16(a, b, v, 7);
  43 }
  44
  45 // CHECK-LABEL: @test_vqrdmlahq_laneq_s32(
  46 // CHECK-NEXT:  entry:
  47 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8>
  48 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
  49 // CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  50 // CHECK-NEXT:    [[VQRDMLAHQ_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmlah.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[LANE]])
  51 // CHECK-NEXT:    ret <4 x i32> [[VQRDMLAHQ_V3_I]]
  52 //
  53 int32x4_t test_vqrdmlahq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
  54   return vqrdmlahq_laneq_s32(a, b, v, 3);
  55 }
  56
  57 // CHECK-LABEL: @test_vqrdmlahh_s16(
  58 // CHECK-NEXT:  entry:
  59 // CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A:%.*]], i64 0
  60 // CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i64 0
  61 // CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[C:%.*]], i64 0
  62 // CHECK-NEXT:    [[VQRDMLAHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
  63 // CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i16> [[VQRDMLAHH_S16_I]], i64 0
  64 // CHECK-NEXT:    ret i16 [[TMP3]]
  65 //
  66 int16_t test_vqrdmlahh_s16(int16_t a, int16_t b, int16_t c) {
  67   return vqrdmlahh_s16(a, b, c);
  68 }
  69
  70 // CHECK-LABEL: @test_vqrdmlahs_s32(
  71 // CHECK-NEXT:  entry:
  72 // CHECK-NEXT:    [[VQRDMLAHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmlah.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]])
  73 // CHECK-NEXT:    ret i32 [[VQRDMLAHS_S32_I]]
  74 //
  75 int32_t test_vqrdmlahs_s32(int32_t a, int32_t b, int32_t c) {
  76   return vqrdmlahs_s32(a, b, c);
  77 }
  78
  79 // CHECK-LABEL: @test_vqrdmlahh_lane_s16(
  80 // CHECK-NEXT:  entry:
  81 // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <4 x i16> [[C:%.*]], i32 3
  82 // CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A:%.*]], i64 0
  83 // CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i64 0
  84 // CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[VGET_LANE]], i64 0
  85 // CHECK-NEXT:    [[VQRDMLAHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
  86 // CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i16> [[VQRDMLAHH_S16_I]], i64 0
  87 // CHECK-NEXT:    ret i16 [[TMP3]]
  88 //
  89 int16_t test_vqrdmlahh_lane_s16(int16_t a, int16_t b, int16x4_t c) {
  90   return vqrdmlahh_lane_s16(a, b, c, 3);
  91 }
  92
  93 // CHECK-LABEL: @test_vqrdmlahs_lane_s32(
  94 // CHECK-NEXT:  entry:
  95 // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <2 x i32> [[C:%.*]], i32 1
  96 // CHECK-NEXT:    [[VQRDMLAHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmlah.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[VGET_LANE]])
  97 // CHECK-NEXT:    ret i32 [[VQRDMLAHS_S32_I]]
  98 //
  99 int32_t test_vqrdmlahs_lane_s32(int32_t a, int32_t b, int32x2_t c) {
 100   return vqrdmlahs_lane_s32(a, b, c, 1);
 101 }
 102
 103 // CHECK-LABEL: @test_vqrdmlahh_laneq_s16(
 104 // CHECK-NEXT:  entry:
 105 // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[C:%.*]], i32 7
 106 // CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A:%.*]], i64 0
 107 // CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i64 0
 108 // CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[VGETQ_LANE]], i64 0
 109 // CHECK-NEXT:    [[VQRDMLAHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
 110 // CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i16> [[VQRDMLAHH_S16_I]], i64 0
 111 // CHECK-NEXT:    ret i16 [[TMP3]]
 112 //
 113 int16_t test_vqrdmlahh_laneq_s16(int16_t a, int16_t b, int16x8_t c) {
 114   return vqrdmlahh_laneq_s16(a, b, c, 7);
 115 }
 116
 117 // CHECK-LABEL: @test_vqrdmlahs_laneq_s32(
 118 // CHECK-NEXT:  entry:
 119 // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 3
 120 // CHECK-NEXT:    [[VQRDMLAHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmlah.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[VGETQ_LANE]])
 121 // CHECK-NEXT:    ret i32 [[VQRDMLAHS_S32_I]]
 122 //
 123 int32_t test_vqrdmlahs_laneq_s32(int32_t a, int32_t b, int32x4_t c) {
 124   return vqrdmlahs_laneq_s32(a, b, c, 3);
 125 }
 126
 127 // CHECK-LABEL: @test_vqrdmlsh_laneq_s16(
 128 // CHECK-NEXT:  entry:
 129 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8>
 130 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 131 // CHECK-NEXT:    [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
 132 // CHECK-NEXT:    [[VQRDMLSH_V3_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[LANE]])
 133 // CHECK-NEXT:    ret <4 x i16> [[VQRDMLSH_V3_I]]
 134 //
 135 int16x4_t test_vqrdmlsh_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
 136   return vqrdmlsh_laneq_s16(a, b, v, 7);
 137 }
 138
 139 // CHECK-LABEL: @test_vqrdmlsh_laneq_s32(
 140 // CHECK-NEXT:  entry:
 141 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8>
 142 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 143 // CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3>
 144 // CHECK-NEXT:    [[VQRDMLSH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmlsh.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> [[LANE]])
 145 // CHECK-NEXT:    ret <2 x i32> [[VQRDMLSH_V3_I]]
 146 //
 147 int32x2_t test_vqrdmlsh_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
 148   return vqrdmlsh_laneq_s32(a, b, v, 3);
 149 }
 150
 151 // CHECK-LABEL: @test_vqrdmlshq_laneq_s16(
 152 // CHECK-NEXT:  entry:
 153 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8>
 154 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 155 // CHECK-NEXT:    [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
 156 // CHECK-NEXT:    [[VQRDMLSHQ_V3_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmlsh.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[LANE]])
 157 // CHECK-NEXT:    ret <8 x i16> [[VQRDMLSHQ_V3_I]]
 158 //
 159 int16x8_t test_vqrdmlshq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
 160   return vqrdmlshq_laneq_s16(a, b, v, 7);
 161 }
 162
 163 // CHECK-LABEL: @test_vqrdmlshq_laneq_s32(
 164 // CHECK-NEXT:  entry:
 165 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8>
 166 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 167 // CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
 168 // CHECK-NEXT:    [[VQRDMLSHQ_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmlsh.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[LANE]])
 169 // CHECK-NEXT:    ret <4 x i32> [[VQRDMLSHQ_V3_I]]
 170 //
 171 int32x4_t test_vqrdmlshq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
 172   return vqrdmlshq_laneq_s32(a, b, v, 3);
 173 }
 174
 175 // CHECK-LABEL: @test_vqrdmlshh_s16(
 176 // CHECK-NEXT:  entry:
 177 // CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A:%.*]], i64 0
 178 // CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i64 0
 179 // CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[C:%.*]], i64 0
 180 // CHECK-NEXT:    [[VQRDMLSHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
 181 // CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i16> [[VQRDMLSHH_S16_I]], i64 0
 182 // CHECK-NEXT:    ret i16 [[TMP3]]
 183 //
 184 int16_t test_vqrdmlshh_s16(int16_t a, int16_t b, int16_t c) {
 185   return vqrdmlshh_s16(a, b, c);
 186 }
 187
 188 // CHECK-LABEL: @test_vqrdmlshs_s32(
 189 // CHECK-NEXT:  entry:
 190 // CHECK-NEXT:    [[VQRDMLSHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmlsh.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]])
 191 // CHECK-NEXT:    ret i32 [[VQRDMLSHS_S32_I]]
 192 //
 193 int32_t test_vqrdmlshs_s32(int32_t a, int32_t b, int32_t c) {
 194   return vqrdmlshs_s32(a, b, c);
 195 }
 196
 197 // CHECK-LABEL: @test_vqrdmlshh_lane_s16(
 198 // CHECK-NEXT:  entry:
 199 // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <4 x i16> [[C:%.*]], i32 3
 200 // CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A:%.*]], i64 0
 201 // CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i64 0
 202 // CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[VGET_LANE]], i64 0
 203 // CHECK-NEXT:    [[VQRDMLSHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
 204 // CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i16> [[VQRDMLSHH_S16_I]], i64 0
 205 // CHECK-NEXT:    ret i16 [[TMP3]]
 206 //
 207 int16_t test_vqrdmlshh_lane_s16(int16_t a, int16_t b, int16x4_t c) {
 208   return vqrdmlshh_lane_s16(a, b, c, 3);
 209 }
 210
 211 // CHECK-LABEL: @test_vqrdmlshs_lane_s32(
 212 // CHECK-NEXT:  entry:
 213 // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <2 x i32> [[C:%.*]], i32 1
 214 // CHECK-NEXT:    [[VQRDMLSHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmlsh.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[VGET_LANE]])
 215 // CHECK-NEXT:    ret i32 [[VQRDMLSHS_S32_I]]
 216 //
 217 int32_t test_vqrdmlshs_lane_s32(int32_t a, int32_t b, int32x2_t c) {
 218   return vqrdmlshs_lane_s32(a, b, c, 1);
 219 }
 220
 221 // CHECK-LABEL: @test_vqrdmlshh_laneq_s16(
 222 // CHECK-NEXT:  entry:
 223 // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[C:%.*]], i32 7
 224 // CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A:%.*]], i64 0
 225 // CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i64 0
 226 // CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[VGETQ_LANE]], i64 0
 227 // CHECK-NEXT:    [[VQRDMLSHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
 228 // CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i16> [[VQRDMLSHH_S16_I]], i64 0
 229 // CHECK-NEXT:    ret i16 [[TMP3]]
 230 //
 231 int16_t test_vqrdmlshh_laneq_s16(int16_t a, int16_t b, int16x8_t c) {
 232   return vqrdmlshh_laneq_s16(a, b, c, 7);
 233 }
 234
 235 // CHECK-LABEL: @test_vqrdmlshs_laneq_s32(
 236 // CHECK-NEXT:  entry:
 237 // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 3
 238 // CHECK-NEXT:    [[VQRDMLSHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmlsh.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[VGETQ_LANE]])
 239 // CHECK-NEXT:    ret i32 [[VQRDMLSHS_S32_I]]
 240 //
 241 int32_t test_vqrdmlshs_laneq_s32(int32_t a, int32_t b, int32x4_t c) {
 242   return vqrdmlshs_laneq_s32(a, b, c, 3);
 243 }