clang/test/CodeGen/arm-v8.1a-neon-intrinsics.c

   1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
   2 // RUN: %clang_cc1 -triple armv8.1a-linux-gnu -target-abi apcs-gnu -target-feature +neon \
   3 // RUN:  -emit-llvm -o - %s -disable-O0-optnone | opt -passes=mem2reg,dce -S \
   4 // RUN:  | FileCheck %s --check-prefix=CHECK-ARM
   5
   6 // RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \
   7 // RUN:  -target-feature +v8.1a -emit-llvm -o - %s -disable-O0-optnone | opt -passes=mem2reg,dce -S \
   8 // RUN:  | FileCheck %s --check-prefix=CHECK-AARCH64
   9
  10 // REQUIRES: arm-registered-target,aarch64-registered-target
  11
  12 #include <arm_neon.h>
  13
  14 // CHECK-ARM-LABEL: @test_vqrdmlah_s16(
  15 // CHECK-ARM-NEXT:  entry:
  16 // CHECK-ARM-NEXT:    [[VQRDMLAH_V3_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmlah.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[C:%.*]])
  17 // CHECK-ARM-NEXT:    ret <4 x i16> [[VQRDMLAH_V3_I]]
  18 //
  19 // CHECK-AARCH64-LABEL: @test_vqrdmlah_s16(
  20 // CHECK-AARCH64-NEXT:  entry:
  21 // CHECK-AARCH64-NEXT:    [[VQRDMLAH_V3_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[C:%.*]])
  22 // CHECK-AARCH64-NEXT:    ret <4 x i16> [[VQRDMLAH_V3_I]]
  23 //
  24 int16x4_t test_vqrdmlah_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  25   // Forwards a, b, c unchanged to vqrdmlah_s16; the FileCheck patterns above expect one v4i16 vqrdmlah (ARM) / sqrdmlah (AArch64) intrinsic call whose result is returned.
  26   return vqrdmlah_s16(a, b, c);
  27 }
  28
  29 // CHECK-ARM-LABEL: @test_vqrdmlah_s32(
  30 // CHECK-ARM-NEXT:  entry:
  31 // CHECK-ARM-NEXT:    [[VQRDMLAH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmlah.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> [[C:%.*]])
  32 // CHECK-ARM-NEXT:    ret <2 x i32> [[VQRDMLAH_V3_I]]
  33 //
  34 // CHECK-AARCH64-LABEL: @test_vqrdmlah_s32(
  35 // CHECK-AARCH64-NEXT:  entry:
  36 // CHECK-AARCH64-NEXT:    [[VQRDMLAH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmlah.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> [[C:%.*]])
  37 // CHECK-AARCH64-NEXT:    ret <2 x i32> [[VQRDMLAH_V3_I]]
  38 //
  39 int32x2_t test_vqrdmlah_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  40   // Forwards a, b, c unchanged to vqrdmlah_s32; the FileCheck patterns above expect one v2i32 vqrdmlah (ARM) / sqrdmlah (AArch64) intrinsic call whose result is returned.
  41   return vqrdmlah_s32(a, b, c);
  42 }
  43
  44 // CHECK-ARM-LABEL: @test_vqrdmlahq_s16(
  45 // CHECK-ARM-NEXT:  entry:
  46 // CHECK-ARM-NEXT:    [[VQRDMLAHQ_V3_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmlah.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]])
  47 // CHECK-ARM-NEXT:    ret <8 x i16> [[VQRDMLAHQ_V3_I]]
  48 //
  49 // CHECK-AARCH64-LABEL: @test_vqrdmlahq_s16(
  50 // CHECK-AARCH64-NEXT:  entry:
  51 // CHECK-AARCH64-NEXT:    [[VQRDMLAHQ_V3_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmlah.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]])
  52 // CHECK-AARCH64-NEXT:    ret <8 x i16> [[VQRDMLAHQ_V3_I]]
  53 //
  54 int16x8_t test_vqrdmlahq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
  55   // Forwards a, b, c unchanged to vqrdmlahq_s16; the FileCheck patterns above expect one v8i16 vqrdmlah (ARM) / sqrdmlah (AArch64) intrinsic call whose result is returned.
  56   return vqrdmlahq_s16(a, b, c);
  57 }
  58
  59 // CHECK-ARM-LABEL: @test_vqrdmlahq_s32(
  60 // CHECK-ARM-NEXT:  entry:
  61 // CHECK-ARM-NEXT:    [[VQRDMLAHQ_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmlah.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
  62 // CHECK-ARM-NEXT:    ret <4 x i32> [[VQRDMLAHQ_V3_I]]
  63 //
  64 // CHECK-AARCH64-LABEL: @test_vqrdmlahq_s32(
  65 // CHECK-AARCH64-NEXT:  entry:
  66 // CHECK-AARCH64-NEXT:    [[VQRDMLAHQ_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmlah.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
  67 // CHECK-AARCH64-NEXT:    ret <4 x i32> [[VQRDMLAHQ_V3_I]]
  68 //
  69 int32x4_t test_vqrdmlahq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
  70   // Forwards a, b, c unchanged to vqrdmlahq_s32; the FileCheck patterns above expect one v4i32 vqrdmlah (ARM) / sqrdmlah (AArch64) intrinsic call whose result is returned.
  71   return vqrdmlahq_s32(a, b, c);
  72 }
  73
  74 // CHECK-ARM-LABEL: @test_vqrdmlah_lane_s16(
  75 // CHECK-ARM-NEXT:  entry:
  76 // CHECK-ARM-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
  77 // CHECK-ARM-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
  78 // CHECK-ARM-NEXT:    [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  79 // CHECK-ARM-NEXT:    [[VQRDMLAH_V3_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmlah.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[LANE]])
  80 // CHECK-ARM-NEXT:    ret <4 x i16> [[VQRDMLAH_V3_I]]
  81 //
  82 // CHECK-AARCH64-LABEL: @test_vqrdmlah_lane_s16(
  83 // CHECK-AARCH64-NEXT:  entry:
  84 // CHECK-AARCH64-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
  85 // CHECK-AARCH64-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
  86 // CHECK-AARCH64-NEXT:    [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  87 // CHECK-AARCH64-NEXT:    [[VQRDMLAH_V3_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[LANE]])
  88 // CHECK-AARCH64-NEXT:    ret <4 x i16> [[VQRDMLAH_V3_I]]
  89 //
  90 int16x4_t test_vqrdmlah_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  91   // Calls vqrdmlah_lane_s16 with constant index 3; the FileCheck patterns above expect element 3 of c to be splatted by a shufflevector and that splat passed as the third operand of the v4i16 intrinsic.
  92   return vqrdmlah_lane_s16(a, b, c, 3);
  93 }
  94
  95 // CHECK-ARM-LABEL: @test_vqrdmlah_lane_s32(
  96 // CHECK-ARM-NEXT:  entry:
  97 // CHECK-ARM-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
  98 // CHECK-ARM-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
  99 // CHECK-ARM-NEXT:    [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
 100 // CHECK-ARM-NEXT:    [[VQRDMLAH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmlah.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> [[LANE]])
 101 // CHECK-ARM-NEXT:    ret <2 x i32> [[VQRDMLAH_V3_I]]
 102 //
 103 // CHECK-AARCH64-LABEL: @test_vqrdmlah_lane_s32(
 104 // CHECK-AARCH64-NEXT:  entry:
 105 // CHECK-AARCH64-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
 106 // CHECK-AARCH64-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
 107 // CHECK-AARCH64-NEXT:    [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
 108 // CHECK-AARCH64-NEXT:    [[VQRDMLAH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmlah.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> [[LANE]])
 109 // CHECK-AARCH64-NEXT:    ret <2 x i32> [[VQRDMLAH_V3_I]]
 110 //
 111 int32x2_t test_vqrdmlah_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
 112   // Calls vqrdmlah_lane_s32 with constant index 1; the FileCheck patterns above expect element 1 of c to be splatted by a shufflevector and that splat passed as the third operand of the v2i32 intrinsic.
 113   return vqrdmlah_lane_s32(a, b, c, 1);
 114 }
 115
 116 // CHECK-ARM-LABEL: @test_vqrdmlahq_lane_s16(
 117 // CHECK-ARM-NEXT:  entry:
 118 // CHECK-ARM-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
 119 // CHECK-ARM-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
 120 // CHECK-ARM-NEXT:    [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
 121 // CHECK-ARM-NEXT:    [[VQRDMLAHQ_V3_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmlah.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[LANE]])
 122 // CHECK-ARM-NEXT:    ret <8 x i16> [[VQRDMLAHQ_V3_I]]
 123 //
 124 // CHECK-AARCH64-LABEL: @test_vqrdmlahq_lane_s16(
 125 // CHECK-AARCH64-NEXT:  entry:
 126 // CHECK-AARCH64-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
 127 // CHECK-AARCH64-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
 128 // CHECK-AARCH64-NEXT:    [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
 129 // CHECK-AARCH64-NEXT:    [[VQRDMLAHQ_V3_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmlah.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[LANE]])
 130 // CHECK-AARCH64-NEXT:    ret <8 x i16> [[VQRDMLAHQ_V3_I]]
 131 //
 132 int16x8_t test_vqrdmlahq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
 133   // c is an int16x4_t while a and b are int16x8_t; with constant index 3, the FileCheck patterns above expect element 3 of c splatted to <8 x i16> by a shufflevector before the v8i16 intrinsic call.
 134   return vqrdmlahq_lane_s16(a, b, c, 3);
 135 }
 136
 137 // CHECK-ARM-LABEL: @test_vqrdmlahq_lane_s32(
 138 // CHECK-ARM-NEXT:  entry:
 139 // CHECK-ARM-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
 140 // CHECK-ARM-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
 141 // CHECK-ARM-NEXT:    [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
 142 // CHECK-ARM-NEXT:    [[VQRDMLAHQ_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmlah.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[LANE]])
 143 // CHECK-ARM-NEXT:    ret <4 x i32> [[VQRDMLAHQ_V3_I]]
 144 //
 145 // CHECK-AARCH64-LABEL: @test_vqrdmlahq_lane_s32(
 146 // CHECK-AARCH64-NEXT:  entry:
 147 // CHECK-AARCH64-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
 148 // CHECK-AARCH64-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
 149 // CHECK-AARCH64-NEXT:    [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
 150 // CHECK-AARCH64-NEXT:    [[VQRDMLAHQ_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmlah.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[LANE]])
 151 // CHECK-AARCH64-NEXT:    ret <4 x i32> [[VQRDMLAHQ_V3_I]]
 152 //
 153 int32x4_t test_vqrdmlahq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
 154   // c is an int32x2_t while a and b are int32x4_t; with constant index 1, the FileCheck patterns above expect element 1 of c splatted to <4 x i32> by a shufflevector before the v4i32 intrinsic call.
 155   return vqrdmlahq_lane_s32(a, b, c, 1);
 156 }
 157
 158 // CHECK-ARM-LABEL: @test_vqrdmlsh_s16(
 159 // CHECK-ARM-NEXT:  entry:
 160 // CHECK-ARM-NEXT:    [[VQRDMLSH_V3_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmlsh.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[C:%.*]])
 161 // CHECK-ARM-NEXT:    ret <4 x i16> [[VQRDMLSH_V3_I]]
 162 //
 163 // CHECK-AARCH64-LABEL: @test_vqrdmlsh_s16(
 164 // CHECK-AARCH64-NEXT:  entry:
 165 // CHECK-AARCH64-NEXT:    [[VQRDMLSH_V3_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[C:%.*]])
 166 // CHECK-AARCH64-NEXT:    ret <4 x i16> [[VQRDMLSH_V3_I]]
 167 //
 168 int16x4_t test_vqrdmlsh_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
 169   // Forwards a, b, c unchanged to vqrdmlsh_s16; the FileCheck patterns above expect one v4i16 vqrdmlsh (ARM) / sqrdmlsh (AArch64) intrinsic call whose result is returned.
 170   return vqrdmlsh_s16(a, b, c);
 171 }
 172
 173 // CHECK-ARM-LABEL: @test_vqrdmlsh_s32(
 174 // CHECK-ARM-NEXT:  entry:
 175 // CHECK-ARM-NEXT:    [[VQRDMLSH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmlsh.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> [[C:%.*]])
 176 // CHECK-ARM-NEXT:    ret <2 x i32> [[VQRDMLSH_V3_I]]
 177 //
 178 // CHECK-AARCH64-LABEL: @test_vqrdmlsh_s32(
 179 // CHECK-AARCH64-NEXT:  entry:
 180 // CHECK-AARCH64-NEXT:    [[VQRDMLSH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmlsh.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> [[C:%.*]])
 181 // CHECK-AARCH64-NEXT:    ret <2 x i32> [[VQRDMLSH_V3_I]]
 182 //
 183 int32x2_t test_vqrdmlsh_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
 184   // Forwards a, b, c unchanged to vqrdmlsh_s32; the FileCheck patterns above expect one v2i32 vqrdmlsh (ARM) / sqrdmlsh (AArch64) intrinsic call whose result is returned.
 185   return vqrdmlsh_s32(a, b, c);
 186 }
 187
 188 // CHECK-ARM-LABEL: @test_vqrdmlshq_s16(
 189 // CHECK-ARM-NEXT:  entry:
 190 // CHECK-ARM-NEXT:    [[VQRDMLSHQ_V3_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmlsh.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]])
 191 // CHECK-ARM-NEXT:    ret <8 x i16> [[VQRDMLSHQ_V3_I]]
 192 //
 193 // CHECK-AARCH64-LABEL: @test_vqrdmlshq_s16(
 194 // CHECK-AARCH64-NEXT:  entry:
 195 // CHECK-AARCH64-NEXT:    [[VQRDMLSHQ_V3_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmlsh.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]])
 196 // CHECK-AARCH64-NEXT:    ret <8 x i16> [[VQRDMLSHQ_V3_I]]
 197 //
 198 int16x8_t test_vqrdmlshq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
 199   // Forwards a, b, c unchanged to vqrdmlshq_s16; the FileCheck patterns above expect one v8i16 vqrdmlsh (ARM) / sqrdmlsh (AArch64) intrinsic call whose result is returned.
 200   return vqrdmlshq_s16(a, b, c);
 201 }
 202
 203 // CHECK-ARM-LABEL: @test_vqrdmlshq_s32(
 204 // CHECK-ARM-NEXT:  entry:
 205 // CHECK-ARM-NEXT:    [[VQRDMLSHQ_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmlsh.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
 206 // CHECK-ARM-NEXT:    ret <4 x i32> [[VQRDMLSHQ_V3_I]]
 207 //
 208 // CHECK-AARCH64-LABEL: @test_vqrdmlshq_s32(
 209 // CHECK-AARCH64-NEXT:  entry:
 210 // CHECK-AARCH64-NEXT:    [[VQRDMLSHQ_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmlsh.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
 211 // CHECK-AARCH64-NEXT:    ret <4 x i32> [[VQRDMLSHQ_V3_I]]
 212 //
 213 int32x4_t test_vqrdmlshq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
 214   // Forwards a, b, c unchanged to vqrdmlshq_s32; the FileCheck patterns above expect one v4i32 vqrdmlsh (ARM) / sqrdmlsh (AArch64) intrinsic call whose result is returned.
 215   return vqrdmlshq_s32(a, b, c);
 216 }
 217
 218 // CHECK-ARM-LABEL: @test_vqrdmlsh_lane_s16(
 219 // CHECK-ARM-NEXT:  entry:
 220 // CHECK-ARM-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
 221 // CHECK-ARM-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
 222 // CHECK-ARM-NEXT:    [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
 223 // CHECK-ARM-NEXT:    [[VQRDMLSH_V3_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmlsh.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[LANE]])
 224 // CHECK-ARM-NEXT:    ret <4 x i16> [[VQRDMLSH_V3_I]]
 225 //
 226 // CHECK-AARCH64-LABEL: @test_vqrdmlsh_lane_s16(
 227 // CHECK-AARCH64-NEXT:  entry:
 228 // CHECK-AARCH64-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
 229 // CHECK-AARCH64-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
 230 // CHECK-AARCH64-NEXT:    [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
 231 // CHECK-AARCH64-NEXT:    [[VQRDMLSH_V3_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[LANE]])
 232 // CHECK-AARCH64-NEXT:    ret <4 x i16> [[VQRDMLSH_V3_I]]
 233 //
 234 int16x4_t test_vqrdmlsh_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
 235   // Calls vqrdmlsh_lane_s16 with constant index 3; the FileCheck patterns above expect element 3 of c to be splatted by a shufflevector and that splat passed as the third operand of the v4i16 intrinsic.
 236   return vqrdmlsh_lane_s16(a, b, c, 3);
 237 }
 238
 239 // CHECK-ARM-LABEL: @test_vqrdmlsh_lane_s32(
 240 // CHECK-ARM-NEXT:  entry:
 241 // CHECK-ARM-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
 242 // CHECK-ARM-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
 243 // CHECK-ARM-NEXT:    [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
 244 // CHECK-ARM-NEXT:    [[VQRDMLSH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmlsh.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> [[LANE]])
 245 // CHECK-ARM-NEXT:    ret <2 x i32> [[VQRDMLSH_V3_I]]
 246 //
 247 // CHECK-AARCH64-LABEL: @test_vqrdmlsh_lane_s32(
 248 // CHECK-AARCH64-NEXT:  entry:
 249 // CHECK-AARCH64-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
 250 // CHECK-AARCH64-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
 251 // CHECK-AARCH64-NEXT:    [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
 252 // CHECK-AARCH64-NEXT:    [[VQRDMLSH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmlsh.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> [[LANE]])
 253 // CHECK-AARCH64-NEXT:    ret <2 x i32> [[VQRDMLSH_V3_I]]
 254 //
 255 int32x2_t test_vqrdmlsh_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
 256   // Calls vqrdmlsh_lane_s32 with constant index 1; the FileCheck patterns above expect element 1 of c to be splatted by a shufflevector and that splat passed as the third operand of the v2i32 intrinsic.
 257   return vqrdmlsh_lane_s32(a, b, c, 1);
 258 }
 259
 260 // CHECK-ARM-LABEL: @test_vqrdmlshq_lane_s16(
 261 // CHECK-ARM-NEXT:  entry:
 262 // CHECK-ARM-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
 263 // CHECK-ARM-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
 264 // CHECK-ARM-NEXT:    [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
 265 // CHECK-ARM-NEXT:    [[VQRDMLSHQ_V3_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmlsh.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[LANE]])
 266 // CHECK-ARM-NEXT:    ret <8 x i16> [[VQRDMLSHQ_V3_I]]
 267 //
 268 // CHECK-AARCH64-LABEL: @test_vqrdmlshq_lane_s16(
 269 // CHECK-AARCH64-NEXT:  entry:
 270 // CHECK-AARCH64-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
 271 // CHECK-AARCH64-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
 272 // CHECK-AARCH64-NEXT:    [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
 273 // CHECK-AARCH64-NEXT:    [[VQRDMLSHQ_V3_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmlsh.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[LANE]])
 274 // CHECK-AARCH64-NEXT:    ret <8 x i16> [[VQRDMLSHQ_V3_I]]
 275 //
 276 int16x8_t test_vqrdmlshq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
 277   // c is an int16x4_t while a and b are int16x8_t; with constant index 3, the FileCheck patterns above expect element 3 of c splatted to <8 x i16> by a shufflevector before the v8i16 intrinsic call.
 278   return vqrdmlshq_lane_s16(a, b, c, 3);
 279 }
 280
 281 // CHECK-ARM-LABEL: @test_vqrdmlshq_lane_s32(
 282 // CHECK-ARM-NEXT:  entry:
 283 // CHECK-ARM-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
 284 // CHECK-ARM-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
 285 // CHECK-ARM-NEXT:    [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
 286 // CHECK-ARM-NEXT:    [[VQRDMLSHQ_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmlsh.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[LANE]])
 287 // CHECK-ARM-NEXT:    ret <4 x i32> [[VQRDMLSHQ_V3_I]]
 288 //
 289 // CHECK-AARCH64-LABEL: @test_vqrdmlshq_lane_s32(
 290 // CHECK-AARCH64-NEXT:  entry:
 291 // CHECK-AARCH64-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
 292 // CHECK-AARCH64-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
 293 // CHECK-AARCH64-NEXT:    [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
 294 // CHECK-AARCH64-NEXT:    [[VQRDMLSHQ_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmlsh.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[LANE]])
 295 // CHECK-AARCH64-NEXT:    ret <4 x i32> [[VQRDMLSHQ_V3_I]]
 296 //
 297 int32x4_t test_vqrdmlshq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
 298   // c is an int32x2_t while a and b are int32x4_t; with constant index 1, the FileCheck patterns above expect element 1 of c splatted to <4 x i32> by a shufflevector before the v4i32 intrinsic call.
 299   return vqrdmlshq_lane_s32(a, b, c, 1);
 300 }