clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c

   1 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-cpu cyclone \
   2 // RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
   3
   4 // REQUIRES: aarch64-registered-target || arm-registered-target
   5
   6 #include <arm_neon.h>
   7
   8
   9 // CHECK-LABEL: define{{.*}} float @test_vmuls_lane_f32(float noundef %a, <2 x float> noundef %b) #0 {
  10 // CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x float> %b, i32 1
  11 // CHECK:   [[MUL:%.*]] = fmul float %a, [[VGET_LANE]]
  12 // CHECK:   ret float [[MUL]]
  13 float32_t test_vmuls_lane_f32(float32_t a, float32x2_t b) {
       // Multiplies scalar a by lane 1 (the last lane) of the 2-lane vector b.
       // The FileCheck patterns above expect an extractelement of that lane
       // feeding a plain fmul.
  14   return vmuls_lane_f32(a, b, 1);
  15 }
  16
  17 // CHECK-LABEL: define{{.*}} double @test_vmuld_lane_f64(double noundef %a, <1 x double> noundef %b) #0 {
  18 // CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> %b, i32 0
  19 // CHECK:   [[MUL:%.*]] = fmul double %a, [[VGET_LANE]]
  20 // CHECK:   ret double [[MUL]]
  21 float64_t test_vmuld_lane_f64(float64_t a, float64x1_t b) {
       // Multiplies scalar a by lane 0, the only lane of the 1-lane vector b.
       // Expected lowering per the patterns above: extractelement + fmul double.
  22   return vmuld_lane_f64(a, b, 0);
  23 }
  24
  25 // CHECK-LABEL: define{{.*}} float @test_vmuls_laneq_f32(float noundef %a, <4 x float> noundef %b) #0 {
  26 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x float> %b, i32 3
  27 // CHECK:   [[MUL:%.*]] = fmul float %a, [[VGETQ_LANE]]
  28 // CHECK:   ret float [[MUL]]
  29 float32_t test_vmuls_laneq_f32(float32_t a, float32x4_t b) {
       // "laneq" variant: the lane comes from a 4-lane vector; index 3 is the
       // last lane. The patterns above expect extractelement (lane 3) + fmul.
  30   return vmuls_laneq_f32(a, b, 3);
  31 }
  32
  33 // CHECK-LABEL: define{{.*}} double @test_vmuld_laneq_f64(double noundef %a, <2 x double> noundef %b) #0 {
  34 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 1
  35 // CHECK:   [[MUL:%.*]] = fmul double %a, [[VGETQ_LANE]]
  36 // CHECK:   ret double [[MUL]]
  37 float64_t test_vmuld_laneq_f64(float64_t a, float64x2_t b) {
       // Multiplies scalar a by lane 1 (the last lane) of the 2-lane double
       // vector b; expected to become extractelement + fmul double.
  38   return vmuld_laneq_f64(a, b, 1);
  39 }
  40
  41 // CHECK-LABEL: define{{.*}} <1 x double> @test_vmul_n_f64(<1 x double> noundef %a, double noundef %b) #0 {
  42 // CHECK:   [[TMP2:%.*]] = bitcast <1 x double> %a to double
  43 // CHECK:   [[TMP3:%.*]] = fmul double [[TMP2]], %b
  44 // CHECK:   [[TMP4:%.*]] = bitcast double [[TMP3]] to <1 x double>
  45 // CHECK:   ret <1 x double> [[TMP4]]
  46 float64x1_t test_vmul_n_f64(float64x1_t a, float64_t b) {
       // Multiplies the 1-lane vector a by scalar b. Per the patterns above the
       // vector is bitcast to a double, multiplied with fmul, and the product
       // is bitcast back to a 1-lane vector.
  47   return vmul_n_f64(a, b);
  48 }
  49
  50 // CHECK-LABEL: define{{.*}} float @test_vmulxs_lane_f32(float noundef %a, <2 x float> noundef %b) #0 {
  51 // CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x float> %b, i32 1
  52 // CHECK:   [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float [[VGET_LANE]])
  53 // CHECK:   ret float [[VMULXS_F32_I]]
  54 float32_t test_vmulxs_lane_f32(float32_t a, float32x2_t b) {
       // vmulx form of the scalar-by-lane multiply, using lane 1 of the 2-lane
       // vector b. The patterns above expect the extracted lane to be passed
       // with a to the llvm.aarch64.neon.fmulx.f32 intrinsic.
  55   return vmulxs_lane_f32(a, b, 1);
  56 }
  57
  58 // CHECK-LABEL: define{{.*}} float @test_vmulxs_laneq_f32(float noundef %a, <4 x float> noundef %b) #0 {
  59 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x float> %b, i32 3
  60 // CHECK:   [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float [[VGETQ_LANE]])
  61 // CHECK:   ret float [[VMULXS_F32_I]]
  62 float32_t test_vmulxs_laneq_f32(float32_t a, float32x4_t b) {
       // Same as the _lane variant but the lane (index 3, the last one) is
       // taken from a 4-lane vector; still expected to call fmulx.f32.
  63   return vmulxs_laneq_f32(a, b, 3);
  64 }
  65
  66 // CHECK-LABEL: define{{.*}} double @test_vmulxd_lane_f64(double noundef %a, <1 x double> noundef %b) #0 {
  67 // CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> %b, i32 0
  68 // CHECK:   [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double [[VGET_LANE]])
  69 // CHECK:   ret double [[VMULXD_F64_I]]
  70 float64_t test_vmulxd_lane_f64(float64_t a, float64x1_t b) {
       // Double-precision vmulx against lane 0, the only lane of b. The
       // patterns above expect extractelement followed by a call to
       // llvm.aarch64.neon.fmulx.f64.
  71   return vmulxd_lane_f64(a, b, 0);
  72 }
  73
  74 // CHECK-LABEL: define{{.*}} double @test_vmulxd_laneq_f64(double noundef %a, <2 x double> noundef %b) #0 {
  75 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 1
  76 // CHECK:   [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double [[VGETQ_LANE]])
  77 // CHECK:   ret double [[VMULXD_F64_I]]
  78 float64_t test_vmulxd_laneq_f64(float64_t a, float64x2_t b) {
       // Double-precision vmulx against lane 1 (the last lane) of the 2-lane
       // vector b; expected to call fmulx.f64 on a and the extracted lane.
  79   return vmulxd_laneq_f64(a, b, 1);
  80 }
  81
  82 // CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_lane_f64(<1 x double> noundef %a, <1 x double> noundef %b) #0 {
  83 // CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> %a, i32 0
  84 // CHECK:   [[VGET_LANE6:%.*]] = extractelement <1 x double> %b, i32 0
  85 // CHECK:   [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE6]])
  86 // CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x double> %a, double [[VMULXD_F64_I]], i32 0
  87 // CHECK:   ret <1 x double> [[VSET_LANE]]
  88 float64x1_t test_vmulx_lane_f64(float64x1_t a, float64x1_t b) {
       // Vector-typed vmulx where both operands have a single lane. Per the
       // patterns above, lane 0 of each operand is extracted, multiplied via
       // the scalar fmulx.f64 intrinsic, and the product is inserted back into
       // lane 0 of a to form the result.
  89   return vmulx_lane_f64(a, b, 0);
  90 }
  91
  92
  93 // CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_laneq_f64_0(<1 x double> noundef %a, <2 x double> noundef %b) #0 {
  94 // CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> %a, i32 0
  95 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 0
  96 // CHECK:   [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]])
  97 // CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x double> %a, double [[VMULXD_F64_I]], i32 0
  98 // CHECK:   ret <1 x double> [[VSET_LANE]]
  99 float64x1_t test_vmulx_laneq_f64_0(float64x1_t a, float64x2_t b) {
       // Multiplies the single lane of a by lane 0 (the first lane) of the
       // 2-lane vector b. Expected lowering: two extractelements, a scalar
       // fmulx.f64 call, then insertelement into lane 0 of a.
 100   return vmulx_laneq_f64(a, b, 0);
 101 }
 102
 103 // CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_laneq_f64_1(<1 x double> noundef %a, <2 x double> noundef %b) #0 {
 104 // CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> %a, i32 0
 105 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 1
 106 // CHECK:   [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]])
 107 // CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x double> %a, double [[VMULXD_F64_I]], i32 0
 108 // CHECK:   ret <1 x double> [[VSET_LANE]]
 109 float64x1_t test_vmulx_laneq_f64_1(float64x1_t a, float64x2_t b) {
       // Companion to test_vmulx_laneq_f64_0 that selects lane 1 (the last
       // lane) of b instead of lane 0; the rest of the expected lowering is
       // the same.
 110   return vmulx_laneq_f64(a, b, 1);
 111 }
 112
 113
 114 // CHECK-LABEL: define{{.*}} float @test_vfmas_lane_f32(float noundef %a, float noundef %b, <2 x float> noundef %c) #0 {
 115 // CHECK:   [[EXTRACT:%.*]] = extractelement <2 x float> %c, i32 1
 116 // CHECK:   [[TMP2:%.*]] = call float @llvm.fma.f32(float %b, float [[EXTRACT]], float %a)
 117 // CHECK:   ret float [[TMP2]]
 118 float32_t test_vfmas_lane_f32(float32_t a, float32_t b, float32x2_t c) {
       // Scalar fused multiply-add with a as the addend and b times lane 1 of
       // c as the product. The patterns above expect llvm.fma.f32 to be called
       // with operands (b, extracted lane, a).
 119   return vfmas_lane_f32(a, b, c, 1);
 120 }
 121
 122 // CHECK-LABEL: define{{.*}} double @test_vfmad_lane_f64(double noundef %a, double noundef %b, <1 x double> noundef %c) #0 {
 123 // CHECK:   [[EXTRACT:%.*]] = extractelement <1 x double> %c, i32 0
 124 // CHECK:   [[TMP2:%.*]] = call double @llvm.fma.f64(double %b, double [[EXTRACT]], double %a)
 125 // CHECK:   ret double [[TMP2]]
 126 float64_t test_vfmad_lane_f64(float64_t a, float64_t b, float64x1_t c) {
       // Double-precision scalar fused multiply-add using lane 0, the only lane
       // of c. Expected call: llvm.fma.f64(b, extracted lane, a).
 127   return vfmad_lane_f64(a, b, c, 0);
 128 }
 129
 130 // CHECK-LABEL: define{{.*}} double @test_vfmad_laneq_f64(double noundef %a, double noundef %b, <2 x double> noundef %c) #0 {
 131 // CHECK:   [[EXTRACT:%.*]] = extractelement <2 x double> %c, i32 1
 132 // CHECK:   [[TMP2:%.*]] = call double @llvm.fma.f64(double %b, double [[EXTRACT]], double %a)
 133 // CHECK:   ret double [[TMP2]]
 134 float64_t test_vfmad_laneq_f64(float64_t a, float64_t b, float64x2_t c) {
       // Double-precision scalar fused multiply-add using lane 1 (the last
       // lane) of the 2-lane vector c. Expected call:
       // llvm.fma.f64(b, extracted lane, a).
 135   return vfmad_laneq_f64(a, b, c, 1);
 136 }
 137
 138 // CHECK-LABEL: define{{.*}} float @test_vfmss_lane_f32(float noundef %a, float noundef %b, <2 x float> noundef %c) #0 {
 139 // CHECK:   [[SUB:%.*]] = fneg float %b
 140 // CHECK:   [[EXTRACT:%.*]] = extractelement <2 x float> %c, i32 1
 141 // CHECK:   [[TMP2:%.*]] = call float @llvm.fma.f32(float [[SUB]], float [[EXTRACT]], float %a)
 142 // CHECK:   ret float [[TMP2]]
 143 float32_t test_vfmss_lane_f32(float32_t a, float32_t b, float32x2_t c) {
       // Multiply-subtract counterpart of vfmas_lane_f32. Per the patterns
       // above, b is negated with fneg first and the negated value is then fed
       // to llvm.fma.f32 together with lane 1 of c and the addend a.
 144   return vfmss_lane_f32(a, b, c, 1);
 145 }
 146
 147 // CHECK-LABEL: define{{.*}} <1 x double> @test_vfma_lane_f64(<1 x double> noundef %a, <1 x double> noundef %b, <1 x double> noundef %v) #0 {
 148 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
 149 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
 150 // CHECK:   [[TMP2:%.*]] = bitcast <1 x double> %v to <8 x i8>
 151 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
 152 // CHECK:   [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer
 153 // CHECK:   [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
 154 // CHECK:   [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
 155 // CHECK:   [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]])
 156 // CHECK:   ret <1 x double> [[FMLA2]]
 157 float64x1_t test_vfma_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) {
       // Vector-typed fused multiply-add on 1-lane operands, selecting lane 0
       // of v. The patterns above expect the operands to round-trip through
       // 8 x i8 bitcasts, v to be splatted with a shufflevector, and a call to
       // the vector intrinsic llvm.fma.v1f64(b, splat, a).
 158   return vfma_lane_f64(a, b, v, 0);
 159 }
 160
 161 // CHECK-LABEL: define{{.*}} <1 x double> @test_vfms_lane_f64(<1 x double> noundef %a, <1 x double> noundef %b, <1 x double> noundef %v) #0 {
 162 // CHECK:   [[SUB:%.*]] = fneg <1 x double> %b
 163 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
 164 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8>
 165 // CHECK:   [[TMP2:%.*]] = bitcast <1 x double> %v to <8 x i8>
 166 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
 167 // CHECK:   [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer
 168 // CHECK:   [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
 169 // CHECK:   [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
 170 // CHECK:   [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]])
 171 // CHECK:   ret <1 x double> [[FMLA2]]
 172 float64x1_t test_vfms_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) {
       // Multiply-subtract counterpart of vfma_lane_f64. Per the patterns
       // above, b is negated as a whole vector (fneg) before the same
       // bitcast / shufflevector / llvm.fma.v1f64 sequence is emitted.
 173   return vfms_lane_f64(a, b, v, 0);
 174 }
 175
 176 // CHECK-LABEL: define{{.*}} <1 x double> @test_vfma_laneq_f64(<1 x double> noundef %a, <1 x double> noundef %b, <2 x double> noundef %v) #0 {
 177 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
 178 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
 179 // CHECK:   [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8>
 180 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to double
 181 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to double
 182 // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
 183 // CHECK:   [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
 184 // CHECK:   [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]])
 185 // CHECK:   [[TMP7:%.*]] = bitcast double [[TMP6]] to <1 x double>
 186 // CHECK:   ret <1 x double> [[TMP7]]
 187 float64x1_t test_vfma_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) {
       // Fused multiply-add of 1-lane a and b against lane 0 of the 2-lane
       // vector v. Unlike the _lane form, the patterns above expect a and b to
       // be reduced to scalar doubles, the scalar llvm.fma.f64 to be called on
       // (b, extracted lane, a), and the result bitcast back to a 1-lane vector.
 188   return vfma_laneq_f64(a, b, v, 0);
 189 }
 190
 191 // CHECK-LABEL: define{{.*}} <1 x double> @test_vfms_laneq_f64(<1 x double> noundef %a, <1 x double> noundef %b, <2 x double> noundef %v) #0 {
 192 // CHECK:   [[SUB:%.*]] = fneg <1 x double> %b
 193 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
 194 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8>
 195 // CHECK:   [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8>
 196 // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to double
 197 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to double
 198 // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
 199 // CHECK:   [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
 200 // CHECK:   [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]])
 201 // CHECK:   [[TMP7:%.*]] = bitcast double [[TMP6]] to <1 x double>
 202 // CHECK:   ret <1 x double> [[TMP7]]
 203 float64x1_t test_vfms_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) {
       // Multiply-subtract counterpart of vfma_laneq_f64: the patterns above
       // expect an fneg of the vector b up front, followed by the same scalar
       // llvm.fma.f64 lowering using lane 0 of v.
 204   return vfms_laneq_f64(a, b, v, 0);
 205 }
 206
 207 // CHECK-LABEL: define{{.*}} i32 @test_vqdmullh_lane_s16(i16 noundef %a, <4 x i16> noundef %b) #0 {
 208 // CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> %b, i32 3
 209 // CHECK:   [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
 210 // CHECK:   [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[VGET_LANE]], i64 0
 211 // CHECK:   [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
 212 // CHECK:   [[TMP4:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0
 213 // CHECK:   ret i32 [[TMP4]]
 214 int32_t test_vqdmullh_lane_s16(int16_t a, int16x4_t b) {
       // 16-bit scalar vqdmull against lane 3 (the last lane) of b. Per the
       // patterns above there is no scalar i16 intrinsic call here: a and the
       // extracted lane are each inserted into lane 0 of a 4 x i16 vector, the
       // vector sqdmull.v4i32 intrinsic is called, and lane 0 of its result is
       // returned.
 215   return vqdmullh_lane_s16(a, b, 3);
 216 }
 217
 218 // CHECK-LABEL: define{{.*}} i64 @test_vqdmulls_lane_s32(i32 noundef %a, <2 x i32> noundef %b) #0 {
 219 // CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> %b, i32 1
 220 // CHECK:   [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 [[VGET_LANE]])
 221 // CHECK:   ret i64 [[VQDMULLS_S32_I]]
 222 int64_t test_vqdmulls_lane_s32(int32_t a, int32x2_t b) {
       // 32-bit scalar vqdmull against lane 1 (the last lane) of b. The
       // patterns above expect a direct call to the scalar intrinsic
       // llvm.aarch64.neon.sqdmulls.scalar, which yields the i64 result.
 223   return vqdmulls_lane_s32(a, b, 1);
 224 }
 225
 226 // CHECK-LABEL: define{{.*}} i32 @test_vqdmullh_laneq_s16(i16 noundef %a, <8 x i16> noundef %b) #0 {
 227 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %b, i32 7
 228 // CHECK:   [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
 229 // CHECK:   [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[VGETQ_LANE]], i64 0
 230 // CHECK:   [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
 231 // CHECK:   [[TMP4:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0
 232 // CHECK:   ret i32 [[TMP4]]
 233 int32_t test_vqdmullh_laneq_s16(int16_t a, int16x8_t b) {
       // As test_vqdmullh_lane_s16, but the lane (index 7, the last one) is
       // taken from an 8-lane vector. The expected lowering still goes through
       // 4 x i16 lane-0 inserts and the vector sqdmull.v4i32 intrinsic.
 234   return vqdmullh_laneq_s16(a, b, 7);
 235 }
 236
 237 // CHECK-LABEL: define{{.*}} i64 @test_vqdmulls_laneq_s32(i32 noundef %a, <4 x i32> noundef %b) #0 {
 238 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %b, i32 3
 239 // CHECK:   [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 [[VGETQ_LANE]])
 240 // CHECK:   ret i64 [[VQDMULLS_S32_I]]
 241 int64_t test_vqdmulls_laneq_s32(int32_t a, int32x4_t b) {
       // 32-bit scalar vqdmull against lane 3 (the last lane) of the 4-lane
       // vector b; expected to call sqdmulls.scalar on a and the extracted lane.
 242   return vqdmulls_laneq_s32(a, b, 3);
 243 }
 244
 245 // CHECK-LABEL: define{{.*}} i16 @test_vqdmulhh_lane_s16(i16 noundef %a, <4 x i16> noundef %b) #0 {
 246 // CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> %b, i32 3
 247 // CHECK:   [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
 248 // CHECK:   [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[VGET_LANE]], i64 0
 249 // CHECK:   [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
 250 // CHECK:   [[TMP4:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0
 251 // CHECK:   ret i16 [[TMP4]]
 252 int16_t test_vqdmulhh_lane_s16(int16_t a, int16x4_t b) {
       // 16-bit scalar vqdmulh against lane 3 (the last lane) of b. Per the
       // patterns above both operands are placed in lane 0 of 4 x i16 vectors,
       // the vector sqdmulh.v4i16 intrinsic is called, and lane 0 of the result
       // is returned.
 253   return vqdmulhh_lane_s16(a, b, 3);
 254 }
 255
 256 // CHECK-LABEL: define{{.*}} i32 @test_vqdmulhs_lane_s32(i32 noundef %a, <2 x i32> noundef %b) #0 {
 257 // CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> %b, i32 1
 258 // CHECK:   [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 [[VGET_LANE]])
 259 // CHECK:   ret i32 [[VQDMULHS_S32_I]]
 260 int32_t test_vqdmulhs_lane_s32(int32_t a, int32x2_t b) {
       // 32-bit scalar vqdmulh against lane 1 (the last lane) of b; the
       // patterns above expect a direct call to the scalar sqdmulh.i32
       // intrinsic.
 261   return vqdmulhs_lane_s32(a, b, 1);
 262 }
 263
 264
 265 // CHECK-LABEL: define{{.*}} i16 @test_vqdmulhh_laneq_s16(i16 noundef %a, <8 x i16> noundef %b) #0 {
 266 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %b, i32 7
 267 // CHECK:   [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
 268 // CHECK:   [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[VGETQ_LANE]], i64 0
 269 // CHECK:   [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
 270 // CHECK:   [[TMP4:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0
 271 // CHECK:   ret i16 [[TMP4]]
 272 int16_t test_vqdmulhh_laneq_s16(int16_t a, int16x8_t b) {
       // As test_vqdmulhh_lane_s16, but selecting lane 7 (the last lane) of an
       // 8-lane vector; the expected lowering still uses the 4 x i16 vector
       // sqdmulh intrinsic and extracts lane 0.
 273   return vqdmulhh_laneq_s16(a, b, 7);
 274 }
 275
 276
 277 // CHECK-LABEL: define{{.*}} i32 @test_vqdmulhs_laneq_s32(i32 noundef %a, <4 x i32> noundef %b) #0 {
 278 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %b, i32 3
 279 // CHECK:   [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 [[VGETQ_LANE]])
 280 // CHECK:   ret i32 [[VQDMULHS_S32_I]]
 281 int32_t test_vqdmulhs_laneq_s32(int32_t a, int32x4_t b) {
       // 32-bit scalar vqdmulh against lane 3 (the last lane) of the 4-lane
       // vector b; expected to call the scalar sqdmulh.i32 intrinsic.
 282   return vqdmulhs_laneq_s32(a, b, 3);
 283 }
 284
 285 // CHECK-LABEL: define{{.*}} i16 @test_vqrdmulhh_lane_s16(i16 noundef %a, <4 x i16> noundef %b) #0 {
 286 // CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> %b, i32 3
 287 // CHECK:   [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
 288 // CHECK:   [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[VGET_LANE]], i64 0
 289 // CHECK:   [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
 290 // CHECK:   [[TMP4:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0
 291 // CHECK:   ret i16 [[TMP4]]
 292 int16_t test_vqrdmulhh_lane_s16(int16_t a, int16x4_t b) {
       // 16-bit scalar vqrdmulh against lane 3 (the last lane) of b. Per the
       // patterns above the operands go through lane-0 inserts into 4 x i16
       // vectors, the vector sqrdmulh.v4i16 intrinsic, and a lane-0 extract.
 293   return vqrdmulhh_lane_s16(a, b, 3);
 294 }
 295
 296 // CHECK-LABEL: define{{.*}} i32 @test_vqrdmulhs_lane_s32(i32 noundef %a, <2 x i32> noundef %b) #0 {
 297 // CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> %b, i32 1
 298 // CHECK:   [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 [[VGET_LANE]])
 299 // CHECK:   ret i32 [[VQRDMULHS_S32_I]]
 300 int32_t test_vqrdmulhs_lane_s32(int32_t a, int32x2_t b) {
       // 32-bit scalar vqrdmulh against lane 1 (the last lane) of b; the
       // patterns above expect a direct call to the scalar sqrdmulh.i32
       // intrinsic.
 301   return vqrdmulhs_lane_s32(a, b, 1);
 302 }
 303
 304
 305 // CHECK-LABEL: define{{.*}} i16 @test_vqrdmulhh_laneq_s16(i16 noundef %a, <8 x i16> noundef %b) #0 {
 306 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %b, i32 7
 307 // CHECK:   [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
 308 // CHECK:   [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[VGETQ_LANE]], i64 0
 309 // CHECK:   [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
 310 // CHECK:   [[TMP4:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0
 311 // CHECK:   ret i16 [[TMP4]]
 312 int16_t test_vqrdmulhh_laneq_s16(int16_t a, int16x8_t b) {
       // As test_vqrdmulhh_lane_s16, but selecting lane 7 (the last lane) of an
       // 8-lane vector; the expected lowering still uses the 4 x i16 vector
       // sqrdmulh intrinsic and extracts lane 0.
 313   return vqrdmulhh_laneq_s16(a, b, 7);
 314 }
 315
 316
 317 // CHECK-LABEL: define{{.*}} i32 @test_vqrdmulhs_laneq_s32(i32 noundef %a, <4 x i32> noundef %b) #0 {
 318 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %b, i32 3
 319 // CHECK:   [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 [[VGETQ_LANE]])
 320 // CHECK:   ret i32 [[VQRDMULHS_S32_I]]
 321 int32_t test_vqrdmulhs_laneq_s32(int32_t a, int32x4_t b) {
       // 32-bit scalar vqrdmulh against lane 3 (the last lane) of the 4-lane
       // vector b; expected to call the scalar sqrdmulh.i32 intrinsic.
 322   return vqrdmulhs_laneq_s32(a, b, 3);
 323 }
 324
 325 // CHECK-LABEL: define{{.*}} i32 @test_vqdmlalh_lane_s16(i32 noundef %a, i16 noundef %b, <4 x i16> noundef %c) #0 {
 326 // CHECK:   [[LANE:%.*]] = extractelement <4 x i16> %c, i32 3
 327 // CHECK:   [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
 328 // CHECK:   [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[LANE]], i64 0
 329 // CHECK:   [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
 330 // CHECK:   [[LANE0:%.*]] = extractelement <4 x i32> [[VQDMLXL]], i64 0
 331 // CHECK:   [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE0]])
 332 // CHECK:   ret i32 [[VQDMLXL1]]
 333 int32_t test_vqdmlalh_lane_s16(int32_t a, int16_t b, int16x4_t c) {
       // 16-bit scalar vqdmlal: accumulator a, multiplicand b, and lane 3 (the
       // last lane) of c. Per the patterns above the product is formed with
       // the vector sqdmull.v4i32 intrinsic on lane-0 inserts, lane 0 of that
       // product is extracted, and it is combined with a via sqadd.i32.
 334   return vqdmlalh_lane_s16(a, b, c, 3);
 335 }
 336
 337 // CHECK-LABEL: define{{.*}} i64 @test_vqdmlals_lane_s32(i64 noundef %a, i32 noundef %b, <2 x i32> noundef %c) #0 {
 338 // CHECK:   [[LANE:%.*]] = extractelement <2 x i32> %c, i32 1
 339 // CHECK:   [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]])
 340 // CHECK:   [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL]])
 341 // CHECK:   ret i64 [[VQDMLXL1]]
 342 int64_t test_vqdmlals_lane_s32(int64_t a, int32_t b, int32x2_t c) {
       // 32-bit scalar vqdmlal using lane 1 (the last lane) of c. The patterns
       // above expect sqdmulls.scalar(b, lane) for the product, followed by
       // sqadd.i64 with the accumulator a.
 343   return vqdmlals_lane_s32(a, b, c, 1);
 344 }
 345
 346 // CHECK-LABEL: define{{.*}} i32 @test_vqdmlalh_laneq_s16(i32 noundef %a, i16 noundef %b, <8 x i16> noundef %c) #0 {
 347 // CHECK:   [[LANE:%.*]] = extractelement <8 x i16> %c, i32 7
 348 // CHECK:   [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
 349 // CHECK:   [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[LANE]], i64 0
 350 // CHECK:   [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
 351 // CHECK:   [[LANE0:%.*]] = extractelement <4 x i32> [[VQDMLXL]], i64 0
 352 // CHECK:   [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE0]])
 353 // CHECK:   ret i32 [[VQDMLXL1]]
 354 int32_t test_vqdmlalh_laneq_s16(int32_t a, int16_t b, int16x8_t c) {
       // As test_vqdmlalh_lane_s16, but selecting lane 7 (the last lane) of an
       // 8-lane vector c; the product still goes through sqdmull.v4i32 and the
       // accumulate through sqadd.i32.
 355   return vqdmlalh_laneq_s16(a, b, c, 7);
 356 }
 357
 358 // CHECK-LABEL: define{{.*}} i64 @test_vqdmlals_laneq_s32(i64 noundef %a, i32 noundef %b, <4 x i32> noundef %c) #0 {
 359 // CHECK:   [[LANE:%.*]] = extractelement <4 x i32> %c, i32 3
 360 // CHECK:   [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]])
 361 // CHECK:   [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL]])
 362 // CHECK:   ret i64 [[VQDMLXL1]]
 363 int64_t test_vqdmlals_laneq_s32(int64_t a, int32_t b, int32x4_t c) {
       // 32-bit scalar vqdmlal using lane 3 (the last lane) of the 4-lane
       // vector c; expected lowering is sqdmulls.scalar followed by sqadd.i64.
 364   return vqdmlals_laneq_s32(a, b, c, 3);
 365 }
 366
 367 // CHECK-LABEL: define{{.*}} i32 @test_vqdmlslh_lane_s16(i32 noundef %a, i16 noundef %b, <4 x i16> noundef %c) #0 {
 368 // CHECK:   [[LANE:%.*]] = extractelement <4 x i16> %c, i32 3
 369 // CHECK:   [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
 370 // CHECK:   [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[LANE]], i64 0
 371 // CHECK:   [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
 372 // CHECK:   [[LANE0:%.*]] = extractelement <4 x i32> [[VQDMLXL]], i64 0
 373 // CHECK:   [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE0]])
 374 // CHECK:   ret i32 [[VQDMLXL1]]
 375 int32_t test_vqdmlslh_lane_s16(int32_t a, int16_t b, int16x4_t c) {
       // Subtracting counterpart of vqdmlalh_lane_s16, using lane 3 (the last
       // lane) of c. The product is expected to be formed the same way
       // (sqdmull.v4i32 on lane-0 inserts), but it is combined with a via
       // sqsub.i32 instead of sqadd.i32.
 376   return vqdmlslh_lane_s16(a, b, c, 3);
 377 }
 378
 379 // CHECK-LABEL: define{{.*}} i64 @test_vqdmlsls_lane_s32(i64 noundef %a, i32 noundef %b, <2 x i32> noundef %c) #0 {
 380 // CHECK:   [[LANE:%.*]] = extractelement <2 x i32> %c, i32 1
 381 // CHECK:   [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]])
 382 // CHECK:   [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL]])
 383 // CHECK:   ret i64 [[VQDMLXL1]]
 384 int64_t test_vqdmlsls_lane_s32(int64_t a, int32_t b, int32x2_t c) {
       // 32-bit scalar vqdmlsl using lane 1 (the last lane) of c. The patterns
       // above expect sqdmulls.scalar(b, lane) for the product, followed by
       // sqsub.i64 with the accumulator a.
 385   return vqdmlsls_lane_s32(a, b, c, 1);
 386 }
 387
 388 // CHECK-LABEL: define{{.*}} i32 @test_vqdmlslh_laneq_s16(i32 noundef %a, i16 noundef %b, <8 x i16> noundef %c) #0 {
 389 // CHECK:   [[LANE:%.*]] = extractelement <8 x i16> %c, i32 7
 390 // CHECK:   [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
 391 // CHECK:   [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[LANE]], i64 0
 392 // CHECK:   [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
 393 // CHECK:   [[LANE0:%.*]] = extractelement <4 x i32> [[VQDMLXL]], i64 0
 394 // CHECK:   [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE0]])
 395 // CHECK:   ret i32 [[VQDMLXL1]]
 396 int32_t test_vqdmlslh_laneq_s16(int32_t a, int16_t b, int16x8_t c) {
       // As test_vqdmlslh_lane_s16, but selecting lane 7 (the last lane) of an
       // 8-lane vector c; the product still goes through sqdmull.v4i32 and the
       // subtraction through sqsub.i32.
 397   return vqdmlslh_laneq_s16(a, b, c, 7);
 398 }
 399
 400 // CHECK-LABEL: define{{.*}} i64 @test_vqdmlsls_laneq_s32(i64 noundef %a, i32 noundef %b, <4 x i32> noundef %c) #0 {
 401 // CHECK:   [[LANE:%.*]] = extractelement <4 x i32> %c, i32 3
 402 // CHECK:   [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]])
 403 // CHECK:   [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL]])
 404 // CHECK:   ret i64 [[VQDMLXL1]]
 405 int64_t test_vqdmlsls_laneq_s32(int64_t a, int32_t b, int32x4_t c) {
       // 32-bit scalar vqdmlsl using lane 3 (the last lane) of the 4-lane
       // vector c; expected lowering is sqdmulls.scalar followed by sqsub.i64.
 406   return vqdmlsls_laneq_s32(a, b, c, 3);
 407 }
 408
 409 // CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_lane_f64_0() #0 {
 410 // CHECK:   [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
 411 // CHECK:   [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
 412 // CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0
 413 // CHECK:   [[VGET_LANE7:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
 414 // CHECK:   [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE7]])
 415 // CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0
 416 // CHECK:   ret <1 x double> [[VSET_LANE]]
 417 float64x1_t test_vmulx_lane_f64_0(void) {
           // Variant of test_vmulx_lane_f64 whose operands are compile-time
           // constants built with vcreate_f64 rather than function arguments.
           // The FileCheck patterns above expect each constant to appear as an
           // i64 bitcast to a 1-lane double vector, followed by the usual
           // extract / fmulx.f64 / insert sequence on lane 0.
 418       float64x1_t arg1;
 419       float64x1_t arg2;
 420       float64x1_t result;
 422       arg1 = vcreate_f64(UINT64_C(0x3fd6304bc43ab5c2));
 423       arg2 = vcreate_f64(UINT64_C(0x3fee211e215aeef3));
           // Lane 0 is the only lane of arg2.
 424       result = vmulx_lane_f64(arg1, arg2, 0);
 425       return result;
 426 }
 427
 428 // CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_laneq_f64_2() #0 {
 429 // CHECK:   [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
 430 // CHECK:   [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
 431 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <1 x double> [[TMP0]], <1 x double> [[TMP1]], <2 x i32> <i32 0, i32 1>
 432 // CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0
 433 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[SHUFFLE_I]], i32 1
 434 // CHECK:   [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]])
 435 // CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0
 436 // CHECK:   ret <1 x double> [[VSET_LANE]]
 437 float64x1_t test_vmulx_laneq_f64_2(void) {
           // Constant-operand variant of the vmulx_laneq_f64 tests. The
           // FileCheck patterns above expect the two constants as i64
           // bitcasts, a shufflevector that concatenates them into a 2-lane
           // vector, and then the extract / fmulx.f64 / insert sequence.
 438       float64x1_t arg1;
 439       float64x1_t arg2;
 440       float64x2_t arg3;
 441       float64x1_t result;
 443       arg1 = vcreate_f64(UINT64_C(0x3fd6304bc43ab5c2));
 444       arg2 = vcreate_f64(UINT64_C(0x3fee211e215aeef3));
           // arg3 holds arg1 in lane 0 and arg2 in lane 1, so selecting lane 1
           // below multiplies arg1 by arg2's value.
 445       arg3 = vcombine_f64(arg1, arg2);
 446       result = vmulx_laneq_f64(arg1, arg3, 1);
 447       return result;
 448 }