// clang/test/CodeGen/AArch64/neon-fma.c
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s

// REQUIRES: aarch64-registered-target || arm-registered-target

#include <arm_neon.h>
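
// The tests below cover the non-fused (vmla/vmls) and fused (vfma/vfms)
// multiply-accumulate intrinsics: the _n_ forms splat a scalar operand with
// insertelement, the _lane_/_laneq_ forms select a lane with shufflevector,
// and the results lower to plain fmul/fadd/fsub or to @llvm.fma calls.
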
// CHECK-LABEL: define {{[^@]+}}@test_vmla_n_f32
// CHECK-SAME: (<2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]], float noundef [[C:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x float> poison, float [[C]], i32 0
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float [[C]], i32 1
// CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x float> [[B]], [[VECINIT1_I]]
// CHECK-NEXT: [[ADD_I:%.*]] = fadd <2 x float> [[A]], [[MUL_I]]
// CHECK-NEXT: ret <2 x float> [[ADD_I]]
//
float32x2_t test_vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
  return vmla_n_f32(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vmlaq_n_f32
// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], float noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float [[C]], i32 0
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float [[C]], i32 1
// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float [[C]], i32 2
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float [[C]], i32 3
// CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[B]], [[VECINIT3_I]]
// CHECK-NEXT: [[ADD_I:%.*]] = fadd <4 x float> [[A]], [[MUL_I]]
// CHECK-NEXT: ret <4 x float> [[ADD_I]]
//
float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
  return vmlaq_n_f32(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vmlsq_n_f32
// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], float noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float [[C]], i32 0
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float [[C]], i32 1
// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float [[C]], i32 2
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float [[C]], i32 3
// CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[B]], [[VECINIT3_I]]
// CHECK-NEXT: [[SUB_I:%.*]] = fsub <4 x float> [[A]], [[MUL_I]]
// CHECK-NEXT: ret <4 x float> [[SUB_I]]
//
float32x4_t test_vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
  return vmlsq_n_f32(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vmls_n_f32
// CHECK-SAME: (<2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]], float noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x float> poison, float [[C]], i32 0
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float [[C]], i32 1
// CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x float> [[B]], [[VECINIT1_I]]
// CHECK-NEXT: [[SUB_I:%.*]] = fsub <2 x float> [[A]], [[MUL_I]]
// CHECK-NEXT: ret <2 x float> [[SUB_I]]
//
float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
  return vmls_n_f32(a, b, c);
}

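// Lane and laneq variants: the lane operand is broadcast with a shufflevector
// (a zeroinitializer mask for lane 0, a constant splat mask otherwise) before
// the multiply-accumulate.
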
// CHECK-LABEL: define {{[^@]+}}@test_vmla_lane_f32_0
// CHECK-SAME: (<2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]], <2 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[ADD:%.*]] = fadd <2 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <2 x float> [[ADD]]
//
float32x2_t test_vmla_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
  return vmla_lane_f32(a, b, v, 0);
}

// CHECK-LABEL: define {{[^@]+}}@test_vmlaq_lane_f32_0
// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], <2 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <4 x float> [[ADD]]
//
float32x4_t test_vmlaq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
  return vmlaq_lane_f32(a, b, v, 0);
}

// CHECK-LABEL: define {{[^@]+}}@test_vmla_laneq_f32_0
// CHECK-SAME: (<2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]], <4 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[ADD:%.*]] = fadd <2 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <2 x float> [[ADD]]
//
float32x2_t test_vmla_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
  return vmla_laneq_f32(a, b, v, 0);
}

// CHECK-LABEL: define {{[^@]+}}@test_vmlaq_laneq_f32_0
// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], <4 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <4 x float> [[ADD]]
//
float32x4_t test_vmlaq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) {
  return vmlaq_laneq_f32(a, b, v, 0);
}

// CHECK-LABEL: define {{[^@]+}}@test_vmls_lane_f32_0
// CHECK-SAME: (<2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]], <2 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[SUB:%.*]] = fsub <2 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <2 x float> [[SUB]]
//
float32x2_t test_vmls_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
  return vmls_lane_f32(a, b, v, 0);
}

// CHECK-LABEL: define {{[^@]+}}@test_vmlsq_lane_f32_0
// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], <2 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[SUB:%.*]] = fsub <4 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <4 x float> [[SUB]]
//
float32x4_t test_vmlsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
  return vmlsq_lane_f32(a, b, v, 0);
}

// CHECK-LABEL: define {{[^@]+}}@test_vmls_laneq_f32_0
// CHECK-SAME: (<2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]], <4 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[SUB:%.*]] = fsub <2 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <2 x float> [[SUB]]
//
float32x2_t test_vmls_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
  return vmls_laneq_f32(a, b, v, 0);
}

// CHECK-LABEL: define {{[^@]+}}@test_vmlsq_laneq_f32_0
// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], <4 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[SUB:%.*]] = fsub <4 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <4 x float> [[SUB]]
//
float32x4_t test_vmlsq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) {
  return vmlsq_laneq_f32(a, b, v, 0);
}

// CHECK-LABEL: define {{[^@]+}}@test_vmla_lane_f32
// CHECK-SAME: (<2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]], <2 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[ADD:%.*]] = fadd <2 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <2 x float> [[ADD]]
//
float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
  return vmla_lane_f32(a, b, v, 1);
}

// CHECK-LABEL: define {{[^@]+}}@test_vmlaq_lane_f32
// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], <2 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <4 x float> [[ADD]]
//
float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
  return vmlaq_lane_f32(a, b, v, 1);
}

// CHECK-LABEL: define {{[^@]+}}@test_vmla_laneq_f32
// CHECK-SAME: (<2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]], <4 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <2 x i32> <i32 3, i32 3>
// CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[ADD:%.*]] = fadd <2 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <2 x float> [[ADD]]
//
float32x2_t test_vmla_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
  return vmla_laneq_f32(a, b, v, 3);
}

// CHECK-LABEL: define {{[^@]+}}@test_vmlaq_laneq_f32
// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], <4 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <4 x float> [[ADD]]
//
float32x4_t test_vmlaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
  return vmlaq_laneq_f32(a, b, v, 3);
}

// CHECK-LABEL: define {{[^@]+}}@test_vmls_lane_f32
// CHECK-SAME: (<2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]], <2 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[SUB:%.*]] = fsub <2 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <2 x float> [[SUB]]
//
float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
  return vmls_lane_f32(a, b, v, 1);
}

// CHECK-LABEL: define {{[^@]+}}@test_vmlsq_lane_f32
// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], <2 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[SUB:%.*]] = fsub <4 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <4 x float> [[SUB]]
//
float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
  return vmlsq_lane_f32(a, b, v, 1);
}

// CHECK-LABEL: define {{[^@]+}}@test_vmls_laneq_f32
// CHECK-SAME: (<2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]], <4 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <2 x i32> <i32 3, i32 3>
// CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[SUB:%.*]] = fsub <2 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <2 x float> [[SUB]]
//
float32x2_t test_vmls_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
  return vmls_laneq_f32(a, b, v, 3);
}

// CHECK-LABEL: define {{[^@]+}}@test_vmlsq_laneq_f32
// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], <4 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[SUB:%.*]] = fsub <4 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <4 x float> [[SUB]]
//
float32x4_t test_vmlsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
  return vmlsq_laneq_f32(a, b, v, 3);
}

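// The f64 _n_ variants lower to the fused @llvm.fma.v2f64 intrinsic;
// vfmsq_n_f64 negates the multiplicand with fneg first.
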
// CHECK-LABEL: define {{[^@]+}}@test_vfmaq_n_f64
// CHECK-SAME: (<2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]], double noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[C]], i32 0
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[C]], i32 1
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[B]], <2 x double> [[VECINIT1_I]], <2 x double> [[A]])
// CHECK-NEXT: ret <2 x double> [[TMP3]]
//
float64x2_t test_vfmaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
  return vfmaq_n_f64(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vfmsq_n_f64
// CHECK-SAME: (<2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]], double noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[FNEG_I:%.*]] = fneg <2 x double> [[B]]
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[C]], i32 0
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[C]], i32 1
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[FNEG_I]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[FNEG_I]], <2 x double> [[VECINIT1_I]], <2 x double> [[A]])
// CHECK-NEXT: ret <2 x double> [[TMP3]]
//
float64x2_t test_vfmsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
  return vfmsq_n_f64(a, b, c);
}