; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc %s -mtriple=aarch64 -mattr=+v8.3a,+fullfp16 -o - | FileCheck %s
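
; These tests check that the llvm.aarch64.neon.vcmla.* intrinsics are lowered
; to FCMLA instructions, including the indexed (lane) forms, and that a zero
; accumulator is reassociated into the FCMLA destination when the result feeds
; a fast fadd.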
define <4 x half> @test_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_16x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.4h, #0
; CHECK-NEXT:    ret
entry:
  %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c)
  ret <4 x half> %res
}

define <4 x half> @test_16x4_lane_1(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_16x4_lane_1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.h[1], #0
; CHECK-NEXT:    ret
entry:
  %c.cast = bitcast <4 x half> %c to <2 x i32>
  %c.dup = shufflevector <2 x i32> %c.cast, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %c.res = bitcast <2 x i32> %c.dup to <4 x half>
  %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c.res)
  ret <4 x half> %res
}

define <4 x half> @test_rot90_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_rot90_16x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.4h, #90
; CHECK-NEXT:    ret
entry:
  %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c)
  ret <4 x half> %res
}

define <4 x half> @test_rot90_16x4_lane_0(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_rot90_16x4_lane_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.h[0], #90
; CHECK-NEXT:    ret
entry:
  %c.cast = bitcast <4 x half> %c to <2 x i32>
  %c.dup = shufflevector <2 x i32> %c.cast, <2 x i32> undef, <2 x i32> <i32 0, i32 0>
  %c.res = bitcast <2 x i32> %c.dup to <4 x half>
  %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c.res)
  ret <4 x half> %res
}

define <4 x half> @test_rot180_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_rot180_16x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.4h, #180
; CHECK-NEXT:    ret
entry:
  %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c)
  ret <4 x half> %res
}

define <4 x half> @test_rot180_16x4_lane_0(<4 x half> %a, <4 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_rot180_16x4_lane_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.h[0], #180
; CHECK-NEXT:    ret
entry:
  %c.cast = bitcast <8 x half> %c to <4 x i32>
  %c.dup = shufflevector <4 x i32> %c.cast, <4 x i32> undef, <2 x i32> <i32 0, i32 0>
  %c.res = bitcast <2 x i32> %c.dup to <4 x half>
  %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c.res)
  ret <4 x half> %res
}

define <4 x half> @test_rot270_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_rot270_16x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.4h, #270
; CHECK-NEXT:    ret
entry:
  %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c)
  ret <4 x half> %res
}

define <2 x float> @test_32x2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
; CHECK-LABEL: test_32x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.2s, v1.2s, v2.2s, #0
; CHECK-NEXT:    ret
entry:
  %res = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
  ret <2 x float> %res
}

define <2 x float> @test_rot90_32x2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
; CHECK-LABEL: test_rot90_32x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.2s, v1.2s, v2.2s, #90
; CHECK-NEXT:    ret
entry:
  %res = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
  ret <2 x float> %res
}

define <2 x float> @test_rot180_32x2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
; CHECK-LABEL: test_rot180_32x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.2s, v1.2s, v2.2s, #180
; CHECK-NEXT:    ret
entry:
  %res = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
  ret <2 x float> %res
}

define <2 x float> @test_rot270_32x2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
; CHECK-LABEL: test_rot270_32x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.2s, v1.2s, v2.2s, #270
; CHECK-NEXT:    ret
entry:
  %res = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
  ret <2 x float> %res
}

define <8 x half> @test_16x8(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_16x8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.8h, v1.8h, v2.8h, #0
; CHECK-NEXT:    ret
entry:
  %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
  ret <8 x half> %res
}

define <8 x half> @test_16x8_lane_0(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_16x8_lane_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.8h, v1.8h, v2.h[0], #0
; CHECK-NEXT:    ret
entry:
  %c.cast = bitcast <8 x half> %c to <4 x i32>
  %c.dup = shufflevector <4 x i32> %c.cast, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %c.res = bitcast <4 x i32> %c.dup to <8 x half>
  %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c.res)
  ret <8 x half> %res
}

define <8 x half> @test_rot90_16x8(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_rot90_16x8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.8h, v1.8h, v2.8h, #90
; CHECK-NEXT:    ret
entry:
  %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
  ret <8 x half> %res
}

define <8 x half> @test_rot90_16x8_lane_1(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_rot90_16x8_lane_1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.8h, v1.8h, v2.h[1], #90
; CHECK-NEXT:    ret
entry:
  %c.cast = bitcast <8 x half> %c to <4 x i32>
  %c.dup = shufflevector <4 x i32> %c.cast, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %c.res = bitcast <4 x i32> %c.dup to <8 x half>
  %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c.res)
  ret <8 x half> %res
}

define <8 x half> @test_rot180_16x8(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_rot180_16x8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.8h, v1.8h, v2.8h, #180
; CHECK-NEXT:    ret
entry:
  %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
  ret <8 x half> %res
}

define <8 x half> @test_rot180_16x8_lane_1(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_rot180_16x8_lane_1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.8h, v1.8h, v2.h[1], #180
; CHECK-NEXT:    ret
entry:
  %c.cast = bitcast <8 x half> %c to <4 x i32>
  %c.dup = shufflevector <4 x i32> %c.cast, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %c.res = bitcast <4 x i32> %c.dup to <8 x half>
  %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c.res)
  ret <8 x half> %res
}

define <8 x half> @test_rot270_16x8(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_rot270_16x8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.8h, v1.8h, v2.8h, #270
; CHECK-NEXT:    ret
entry:
  %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
  ret <8 x half> %res
}

define <8 x half> @test_rot270_16x8_lane_0(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_rot270_16x8_lane_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.8h, v1.8h, v2.h[0], #270
; CHECK-NEXT:    ret
entry:
  %c.cast = bitcast <8 x half> %c to <4 x i32>
  %c.dup = shufflevector <4 x i32> %c.cast, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %c.res = bitcast <4 x i32> %c.dup to <8 x half>
  %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c.res)
  ret <8 x half> %res
}

define <4 x float> @test_32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_32x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4s, v1.4s, v2.4s, #0
; CHECK-NEXT:    ret
entry:
  %res = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
  ret <4 x float> %res
}

define <4 x float> @test_32x4_lane_0(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_32x4_lane_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4s, v1.4s, v2.s[0], #0
; CHECK-NEXT:    ret
entry:
  %c.cast = bitcast <4 x float> %c to <2 x i64>
  %c.dup = shufflevector <2 x i64> %c.cast, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %c.res = bitcast <2 x i64> %c.dup to <4 x float>
  %res = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c.res)
  ret <4 x float> %res
}

define <4 x float> @test_rot90_32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_rot90_32x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4s, v1.4s, v2.4s, #90
; CHECK-NEXT:    ret
entry:
  %res = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
  ret <4 x float> %res
}

define <4 x float> @test_rot180_32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_rot180_32x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4s, v1.4s, v2.4s, #180
; CHECK-NEXT:    ret
entry:
  %res = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
  ret <4 x float> %res
}

define <4 x float> @test_rot270_32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_rot270_32x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4s, v1.4s, v2.4s, #270
; CHECK-NEXT:    ret
entry:
  %res = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
  ret <4 x float> %res
}

define <2 x double> @test_64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_64x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.2d, v1.2d, v2.2d, #0
; CHECK-NEXT:    ret
entry:
  %res = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot0.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
  ret <2 x double> %res
}

define <2 x double> @test_rot90_64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_rot90_64x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.2d, v1.2d, v2.2d, #90
; CHECK-NEXT:    ret
entry:
  %res = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot90.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
  ret <2 x double> %res
}

define <2 x double> @test_rot180_64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_rot180_64x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.2d, v1.2d, v2.2d, #180
; CHECK-NEXT:    ret
entry:
  %res = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot180.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
  ret <2 x double> %res
}

define <2 x double> @test_rot270_64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_rot270_64x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.2d, v1.2d, v2.2d, #270
; CHECK-NEXT:    ret
entry:
  %res = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
  ret <2 x double> %res
}

define <4 x float> @reassoc_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: reassoc_f32x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4s, v1.4s, v2.4s, #0
; CHECK-NEXT:    ret
entry:
  %d = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> zeroinitializer, <4 x float> %b, <4 x float> %c)
  %res = fadd fast <4 x float> %d, %a
  ret <4 x float> %res
}

define <4 x float> @reassoc_c_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: reassoc_c_f32x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4s, v1.4s, v2.4s, #90
; CHECK-NEXT:    ret
entry:
  %d = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> zeroinitializer, <4 x float> %b, <4 x float> %c)
  %res = fadd fast <4 x float> %a, %d
  ret <4 x float> %res
}

define <4 x half> @reassoc_f16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: reassoc_f16x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.4h, #180
; CHECK-NEXT:    ret
entry:
  %d = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> zeroinitializer, <4 x half> %b, <4 x half> %c)
  %res = fadd fast <4 x half> %d, %a
  ret <4 x half> %res
}

define <4 x half> @reassoc_c_f16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: reassoc_c_f16x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.4h, #270
; CHECK-NEXT:    ret
entry:
  %d = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> zeroinitializer, <4 x half> %b, <4 x half> %c)
  %res = fadd fast <4 x half> %a, %d
  ret <4 x half> %res
}

define <2 x double> @reassoc_f64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %g) {
; CHECK-LABEL: reassoc_f64x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.2d, v1.2d, v2.2d, #270
; CHECK-NEXT:    fcmla v0.2d, v2.2d, v3.2d, #270
; CHECK-NEXT:    ret
entry:
  %d = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
  %e = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> zeroinitializer, <2 x double> %c, <2 x double> %g)
  %res = fadd fast <2 x double> %e, %d
  ret <2 x double> %res
}

define <2 x double> @reassoc_c_f64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %g) {
; CHECK-LABEL: reassoc_c_f64x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fadd v0.2d, v0.2d, v0.2d
; CHECK-NEXT:    fcmla v0.2d, v1.2d, v2.2d, #270
; CHECK-NEXT:    fcmla v0.2d, v2.2d, v3.2d, #270
; CHECK-NEXT:    ret
entry:
  %d = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
  %e = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> %a, <2 x double> %c, <2 x double> %g)
  %res = fadd fast <2 x double> %e, %d
  ret <2 x double> %res
}

define <4 x float> @reassoc_nonfast_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: reassoc_nonfast_f32x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    movi v3.2d, #0000000000000000
; CHECK-NEXT:    fcmla v3.4s, v1.4s, v2.4s, #0
; CHECK-NEXT:    fadd v0.4s, v3.4s, v0.4s
; CHECK-NEXT:    ret
entry:
  %d = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> zeroinitializer, <4 x float> %b, <4 x float> %c)
  %res = fadd <4 x float> %d, %a
  ret <4 x float> %res
}

declare <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half>, <4 x half>, <4 x half>)
declare <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half>, <4 x half>, <4 x half>)
declare <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half>, <4 x half>, <4 x half>)
declare <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half>, <4 x half>, <4 x half>)
declare <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half>, <8 x half>, <8 x half>)
declare <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half>, <8 x half>, <8 x half>)
declare <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half>, <8 x half>, <8 x half>)
declare <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half>, <8 x half>, <8 x half>)
declare <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <2 x double> @llvm.aarch64.neon.vcmla.rot0.v2f64(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.aarch64.neon.vcmla.rot90.v2f64(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.aarch64.neon.vcmla.rot180.v2f64(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double>, <2 x double>, <2 x double>)