llvm/test/CodeGen/AArch64/complex-deinterleaving-mixed-cases.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s
   3
   4 target triple = "aarch64"
   5
   6 ; Expected to transform: computes (a * b) * c as two chained complex multiplies.
   7 define <4 x float> @mul_mul(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
   8 ; CHECK-LABEL: mul_mul:
   9 ; CHECK:       // %bb.0: // %entry
  10 ; CHECK-NEXT:    movi v4.2d, #0000000000000000
  11 ; CHECK-NEXT:    movi v3.2d, #0000000000000000
  12 ; CHECK-NEXT:    fcmla v4.4s, v1.4s, v0.4s, #0
  13 ; CHECK-NEXT:    fcmla v4.4s, v1.4s, v0.4s, #90
  14 ; CHECK-NEXT:    fcmla v3.4s, v2.4s, v4.4s, #0
  15 ; CHECK-NEXT:    fcmla v3.4s, v2.4s, v4.4s, #90
  16 ; CHECK-NEXT:    mov v0.16b, v3.16b
  17 ; CHECK-NEXT:    ret
  18 entry:
  19   %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2> ; even lanes of %a, used as real(a)
  20   %strided.vec151 = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3> ; odd lanes of %a, used as imag(a)
  21   %strided.vec153 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2> ; real(b)
  22   %strided.vec154 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3> ; imag(b)
  23   %0 = fmul fast <2 x float> %strided.vec154, %strided.vec151
  24   %1 = fmul fast <2 x float> %strided.vec153, %strided.vec
  25   %2 = fmul fast <2 x float> %strided.vec154, %strided.vec
  26   %3 = fmul fast <2 x float> %strided.vec153, %strided.vec151
  27   %4 = fadd fast <2 x float> %3, %2 ; imag(a * b)
  28   %5 = fsub fast <2 x float> %1, %0 ; real(a * b)
  29   %strided.vec156 = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2> ; real(c)
  30   %strided.vec157 = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3> ; imag(c)
  31   %6 = fmul fast <2 x float> %4, %strided.vec156
  32   %7 = fmul fast <2 x float> %5, %strided.vec157
  33   %8 = fadd fast <2 x float> %6, %7 ; imag((a * b) * c)
  34   %9 = fmul fast <2 x float> %strided.vec156, %5
  35   %10 = fmul fast <2 x float> %4, %strided.vec157
  36   %11 = fsub fast <2 x float> %9, %10 ; real((a * b) * c)
  37   %interleaved.vec = shufflevector <2 x float> %11, <2 x float> %8, <4 x i32> <i32 0, i32 2, i32 1, i32 3> ; re-interleave as [re0, im0, re1, im1]
  38   ret <4 x float> %interleaved.vec
  39 }
  40
  41 ; Expected to not transform: the values multiplied with c take their even lanes from (b - c) but their odd lanes from (b - a).
  42 define <4 x float> @add_mul(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
  43 ; CHECK-LABEL: add_mul:
  44 ; CHECK:       // %bb.0: // %entry
  45 ; CHECK-NEXT:    fsub v0.4s, v1.4s, v0.4s
  46 ; CHECK-NEXT:    fsub v1.4s, v1.4s, v2.4s
  47 ; CHECK-NEXT:    ext v3.16b, v2.16b, v2.16b, #8
  48 ; CHECK-NEXT:    ext v4.16b, v0.16b, v0.16b, #8
  49 ; CHECK-NEXT:    ext v5.16b, v1.16b, v1.16b, #8
  50 ; CHECK-NEXT:    zip2 v0.2s, v0.2s, v4.2s
  51 ; CHECK-NEXT:    zip2 v4.2s, v2.2s, v3.2s
  52 ; CHECK-NEXT:    zip1 v1.2s, v1.2s, v5.2s
  53 ; CHECK-NEXT:    zip1 v2.2s, v2.2s, v3.2s
  54 ; CHECK-NEXT:    fmul v5.2s, v4.2s, v0.2s
  55 ; CHECK-NEXT:    fmul v3.2s, v1.2s, v4.2s
  56 ; CHECK-NEXT:    fneg v4.2s, v5.2s
  57 ; CHECK-NEXT:    fmla v3.2s, v0.2s, v2.2s
  58 ; CHECK-NEXT:    fmla v4.2s, v1.2s, v2.2s
  59 ; CHECK-NEXT:    zip1 v0.4s, v4.4s, v3.4s
  60 ; CHECK-NEXT:    ret
  61 entry:
  62   %0 = fsub fast <4 x float> %b, %c
  63   %1 = shufflevector <4 x float> %0, <4 x float> poison, <2 x i32> <i32 0, i32 2> ; even lanes of (b - c)
  64   %strided.vec58 = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2> ; even lanes of %c
  65   %strided.vec59 = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3> ; odd lanes of %c
  66   %2 = fmul fast <2 x float> %1, %strided.vec59
  67   %3 = fsub fast <4 x float> %b, %a
  68   %4 = shufflevector <4 x float> %3, <4 x float> poison, <2 x i32> <i32 1, i32 3> ; odd lanes of (b - a)
  69   %5 = fmul fast <2 x float> %strided.vec58, %4
  70   %6 = fadd fast <2 x float> %5, %2 ; becomes the odd lanes of the result
  71   %7 = fmul fast <2 x float> %strided.vec58, %1
  72   %8 = fmul fast <2 x float> %strided.vec59, %4
  73   %9 = fsub fast <2 x float> %7, %8 ; becomes the even lanes of the result
  74   %interleaved.vec = shufflevector <2 x float> %9, <2 x float> %6, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  75   ret <4 x float> %interleaved.vec
  76 }
  77
  78 ; Expected to not transform: the inner product b * c is used as (imag, -real), i.e. multiplied by -i, before being multiplied by a.
  79 define <4 x float> @mul_mul270_mul(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
  80 ; CHECK-LABEL: mul_mul270_mul:
  81 ; CHECK:       // %bb.0: // %entry
  82 ; CHECK-NEXT:    ext v3.16b, v2.16b, v2.16b, #8
  83 ; CHECK-NEXT:    ext v4.16b, v1.16b, v1.16b, #8
  84 ; CHECK-NEXT:    zip1 v5.2s, v2.2s, v3.2s
  85 ; CHECK-NEXT:    zip1 v6.2s, v1.2s, v4.2s
  86 ; CHECK-NEXT:    zip2 v2.2s, v2.2s, v3.2s
  87 ; CHECK-NEXT:    zip2 v1.2s, v1.2s, v4.2s
  88 ; CHECK-NEXT:    ext v3.16b, v0.16b, v0.16b, #8
  89 ; CHECK-NEXT:    fmul v7.2s, v6.2s, v5.2s
  90 ; CHECK-NEXT:    fneg v4.2s, v7.2s
  91 ; CHECK-NEXT:    zip2 v7.2s, v0.2s, v3.2s
  92 ; CHECK-NEXT:    zip1 v0.2s, v0.2s, v3.2s
  93 ; CHECK-NEXT:    fmla v4.2s, v2.2s, v1.2s
  94 ; CHECK-NEXT:    fmul v1.2s, v1.2s, v5.2s
  95 ; CHECK-NEXT:    fmul v3.2s, v4.2s, v7.2s
  96 ; CHECK-NEXT:    fmla v1.2s, v2.2s, v6.2s
  97 ; CHECK-NEXT:    fmul v2.2s, v4.2s, v0.2s
  98 ; CHECK-NEXT:    fneg v3.2s, v3.2s
  99 ; CHECK-NEXT:    fmla v2.2s, v7.2s, v1.2s
 100 ; CHECK-NEXT:    fmla v3.2s, v0.2s, v1.2s
 101 ; CHECK-NEXT:    zip1 v0.4s, v3.4s, v2.4s
 102 ; CHECK-NEXT:    ret
 103 entry:
 104   %strided.vec = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2> ; real(c)
 105   %strided.vec81 = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3> ; imag(c)
 106   %strided.vec83 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2> ; real(b)
 107   %strided.vec84 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3> ; imag(b)
 108   %0 = fmul fast <2 x float> %strided.vec84, %strided.vec
 109   %1 = fmul fast <2 x float> %strided.vec83, %strided.vec81
 110   %2 = fadd fast <2 x float> %1, %0 ; imag(b * c); plays the role of the real part below
 111   %strided.vec86 = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2> ; real(a)
 112   %strided.vec87 = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3> ; imag(a)
 113   %3 = fmul fast <2 x float> %2, %strided.vec87
 114   %4 = fmul fast <2 x float> %strided.vec84, %strided.vec81
 115   %5 = fmul fast <2 x float> %strided.vec83, %strided.vec
 116   %6 = fsub fast <2 x float> %4, %5 ; -real(b * c); plays the role of the imaginary part below
 117   %7 = fmul fast <2 x float> %6, %strided.vec86
 118   %8 = fadd fast <2 x float> %3, %7 ; imaginary part of the result
 119   %9 = fmul fast <2 x float> %2, %strided.vec86
 120   %10 = fmul fast <2 x float> %6, %strided.vec87
 121   %11 = fsub fast <2 x float> %9, %10 ; real part of the result
 122   %interleaved.vec = shufflevector <2 x float> %11, <2 x float> %8, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
 123   ret <4 x float> %interleaved.vec
 124 }
 125
 126 ; (a * b) * a
 127 ; Expected to transform: %a is an operand of both the inner and the outer complex multiply.
 128 define <4 x float> @mul_triangle(<4 x float> %a, <4 x float> %b) {
 129 ; CHECK-LABEL: mul_triangle:
 130 ; CHECK:       // %bb.0: // %entry
 131 ; CHECK-NEXT:    movi v3.2d, #0000000000000000
 132 ; CHECK-NEXT:    movi v2.2d, #0000000000000000
 133 ; CHECK-NEXT:    fcmla v3.4s, v0.4s, v1.4s, #0
 134 ; CHECK-NEXT:    fcmla v3.4s, v0.4s, v1.4s, #90
 135 ; CHECK-NEXT:    fcmla v2.4s, v3.4s, v0.4s, #0
 136 ; CHECK-NEXT:    fcmla v2.4s, v3.4s, v0.4s, #90
 137 ; CHECK-NEXT:    mov v0.16b, v2.16b
 138 ; CHECK-NEXT:    ret
 139 entry:
 140   %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2> ; real(a)
 141   %strided.vec35 = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3> ; imag(a)
 142   %strided.vec37 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2> ; real(b)
 143   %strided.vec38 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3> ; imag(b)
 144   %0 = fmul fast <2 x float> %strided.vec37, %strided.vec
 145   %1 = fmul fast <2 x float> %strided.vec38, %strided.vec35
 146   %2 = fsub fast <2 x float> %0, %1 ; real(a * b)
 147   %3 = fmul fast <2 x float> %2, %strided.vec35
 148   %4 = fmul fast <2 x float> %strided.vec38, %strided.vec
 149   %5 = fmul fast <2 x float> %strided.vec35, %strided.vec37
 150   %6 = fadd fast <2 x float> %4, %5 ; imag(a * b)
 151   %7 = fmul fast <2 x float> %6, %strided.vec
 152   %8 = fadd fast <2 x float> %3, %7 ; imag((a * b) * a)
 153   %9 = fmul fast <2 x float> %2, %strided.vec
 154   %10 = fmul fast <2 x float> %6, %strided.vec35
 155   %11 = fsub fast <2 x float> %9, %10 ; real((a * b) * a)
 156   %interleaved.vec = shufflevector <2 x float> %11, <2 x float> %8, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
 157   ret <4 x float> %interleaved.vec
 158 }
 159
 160
 161 ; d * (b * a) * (c * a)
 162 ; Expected to transform: %a is an operand of both inner products, whose results meet again in the final multiply.
 163 define <4 x float> @mul_diamond(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d) {
 164 ; CHECK-LABEL: mul_diamond:
 165 ; CHECK:       // %bb.0: // %entry
 166 ; CHECK-NEXT:    movi v4.2d, #0000000000000000
 167 ; CHECK-NEXT:    movi v5.2d, #0000000000000000
 168 ; CHECK-NEXT:    movi v6.2d, #0000000000000000
 169 ; CHECK-NEXT:    fcmla v4.4s, v0.4s, v1.4s, #0
 170 ; CHECK-NEXT:    fcmla v6.4s, v0.4s, v2.4s, #0
 171 ; CHECK-NEXT:    fcmla v4.4s, v0.4s, v1.4s, #90
 172 ; CHECK-NEXT:    movi v1.2d, #0000000000000000
 173 ; CHECK-NEXT:    fcmla v6.4s, v0.4s, v2.4s, #90
 174 ; CHECK-NEXT:    fcmla v5.4s, v3.4s, v4.4s, #0
 175 ; CHECK-NEXT:    fcmla v5.4s, v3.4s, v4.4s, #90
 176 ; CHECK-NEXT:    fcmla v1.4s, v5.4s, v6.4s, #0
 177 ; CHECK-NEXT:    fcmla v1.4s, v5.4s, v6.4s, #90
 178 ; CHECK-NEXT:    mov v0.16b, v1.16b
 179 ; CHECK-NEXT:    ret
 180 entry:
 181   %a.real = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
 182   %a.imag = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
 183   %b.real = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
 184   %b.imag = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
 185   %c.real = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
 186   %c.imag = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>
 187   %d.real = shufflevector <4 x float> %d, <4 x float> poison, <2 x i32> <i32 0, i32 2>
 188   %d.imag = shufflevector <4 x float> %d, <4 x float> poison, <2 x i32> <i32 1, i32 3>
 189   %0 = fmul fast <2 x float> %a.imag, %b.real
 190   %1 = fmul fast <2 x float> %a.real, %b.imag
 191   %2 = fadd fast <2 x float> %1, %0 ; imag(a * b)
 192   %3 = fmul fast <2 x float> %a.real, %b.real
 193   %4 = fmul fast <2 x float> %b.imag, %a.imag
 194   %5 = fsub fast <2 x float> %3, %4 ; real(a * b)
 195   %6 = fmul fast <2 x float> %d.real, %5
 196   %7 = fmul fast <2 x float> %2, %d.imag
 197   %8 = fmul fast <2 x float> %d.real, %2
 198   %9 = fmul fast <2 x float> %5, %d.imag
 199   %10 = fsub fast <2 x float> %6, %7 ; real(d * (a * b))
 200   %11 = fadd fast <2 x float> %8, %9 ; imag(d * (a * b))
 201   %12 = fmul fast <2 x float> %c.real, %a.imag
 202   %13 = fmul fast <2 x float> %c.imag, %a.real
 203   %14 = fadd fast <2 x float> %13, %12 ; imag(c * a)
 204   %15 = fmul fast <2 x float> %14, %10
 205   %16 = fmul fast <2 x float> %c.real, %a.real
 206   %17 = fmul fast <2 x float> %c.imag, %a.imag
 207   %18 = fsub fast <2 x float> %16, %17 ; real(c * a)
 208   %19 = fmul fast <2 x float> %18, %11
 209   %20 = fadd fast <2 x float> %15, %19 ; imaginary part of the final product
 210   %21 = fmul fast <2 x float> %18, %10
 211   %22 = fmul fast <2 x float> %14, %11
 212   %23 = fsub fast <2 x float> %21, %22 ; real part of the final product
 213   %interleaved.vec = shufflevector <2 x float> %23, <2 x float> %20, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
 214   ret <4 x float> %interleaved.vec
 215 }
 216
 217 ; Expected to transform: a * c + i * (a * b), i.e. the product a * b is rotated by 90 degrees before being added to a * c.
 218 define <4 x float> @mul_add90_mul(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 219 ; CHECK-LABEL: mul_add90_mul:
 220 ; CHECK:       // %bb.0: // %entry
 221 ; CHECK-NEXT:    movi v3.2d, #0000000000000000
 222 ; CHECK-NEXT:    movi v4.2d, #0000000000000000
 223 ; CHECK-NEXT:    fcmla v4.4s, v0.4s, v2.4s, #0
 224 ; CHECK-NEXT:    fcmla v3.4s, v0.4s, v1.4s, #0
 225 ; CHECK-NEXT:    fcmla v4.4s, v0.4s, v2.4s, #90
 226 ; CHECK-NEXT:    fcmla v3.4s, v0.4s, v1.4s, #90
 227 ; CHECK-NEXT:    fcadd v0.4s, v4.4s, v3.4s, #90
 228 ; CHECK-NEXT:    ret
 229 entry:
 230   %ar = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
 231   %ai = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
 232   %br = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
 233   %bi = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
 234   %cr = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
 235   %ci = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>
 236
 237   %i6 = fmul fast <2 x float> %br, %ar
 238   %i7 = fmul fast <2 x float> %bi, %ai
 239   %xr = fsub fast <2 x float> %i6, %i7 ; real(a * b)
 240   %i9 = fmul fast <2 x float> %bi, %ar
 241   %i10 = fmul fast <2 x float> %br, %ai
 242   %xi = fadd fast <2 x float> %i9, %i10 ; imag(a * b)
 243
 244   %j6 = fmul fast <2 x float> %cr, %ar
 245   %j7 = fmul fast <2 x float> %ci, %ai
 246   %yr = fsub fast <2 x float> %j6, %j7 ; real(a * c)
 247   %j9 = fmul fast <2 x float> %ci, %ar
 248   %j10 = fmul fast <2 x float> %cr, %ai
 249   %yi = fadd fast <2 x float> %j9, %j10 ; imag(a * c)
 250
 251   %zr = fsub fast <2 x float> %yr, %xi ; real(y) - imag(x)
 252   %zi = fadd fast <2 x float> %yi, %xr ; imag(y) + real(x)
 253   %interleaved.vec = shufflevector <2 x float> %zr, <2 x float> %zi, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
 254   ret <4 x float> %interleaved.vec
 255 }
 256
 257 ; Expected to not transform: %yr and %yi reuse %i6 and %i9 from the a * b product in place of the commented-out %j6 and %j9.
 258 define <4 x float> @mul_triangle_addmul(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 259 ; CHECK-LABEL: mul_triangle_addmul:
 260 ; CHECK:       // %bb.0: // %entry
 261 ; CHECK-NEXT:    ext v3.16b, v0.16b, v0.16b, #8
 262 ; CHECK-NEXT:    ext v4.16b, v1.16b, v1.16b, #8
 263 ; CHECK-NEXT:    zip1 v5.2s, v0.2s, v3.2s
 264 ; CHECK-NEXT:    zip1 v6.2s, v1.2s, v4.2s
 265 ; CHECK-NEXT:    zip2 v1.2s, v1.2s, v4.2s
 266 ; CHECK-NEXT:    ext v4.16b, v2.16b, v2.16b, #8
 267 ; CHECK-NEXT:    zip2 v0.2s, v0.2s, v3.2s
 268 ; CHECK-NEXT:    fmul v7.2s, v6.2s, v5.2s
 269 ; CHECK-NEXT:    fmul v5.2s, v1.2s, v5.2s
 270 ; CHECK-NEXT:    zip1 v3.2s, v2.2s, v4.2s
 271 ; CHECK-NEXT:    zip2 v2.2s, v2.2s, v4.2s
 272 ; CHECK-NEXT:    fmov d4, d7
 273 ; CHECK-NEXT:    fmov d16, d5
 274 ; CHECK-NEXT:    fmls v7.2s, v0.2s, v2.2s
 275 ; CHECK-NEXT:    fmla v5.2s, v0.2s, v3.2s
 276 ; CHECK-NEXT:    fmls v4.2s, v0.2s, v1.2s
 277 ; CHECK-NEXT:    fmla v16.2s, v0.2s, v6.2s
 278 ; CHECK-NEXT:    fsub v0.2s, v7.2s, v16.2s
 279 ; CHECK-NEXT:    fadd v1.2s, v5.2s, v4.2s
 280 ; CHECK-NEXT:    zip1 v0.4s, v0.4s, v1.4s
 281 ; CHECK-NEXT:    ret
 282 entry:
 283   %ar = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
 284   %ai = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
 285   %br = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
 286   %bi = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
 287   %cr = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
 288   %ci = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>
 289
 290   %i6 = fmul fast <2 x float> %br, %ar
 291   %i7 = fmul fast <2 x float> %bi, %ai
 292   %xr = fsub fast <2 x float> %i6, %i7 ; real(a * b)
 293   %i9 = fmul fast <2 x float> %bi, %ar
 294   %i10 = fmul fast <2 x float> %br, %ai
 295   %xi = fadd fast <2 x float> %i9, %i10 ; imag(a * b)
 296
 297   ;%j6 = fmul fast <2 x float> %cr, %ar
 298   %j7 = fmul fast <2 x float> %ci, %ai
 299   %yr = fsub fast <2 x float> %i6, %j7 ; br * ar - ci * ai: uses %i6 where the disabled %j6 was cr * ar
 300   ;%j9 = fmul fast <2 x float> %ci, %ar
 301   %j10 = fmul fast <2 x float> %cr, %ai
 302   %yi = fadd fast <2 x float> %i9, %j10 ; bi * ar + cr * ai: uses %i9 where the disabled %j9 was ci * ar
 303
 304   %zr = fsub fast <2 x float> %yr, %xi
 305   %zi = fadd fast <2 x float> %yi, %xr
 306   %interleaved.vec = shufflevector <2 x float> %zr, <2 x float> %zi, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
 307   ret <4 x float> %interleaved.vec
 308 }
 309
 310 ; Expected to not transform: same arithmetic as mul_triangle, but the inner a * b parts (%2 and %6) are also stored through %p.
 311 define <4 x float> @mul_triangle_multiuses(<4 x float> %a, <4 x float> %b, ptr %p) {
 312 ; CHECK-LABEL: mul_triangle_multiuses:
 313 ; CHECK:       // %bb.0: // %entry
 314 ; CHECK-NEXT:    ext v2.16b, v0.16b, v0.16b, #8
 315 ; CHECK-NEXT:    ext v3.16b, v1.16b, v1.16b, #8
 316 ; CHECK-NEXT:    zip2 v4.2s, v0.2s, v2.2s
 317 ; CHECK-NEXT:    zip1 v5.2s, v1.2s, v3.2s
 318 ; CHECK-NEXT:    zip1 v0.2s, v0.2s, v2.2s
 319 ; CHECK-NEXT:    zip2 v1.2s, v1.2s, v3.2s
 320 ; CHECK-NEXT:    fmul v2.2s, v4.2s, v5.2s
 321 ; CHECK-NEXT:    fmul v3.2s, v1.2s, v4.2s
 322 ; CHECK-NEXT:    fmla v2.2s, v0.2s, v1.2s
 323 ; CHECK-NEXT:    fneg v1.2s, v3.2s
 324 ; CHECK-NEXT:    fmul v3.2s, v2.2s, v4.2s
 325 ; CHECK-NEXT:    fmla v1.2s, v0.2s, v5.2s
 326 ; CHECK-NEXT:    fmul v5.2s, v2.2s, v0.2s
 327 ; CHECK-NEXT:    fneg v3.2s, v3.2s
 328 ; CHECK-NEXT:    fmla v5.2s, v4.2s, v1.2s
 329 ; CHECK-NEXT:    fmla v3.2s, v0.2s, v1.2s
 330 ; CHECK-NEXT:    mov v1.d[1], v2.d[0]
 331 ; CHECK-NEXT:    zip1 v0.4s, v3.4s, v5.4s
 332 ; CHECK-NEXT:    str q1, [x0]
 333 ; CHECK-NEXT:    ret
 334 entry:
 335   %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2> ; real(a)
 336   %strided.vec35 = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3> ; imag(a)
 337   %strided.vec37 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2> ; real(b)
 338   %strided.vec38 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3> ; imag(b)
 339   %0 = fmul fast <2 x float> %strided.vec37, %strided.vec
 340   %1 = fmul fast <2 x float> %strided.vec38, %strided.vec35
 341   %2 = fsub fast <2 x float> %0, %1 ; real(a * b)
 342   %3 = fmul fast <2 x float> %2, %strided.vec35
 343   %4 = fmul fast <2 x float> %strided.vec38, %strided.vec
 344   %5 = fmul fast <2 x float> %strided.vec35, %strided.vec37
 345   %6 = fadd fast <2 x float> %4, %5 ; imag(a * b)
 346   %otheruse = shufflevector <2 x float> %2, <2 x float> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; concatenates %2 and %6 without interleaving
 347   store <4 x float> %otheruse, ptr %p ; additional use of the inner product outside the multiply chain
 348   %7 = fmul fast <2 x float> %6, %strided.vec
 349   %8 = fadd fast <2 x float> %3, %7 ; imag((a * b) * a)
 350   %9 = fmul fast <2 x float> %2, %strided.vec
 351   %10 = fmul fast <2 x float> %6, %strided.vec35
 352   %11 = fsub fast <2 x float> %9, %10 ; real((a * b) * a)
 353   %interleaved.vec = shufflevector <2 x float> %11, <2 x float> %8, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
 354   ret <4 x float> %interleaved.vec
 355 }
 356
 357 ; Expected to transform: a * b + c; the expected output has no zeroing movi and the fcmla pair accumulates directly into v2.
 358 define <4 x float> @mul_addequal(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 359 ; CHECK-LABEL: mul_addequal:
 360 ; CHECK:       // %bb.0: // %entry
 361 ; CHECK-NEXT:    fcmla v2.4s, v1.4s, v0.4s, #0
 362 ; CHECK-NEXT:    fcmla v2.4s, v1.4s, v0.4s, #90
 363 ; CHECK-NEXT:    mov v0.16b, v2.16b
 364 ; CHECK-NEXT:    ret
 365 entry:
 366   %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2> ; real(a)
 367   %a.imag = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
 368   %b.real = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
 369   %b.imag = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
 370   %0 = fmul fast <2 x float> %b.imag, %strided.vec
 371   %1 = fmul fast <2 x float> %b.real, %a.imag
 372   %2 = fadd fast <2 x float> %1, %0 ; imag(a * b)
 373   %3 = fmul fast <2 x float> %b.real, %strided.vec
 374   %4 = fmul fast <2 x float> %a.imag, %b.imag
 375   %5 = fsub fast <2 x float> %3, %4 ; real(a * b)
 376   %c.real = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
 377   %c.imag = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>
 378   %6 = fadd fast <2 x float> %5, %c.real ; real(a * b) + real(c)
 379   %7 = fadd fast <2 x float> %2, %c.imag ; imag(a * b) + imag(c)
 380   %interleaved.vec = shufflevector <2 x float> %6, <2 x float> %7, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
 381   ret <4 x float> %interleaved.vec
 382 }
 383
 384 ; Expected to transform: a * b - c; the expected output forms the product with fcmla and then subtracts c with a full-width fsub.
 385 define <4 x float> @mul_subequal(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 386 ; CHECK-LABEL: mul_subequal:
 387 ; CHECK:       // %bb.0: // %entry
 388 ; CHECK-NEXT:    movi v3.2d, #0000000000000000
 389 ; CHECK-NEXT:    fcmla v3.4s, v1.4s, v0.4s, #0
 390 ; CHECK-NEXT:    fcmla v3.4s, v1.4s, v0.4s, #90
 391 ; CHECK-NEXT:    fsub v0.4s, v3.4s, v2.4s
 392 ; CHECK-NEXT:    ret
 393 entry:
 394   %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2> ; real(a)
 395   %a.imag = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
 396   %b.real = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
 397   %b.imag = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
 398   %0 = fmul fast <2 x float> %b.imag, %strided.vec
 399   %1 = fmul fast <2 x float> %b.real, %a.imag
 400   %2 = fadd fast <2 x float> %1, %0 ; imag(a * b)
 401   %3 = fmul fast <2 x float> %b.real, %strided.vec
 402   %4 = fmul fast <2 x float> %a.imag, %b.imag
 403   %5 = fsub fast <2 x float> %3, %4 ; real(a * b)
 404   %c.real = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
 405   %c.imag = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>
 406   %6 = fsub fast <2 x float> %5, %c.real ; real(a * b) - real(c)
 407   %7 = fsub fast <2 x float> %2, %c.imag ; imag(a * b) - imag(c)
 408   %interleaved.vec = shufflevector <2 x float> %6, <2 x float> %7, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
 409   ret <4 x float> %interleaved.vec
 410 }
 411
 412
 413 ; Expected to transform: the complex product a * b is formed with fcmla, then multiplied lane by lane (not as a complex multiply) with c.
 414 define <4 x float> @mul_mulequal(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 415 ; CHECK-LABEL: mul_mulequal:
 416 ; CHECK:       // %bb.0: // %entry
 417 ; CHECK-NEXT:    movi v3.2d, #0000000000000000
 418 ; CHECK-NEXT:    fcmla v3.4s, v1.4s, v0.4s, #0
 419 ; CHECK-NEXT:    fcmla v3.4s, v1.4s, v0.4s, #90
 420 ; CHECK-NEXT:    fmul v0.4s, v3.4s, v2.4s
 421 ; CHECK-NEXT:    ret
 422 entry:
 423   %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2> ; real(a)
 424   %a.imag = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
 425   %b.real = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
 426   %b.imag = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
 427   %0 = fmul fast <2 x float> %b.imag, %strided.vec
 428   %1 = fmul fast <2 x float> %b.real, %a.imag
 429   %2 = fadd fast <2 x float> %1, %0 ; imag(a * b)
 430   %3 = fmul fast <2 x float> %b.real, %strided.vec
 431   %4 = fmul fast <2 x float> %a.imag, %b.imag
 432   %5 = fsub fast <2 x float> %3, %4 ; real(a * b)
 433   %c.real = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
 434   %c.imag = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>
 435   %6 = fmul fast <2 x float> %5, %c.real ; real(a * b) * real(c)
 436   %7 = fmul fast <2 x float> %2, %c.imag ; imag(a * b) * imag(c)
 437   %interleaved.vec = shufflevector <2 x float> %6, <2 x float> %7, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
 438   ret <4 x float> %interleaved.vec
 439 }
 440
 441 ; Expected to not transform: a * b followed by a lane-by-lane fdiv by c; the expected output contains no fcmla.
 442 define <4 x float> @mul_divequal(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 443 ; CHECK-LABEL: mul_divequal:
 444 ; CHECK:       // %bb.0: // %entry
 445 ; CHECK-NEXT:    ext v3.16b, v0.16b, v0.16b, #8
 446 ; CHECK-NEXT:    ext v4.16b, v1.16b, v1.16b, #8
 447 ; CHECK-NEXT:    zip2 v5.2s, v0.2s, v3.2s
 448 ; CHECK-NEXT:    zip2 v6.2s, v1.2s, v4.2s
 449 ; CHECK-NEXT:    zip1 v0.2s, v0.2s, v3.2s
 450 ; CHECK-NEXT:    zip1 v1.2s, v1.2s, v4.2s
 451 ; CHECK-NEXT:    ext v3.16b, v2.16b, v2.16b, #8
 452 ; CHECK-NEXT:    fmul v7.2s, v5.2s, v6.2s
 453 ; CHECK-NEXT:    fneg v4.2s, v7.2s
 454 ; CHECK-NEXT:    zip1 v7.2s, v2.2s, v3.2s
 455 ; CHECK-NEXT:    zip2 v2.2s, v2.2s, v3.2s
 456 ; CHECK-NEXT:    fmla v4.2s, v0.2s, v1.2s
 457 ; CHECK-NEXT:    fmul v0.2s, v6.2s, v0.2s
 458 ; CHECK-NEXT:    fmla v0.2s, v5.2s, v1.2s
 459 ; CHECK-NEXT:    fdiv v4.2s, v4.2s, v7.2s
 460 ; CHECK-NEXT:    fdiv v0.2s, v0.2s, v2.2s
 461 ; CHECK-NEXT:    zip1 v0.4s, v4.4s, v0.4s
 462 ; CHECK-NEXT:    ret
 463 entry:
 464   %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2> ; real(a)
 465   %a.imag = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
 466   %b.real = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
 467   %b.imag = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
 468   %0 = fmul fast <2 x float> %b.imag, %strided.vec
 469   %1 = fmul fast <2 x float> %b.real, %a.imag
 470   %2 = fadd fast <2 x float> %1, %0 ; imag(a * b)
 471   %3 = fmul fast <2 x float> %b.real, %strided.vec
 472   %4 = fmul fast <2 x float> %a.imag, %b.imag
 473   %5 = fsub fast <2 x float> %3, %4 ; real(a * b)
 474   %c.real = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
 475   %c.imag = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>
 476   %6 = fdiv fast <2 x float> %5, %c.real ; real(a * b) / real(c)
 477   %7 = fdiv fast <2 x float> %2, %c.imag ; imag(a * b) / imag(c)
 478   %interleaved.vec = shufflevector <2 x float> %6, <2 x float> %7, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
 479   ret <4 x float> %interleaved.vec
 480 }
 481
 482 ; Expected to transform: -(a * b); the expected output uses the fcmla rotations #180 and #270 and has no separate fneg.
 483 define <4 x float> @mul_negequal(<4 x float> %a, <4 x float> %b) {
 484 ; CHECK-LABEL: mul_negequal:
 485 ; CHECK:       // %bb.0: // %entry
 486 ; CHECK-NEXT:    movi v2.2d, #0000000000000000
 487 ; CHECK-NEXT:    fcmla v2.4s, v1.4s, v0.4s, #180
 488 ; CHECK-NEXT:    fcmla v2.4s, v1.4s, v0.4s, #270
 489 ; CHECK-NEXT:    mov v0.16b, v2.16b
 490 ; CHECK-NEXT:    ret
 491 entry:
 492   %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2> ; real(a)
 493   %a.imag = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
 494   %b.real = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
 495   %b.imag = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
 496   %0 = fmul fast <2 x float> %b.imag, %strided.vec
 497   %1 = fmul fast <2 x float> %b.real, %a.imag
 498   %2 = fadd fast <2 x float> %1, %0 ; imag(a * b)
 499   %3 = fmul fast <2 x float> %b.real, %strided.vec
 500   %4 = fmul fast <2 x float> %a.imag, %b.imag
 501   %5 = fsub fast <2 x float> %3, %4 ; real(a * b)
 502   %6 = fneg fast <2 x float> %5 ; -real(a * b)
 503   %7 = fneg fast <2 x float> %2 ; -imag(a * b)
 504   %interleaved.vec = shufflevector <2 x float> %6, <2 x float> %7, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
 505   ret <4 x float> %interleaved.vec
 506 }