llvm/test/CodeGen/Thumb2/mve-complex-deinterleaving-f32-mul.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s --mattr=+mve.fp -o - | FileCheck %s
   3
   4 target triple = "thumbv8.1m.main-none-none-eabi"
   5
   6 ; Expected not to transform: the assertions below expect scalar vmul/vfma/vfnms and no vcmul/vcmla.
   7 define arm_aapcs_vfpcc <2 x float> @complex_mul_v2f32(<2 x float> %a, <2 x float> %b) {
   8 ; CHECK-LABEL: complex_mul_v2f32:
   9 ; CHECK:       @ %bb.0: @ %entry
  10 ; CHECK-NEXT:    vmul.f32 s9, s5, s0
  11 ; CHECK-NEXT:    vmul.f32 s8, s1, s5
  12 ; CHECK-NEXT:    vfma.f32 s9, s4, s1
  13 ; CHECK-NEXT:    vfnms.f32 s8, s4, s0
  14 ; CHECK-NEXT:    vmov q0, q2
  15 ; CHECK-NEXT:    bx lr
  16 entry:
  17   %a.real   = shufflevector <2 x float> %a, <2 x float> poison, <1 x i32> <i32 0>  ; element 0 of %a
  18   %a.imag = shufflevector <2 x float> %a, <2 x float> poison, <1 x i32> <i32 1>  ; element 1 of %a
  19   %b.real = shufflevector <2 x float> %b, <2 x float> poison, <1 x i32> <i32 0>  ; element 0 of %b
  20   %b.imag = shufflevector <2 x float> %b, <2 x float> poison, <1 x i32> <i32 1>  ; element 1 of %b
  21   %0 = fmul fast <1 x float> %b.imag, %a.real  ; b.imag * a.real
  22   %1 = fmul fast <1 x float> %b.real, %a.imag  ; b.real * a.imag
  23   %2 = fadd fast <1 x float> %1, %0  ; imaginary part: b.real*a.imag + b.imag*a.real
  24   %3 = fmul fast <1 x float> %b.real, %a.real  ; b.real * a.real
  25   %4 = fmul fast <1 x float> %a.imag, %b.imag  ; a.imag * b.imag
  26   %5 = fsub fast <1 x float> %3, %4  ; real part: b.real*a.real - a.imag*b.imag
  27   %interleaved.vec = shufflevector <1 x float> %5, <1 x float> %2, <2 x i32> <i32 0, i32 1>  ; result = <real, imag>
  28   ret <2 x float> %interleaved.vec
  29 }
  30
  31 ; Expected to transform: the assertions below expect a single vcmul.f32 (#0) + vcmla.f32 (#90) pair.
  32 define arm_aapcs_vfpcc <4 x float> @complex_mul_v4f32(<4 x float> %a, <4 x float> %b) {
  33 ; CHECK-LABEL: complex_mul_v4f32:
  34 ; CHECK:       @ %bb.0: @ %entry
  35 ; CHECK-NEXT:    vcmul.f32 q2, q0, q1, #0
  36 ; CHECK-NEXT:    vcmla.f32 q2, q0, q1, #90
  37 ; CHECK-NEXT:    vmov q0, q2
  38 ; CHECK-NEXT:    bx lr
  39 entry:
  40   %a.real   = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>  ; even elements of %a
  41   %a.imag = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>  ; odd elements of %a
  42   %b.real = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>  ; even elements of %b
  43   %b.imag = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>  ; odd elements of %b
  44   %0 = fmul fast <2 x float> %b.imag, %a.real  ; b.imag * a.real
  45   %1 = fmul fast <2 x float> %b.real, %a.imag  ; b.real * a.imag
  46   %2 = fadd fast <2 x float> %1, %0  ; imaginary parts: b.real*a.imag + b.imag*a.real
  47   %3 = fmul fast <2 x float> %b.real, %a.real  ; b.real * a.real
  48   %4 = fmul fast <2 x float> %a.imag, %b.imag  ; a.imag * b.imag
  49   %5 = fsub fast <2 x float> %3, %4  ; real parts: b.real*a.real - a.imag*b.imag
  50   %interleaved.vec = shufflevector <2 x float> %5, <2 x float> %2, <4 x i32> <i32 0, i32 2, i32 1, i32 3>  ; re-interleave as <re0, im0, re1, im1>
  51   ret <4 x float> %interleaved.vec
  52 }
  53
  54 ; Expected to transform: the assertions below expect two vcmul.f32 (#0) + vcmla.f32 (#90) pairs, with q4 (d8, d9) saved and restored.
  55 define arm_aapcs_vfpcc <8 x float> @complex_mul_v8f32(<8 x float> %a, <8 x float> %b) {
  56 ; CHECK-LABEL: complex_mul_v8f32:
  57 ; CHECK:       @ %bb.0: @ %entry
  58 ; CHECK-NEXT:    .vsave {d8, d9}
  59 ; CHECK-NEXT:    vpush {d8, d9}
  60 ; CHECK-NEXT:    vcmul.f32 q4, q0, q2, #0
  61 ; CHECK-NEXT:    vcmla.f32 q4, q0, q2, #90
  62 ; CHECK-NEXT:    vcmul.f32 q2, q1, q3, #0
  63 ; CHECK-NEXT:    vcmla.f32 q2, q1, q3, #90
  64 ; CHECK-NEXT:    vmov q0, q4
  65 ; CHECK-NEXT:    vmov q1, q2
  66 ; CHECK-NEXT:    vpop {d8, d9}
  67 ; CHECK-NEXT:    bx lr
  68 entry:
  69   %a.real   = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>  ; even elements of %a
  70   %a.imag = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>  ; odd elements of %a
  71   %b.real = shufflevector <8 x float> %b, <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>  ; even elements of %b
  72   %b.imag = shufflevector <8 x float> %b, <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>  ; odd elements of %b
  73   %0 = fmul fast <4 x float> %b.imag, %a.real  ; b.imag * a.real
  74   %1 = fmul fast <4 x float> %b.real, %a.imag  ; b.real * a.imag
  75   %2 = fadd fast <4 x float> %1, %0  ; imaginary parts: b.real*a.imag + b.imag*a.real
  76   %3 = fmul fast <4 x float> %b.real, %a.real  ; b.real * a.real
  77   %4 = fmul fast <4 x float> %a.imag, %b.imag  ; a.imag * b.imag
  78   %5 = fsub fast <4 x float> %3, %4  ; real parts: b.real*a.real - a.imag*b.imag
  79   %interleaved.vec = shufflevector <4 x float> %5, <4 x float> %2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>  ; re-interleave as <re0, im0, re1, im1, ...>
  80   ret <8 x float> %interleaved.vec
  81 }
  82
  83 ; Expected to transform: the assertions below expect four vcmul.f32 (#0) + vcmla.f32 (#90) pairs, each taking one operand loaded from an sp-relative address via vldrw.u32.
  84 define arm_aapcs_vfpcc <16 x float> @complex_mul_v16f32(<16 x float> %a, <16 x float> %b) {
  85 ; CHECK-LABEL: complex_mul_v16f32:
  86 ; CHECK:       @ %bb.0: @ %entry
  87 ; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
  88 ; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
  89 ; CHECK-NEXT:    add r3, sp, #64
  90 ; CHECK-NEXT:    add r2, sp, #80
  91 ; CHECK-NEXT:    vldrw.u32 q5, [r3]
  92 ; CHECK-NEXT:    add r1, sp, #96
  93 ; CHECK-NEXT:    add r0, sp, #112
  94 ; CHECK-NEXT:    vcmul.f32 q4, q0, q5, #0
  95 ; CHECK-NEXT:    vcmla.f32 q4, q0, q5, #90
  96 ; CHECK-NEXT:    vldrw.u32 q0, [r2]
  97 ; CHECK-NEXT:    vcmul.f32 q5, q1, q0, #0
  98 ; CHECK-NEXT:    vcmla.f32 q5, q1, q0, #90
  99 ; CHECK-NEXT:    vldrw.u32 q0, [r1]
 100 ; CHECK-NEXT:    vmov q1, q5
 101 ; CHECK-NEXT:    vcmul.f32 q6, q2, q0, #0
 102 ; CHECK-NEXT:    vcmla.f32 q6, q2, q0, #90
 103 ; CHECK-NEXT:    vldrw.u32 q0, [r0]
 104 ; CHECK-NEXT:    vmov q2, q6
 105 ; CHECK-NEXT:    vcmul.f32 q7, q3, q0, #0
 106 ; CHECK-NEXT:    vcmla.f32 q7, q3, q0, #90
 107 ; CHECK-NEXT:    vmov q0, q4
 108 ; CHECK-NEXT:    vmov q3, q7
 109 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 110 ; CHECK-NEXT:    bx lr
 111 entry:
 112   %a.real   = shufflevector <16 x float> %a, <16 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>  ; even elements of %a
 113   %a.imag = shufflevector <16 x float> %a, <16 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>  ; odd elements of %a
 114   %b.real = shufflevector <16 x float> %b, <16 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>  ; even elements of %b
 115   %b.imag = shufflevector <16 x float> %b, <16 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>  ; odd elements of %b
 116   %0 = fmul fast <8 x float> %b.imag, %a.real  ; b.imag * a.real
 117   %1 = fmul fast <8 x float> %b.real, %a.imag  ; b.real * a.imag
 118   %2 = fadd fast <8 x float> %1, %0  ; imaginary parts: b.real*a.imag + b.imag*a.real
 119   %3 = fmul fast <8 x float> %b.real, %a.real  ; b.real * a.real
 120   %4 = fmul fast <8 x float> %a.imag, %b.imag  ; a.imag * b.imag
 121   %5 = fsub fast <8 x float> %3, %4  ; real parts: b.real*a.real - a.imag*b.imag
 122   %interleaved.vec = shufflevector <8 x float> %5, <8 x float> %2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>  ; re-interleave as <re0, im0, re1, im1, ...>
 123   ret <16 x float> %interleaved.vec
 124 }