; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

declare <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32, <8 x half>, <8 x half>, <8 x half>)
declare <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32, <4 x float>, <4 x float>, <4 x float>)
declare <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32, <8 x half>, <8 x half>)
declare <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32, <4 x float>, <4 x float>)
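
; Check that a vcmla with a zeroinitializer accumulator followed by a fast
; fadd of %a is folded into a single vcmla that accumulates into %a, for both
; f32 and f16 and for either operand order of the fadd.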
define arm_aapcs_vfpcc <4 x float> @reassoc_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: reassoc_f32x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f32 q0, q1, q2, #0
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 0, <4 x float> zeroinitializer, <4 x float> %b, <4 x float> %c)
  %res = fadd fast <4 x float> %d, %a
  ret <4 x float> %res
}

define arm_aapcs_vfpcc <4 x float> @reassoc_c_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: reassoc_c_f32x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f32 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 1, <4 x float> zeroinitializer, <4 x float> %b, <4 x float> %c)
  %res = fadd fast <4 x float> %a, %d
  ret <4 x float> %res
}

define arm_aapcs_vfpcc <8 x half> @reassoc_f16x4(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: reassoc_f16x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f16 q0, q1, q2, #180
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32 2, <8 x half> zeroinitializer, <8 x half> %b, <8 x half> %c)
  %res = fadd fast <8 x half> %d, %a
  ret <8 x half> %res
}

define arm_aapcs_vfpcc <8 x half> @reassoc_c_f16x4(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: reassoc_c_f16x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f16 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32 3, <8 x half> zeroinitializer, <8 x half> %b, <8 x half> %c)
  %res = fadd fast <8 x half> %a, %d
  ret <8 x half> %res
}

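; Without the fast flag on the fadd the reassociation is not allowed, so the
; zero accumulator and the separate vadd must be kept.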
define arm_aapcs_vfpcc <4 x float> @reassoc_nonfast_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: reassoc_nonfast_f32x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i32 q3, #0x0
; CHECK-NEXT:    vcmla.f32 q3, q1, q2, #0
; CHECK-NEXT:    vadd.f32 q0, q3, q0
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 0, <4 x float> zeroinitializer, <4 x float> %b, <4 x float> %c)
  %res = fadd <4 x float> %d, %a
  ret <4 x float> %res
}

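; Check that a vcmul followed by a fast fadd of %a is combined into a vcmla
; accumulating into %a, again for both element sizes, all four rotations and
; either operand order of the fadd.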
define arm_aapcs_vfpcc <4 x float> @muladd_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: muladd_f32x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f32 q0, q1, q2, #0
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 0, <4 x float> %b, <4 x float> %c)
  %res = fadd fast <4 x float> %d, %a
  ret <4 x float> %res
}

define arm_aapcs_vfpcc <4 x float> @muladd_c_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: muladd_c_f32x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f32 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 1, <4 x float> %b, <4 x float> %c)
  %res = fadd fast <4 x float> %a, %d
  ret <4 x float> %res
}

define arm_aapcs_vfpcc <8 x half> @muladd_f16x4(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: muladd_f16x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f16 q0, q1, q2, #180
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 2, <8 x half> %b, <8 x half> %c)
  %res = fadd fast <8 x half> %d, %a
  ret <8 x half> %res
}

define arm_aapcs_vfpcc <8 x half> @muladd_c_f16x4(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: muladd_c_f16x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f16 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 3, <8 x half> %b, <8 x half> %c)
  %res = fadd fast <8 x half> %a, %d
  ret <8 x half> %res
}

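; Without fast-math flags on the fadd, the vcmul and the vadd must stay
; separate.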
define arm_aapcs_vfpcc <4 x float> @muladd_nonfast_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: muladd_nonfast_f32x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f32 q3, q1, q2, #0
; CHECK-NEXT:    vadd.f32 q0, q3, q0
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 0, <4 x float> %b, <4 x float> %c)
  %res = fadd <4 x float> %d, %a
  ret <4 x float> %res
}

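; When the multiplicands and the addend are all the same value, the combine
; still applies. For f16 the destination may reuse the source register; the
; 32-bit vcmla cannot use the same register for the destination and a source,
; so a copy is expected in the f32 case.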
define arm_aapcs_vfpcc <8 x half> @same_register_f16(<8 x half> %a) {
; CHECK-LABEL: same_register_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f16 q0, q0, q0, #0
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32 0, <8 x half> zeroinitializer, <8 x half> %a, <8 x half> %a)
  %res = fadd fast <8 x half> %d, %a
  ret <8 x half> %res
}

define arm_aapcs_vfpcc <4 x float> @same_register_f32(<4 x float> %a) {
; CHECK-LABEL: same_register_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q1, q0
; CHECK-NEXT:    vcmla.f32 q1, q0, q0, #0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 0, <4 x float> zeroinitializer, <4 x float> %a, <4 x float> %a)
  %res = fadd fast <4 x float> %d, %a
  ret <4 x float> %res
}