llvm/test/CodeGen/RISCV/rvv/vfwmaccbf16-sdnode.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
   2 ; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfbfwma -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFWMA
   3 ; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfbfwma -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFWMA
   4 ; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFMIN
   5 ; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFMIN
   6
   7 define <vscale x 1 x float> @vfwmaccbf16_vv_nxv1f32(<vscale x 1 x float> %a, <vscale x 1 x bfloat> %b, <vscale x 1 x bfloat> %c) {
   8 ; ZVFBFWMA-LABEL: vfwmaccbf16_vv_nxv1f32:
   9 ; ZVFBFWMA:       # %bb.0:
  10 ; ZVFBFWMA-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
  11 ; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v9, v10
  12 ; ZVFBFWMA-NEXT:    ret
  13 ;
  14 ; ZVFBFMIN-LABEL: vfwmaccbf16_vv_nxv1f32:
  15 ; ZVFBFMIN:       # %bb.0:
  16 ; ZVFBFMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
  17 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v11, v9
  18 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v9, v10
  19 ; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
  20 ; ZVFBFMIN-NEXT:    vfmacc.vv v8, v11, v9
  21 ; ZVFBFMIN-NEXT:    ret
       ; Vector-vector case, nxv1: widen both bf16 operands to f32 with fpext,
       ; then pass them as the first two llvm.fma operands with %a as the third.
       ; Per the assertions above, the +zvfbfwma runs expect a single
       ; vfwmaccbf16.vv at e16/mf4, while the +zvfbfmin runs expect two
       ; vfwcvtbf16.f.f.v widenings followed by vfmacc.vv at e32/mf2.
  22   %b.ext = fpext <vscale x 1 x bfloat> %b to <vscale x 1 x float>
  23   %c.ext = fpext <vscale x 1 x bfloat> %c to <vscale x 1 x float>
  24   %res = call <vscale x 1 x float> @llvm.fma.nxv1f32(<vscale x 1 x float> %b.ext, <vscale x 1 x float> %c.ext, <vscale x 1 x float> %a)
  25   ret <vscale x 1 x float> %res
  26 }
  27
  28 define <vscale x 1 x float> @vfwmaccbf16_vf_nxv1f32(<vscale x 1 x float> %a, bfloat %b, <vscale x 1 x bfloat> %c) {
  29 ; ZVFBFWMA-LABEL: vfwmaccbf16_vf_nxv1f32:
  30 ; ZVFBFWMA:       # %bb.0:
  31 ; ZVFBFWMA-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
  32 ; ZVFBFWMA-NEXT:    vfwmaccbf16.vf v8, fa0, v9
  33 ; ZVFBFWMA-NEXT:    ret
  34 ;
  35 ; ZVFBFMIN-LABEL: vfwmaccbf16_vf_nxv1f32:
  36 ; ZVFBFMIN:       # %bb.0:
  37 ; ZVFBFMIN-NEXT:    fmv.x.w a0, fa0
  38 ; ZVFBFMIN-NEXT:    slli a0, a0, 16
  39 ; ZVFBFMIN-NEXT:    fmv.w.x fa5, a0
  40 ; ZVFBFMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
  41 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v10, v9
  42 ; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
  43 ; ZVFBFMIN-NEXT:    vfmacc.vf v8, fa5, v10
  44 ; ZVFBFMIN-NEXT:    ret
       ; Vector-scalar case, nxv1: splat scalar bf16 %b (insertelement into
       ; lane 0, then shufflevector with an all-zero mask), widen the splat and
       ; %c to f32, and pass them as the first two llvm.fma operands with %a as
       ; the third.
       ; Per the assertions above, the +zvfbfwma runs expect a single
       ; vfwmaccbf16.vf taking the scalar from fa0 at e16/mf4. The +zvfbfmin
       ; runs expect the scalar to be moved through a0 with a 16-bit left shift
       ; (fmv.x.w / slli / fmv.w.x), %c widened with vfwcvtbf16.f.f.v, and a
       ; vfmacc.vf at e32/mf2.
  45   %b.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
  46   %b.splat = shufflevector <vscale x 1 x bfloat> %b.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
  47   %b.ext = fpext <vscale x 1 x bfloat> %b.splat to <vscale x 1 x float>
  48   %c.ext = fpext <vscale x 1 x bfloat> %c to <vscale x 1 x float>
  49   %res = call <vscale x 1 x float> @llvm.fma.nxv1f32(<vscale x 1 x float> %b.ext, <vscale x 1 x float> %c.ext, <vscale x 1 x float> %a)
  50   ret <vscale x 1 x float> %res
  51 }
  52
  53 define <vscale x 2 x float> @vfwmaccbf16_vv_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x bfloat> %b, <vscale x 2 x bfloat> %c) {
  54 ; ZVFBFWMA-LABEL: vfwmaccbf16_vv_nxv2f32:
  55 ; ZVFBFWMA:       # %bb.0:
  56 ; ZVFBFWMA-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
  57 ; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v9, v10
  58 ; ZVFBFWMA-NEXT:    ret
  59 ;
  60 ; ZVFBFMIN-LABEL: vfwmaccbf16_vv_nxv2f32:
  61 ; ZVFBFMIN:       # %bb.0:
  62 ; ZVFBFMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
  63 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v11, v9
  64 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v9, v10
  65 ; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
  66 ; ZVFBFMIN-NEXT:    vfmacc.vv v8, v11, v9
  67 ; ZVFBFMIN-NEXT:    ret
       ; Vector-vector case, nxv2: widen both bf16 operands to f32 with fpext,
       ; then pass them as the first two llvm.fma operands with %a as the third.
       ; Per the assertions above, the +zvfbfwma runs expect a single
       ; vfwmaccbf16.vv at e16/mf2, while the +zvfbfmin runs expect two
       ; vfwcvtbf16.f.f.v widenings followed by vfmacc.vv at e32/m1.
  68   %b.ext = fpext <vscale x 2 x bfloat> %b to <vscale x 2 x float>
  69   %c.ext = fpext <vscale x 2 x bfloat> %c to <vscale x 2 x float>
  70   %res = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> %b.ext, <vscale x 2 x float> %c.ext, <vscale x 2 x float> %a)
  71   ret <vscale x 2 x float> %res
  72 }
  73
  74 define <vscale x 2 x float> @vfwmaccbf16_vf_nxv2f32(<vscale x 2 x float> %a, bfloat %b, <vscale x 2 x bfloat> %c) {
  75 ; ZVFBFWMA-LABEL: vfwmaccbf16_vf_nxv2f32:
  76 ; ZVFBFWMA:       # %bb.0:
  77 ; ZVFBFWMA-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
  78 ; ZVFBFWMA-NEXT:    vfwmaccbf16.vf v8, fa0, v9
  79 ; ZVFBFWMA-NEXT:    ret
  80 ;
  81 ; ZVFBFMIN-LABEL: vfwmaccbf16_vf_nxv2f32:
  82 ; ZVFBFMIN:       # %bb.0:
  83 ; ZVFBFMIN-NEXT:    fmv.x.w a0, fa0
  84 ; ZVFBFMIN-NEXT:    slli a0, a0, 16
  85 ; ZVFBFMIN-NEXT:    fmv.w.x fa5, a0
  86 ; ZVFBFMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
  87 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v10, v9
  88 ; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
  89 ; ZVFBFMIN-NEXT:    vfmacc.vf v8, fa5, v10
  90 ; ZVFBFMIN-NEXT:    ret
       ; Vector-scalar case, nxv2: splat scalar bf16 %b (insertelement into
       ; lane 0, then shufflevector with an all-zero mask), widen the splat and
       ; %c to f32, and pass them as the first two llvm.fma operands with %a as
       ; the third.
       ; Per the assertions above, the +zvfbfwma runs expect a single
       ; vfwmaccbf16.vf taking the scalar from fa0 at e16/mf2. The +zvfbfmin
       ; runs expect the scalar to be moved through a0 with a 16-bit left shift
       ; (fmv.x.w / slli / fmv.w.x), %c widened with vfwcvtbf16.f.f.v, and a
       ; vfmacc.vf at e32/m1.
  91   %b.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
  92   %b.splat = shufflevector <vscale x 2 x bfloat> %b.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
  93   %b.ext = fpext <vscale x 2 x bfloat> %b.splat to <vscale x 2 x float>
  94   %c.ext = fpext <vscale x 2 x bfloat> %c to <vscale x 2 x float>
  95   %res = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> %b.ext, <vscale x 2 x float> %c.ext, <vscale x 2 x float> %a)
  96   ret <vscale x 2 x float> %res
  97 }
  98
  99 define <vscale x 4 x float> @vfwmaccbf16_vv_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c) {
 100 ; ZVFBFWMA-LABEL: vfwmaccbf16_vv_nxv4f32:
 101 ; ZVFBFWMA:       # %bb.0:
 102 ; ZVFBFWMA-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 103 ; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v10, v11
 104 ; ZVFBFWMA-NEXT:    ret
 105 ;
 106 ; ZVFBFMIN-LABEL: vfwmaccbf16_vv_nxv4f32:
 107 ; ZVFBFMIN:       # %bb.0:
 108 ; ZVFBFMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 109 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v12, v10
 110 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v14, v11
 111 ; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 112 ; ZVFBFMIN-NEXT:    vfmacc.vv v8, v12, v14
 113 ; ZVFBFMIN-NEXT:    ret
       ; Vector-vector case, nxv4: widen both bf16 operands to f32 with fpext,
       ; then pass them as the first two llvm.fma operands with %a as the third.
       ; Per the assertions above, the +zvfbfwma runs expect a single
       ; vfwmaccbf16.vv at e16/m1, while the +zvfbfmin runs expect two
       ; vfwcvtbf16.f.f.v widenings followed by vfmacc.vv at e32/m2.
 114   %b.ext = fpext <vscale x 4 x bfloat> %b to <vscale x 4 x float>
 115   %c.ext = fpext <vscale x 4 x bfloat> %c to <vscale x 4 x float>
 116   %res = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> %b.ext, <vscale x 4 x float> %c.ext, <vscale x 4 x float> %a)
 117   ret <vscale x 4 x float> %res
 118 }
 119
 120 define <vscale x 4 x float> @vfwmaccbf16_vf_nxv4f32(<vscale x 4 x float> %a, bfloat %b, <vscale x 4 x bfloat> %c) {
 121 ; ZVFBFWMA-LABEL: vfwmaccbf16_vf_nxv4f32:
 122 ; ZVFBFWMA:       # %bb.0:
 123 ; ZVFBFWMA-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 124 ; ZVFBFWMA-NEXT:    vfwmaccbf16.vf v8, fa0, v10
 125 ; ZVFBFWMA-NEXT:    ret
 126 ;
 127 ; ZVFBFMIN-LABEL: vfwmaccbf16_vf_nxv4f32:
 128 ; ZVFBFMIN:       # %bb.0:
 129 ; ZVFBFMIN-NEXT:    fmv.x.w a0, fa0
 130 ; ZVFBFMIN-NEXT:    slli a0, a0, 16
 131 ; ZVFBFMIN-NEXT:    fmv.w.x fa5, a0
 132 ; ZVFBFMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 133 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v12, v10
 134 ; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 135 ; ZVFBFMIN-NEXT:    vfmacc.vf v8, fa5, v12
 136 ; ZVFBFMIN-NEXT:    ret
       ; Vector-scalar case, nxv4: splat scalar bf16 %b (insertelement into
       ; lane 0, then shufflevector with an all-zero mask), widen the splat and
       ; %c to f32, and pass them as the first two llvm.fma operands with %a as
       ; the third.
       ; Per the assertions above, the +zvfbfwma runs expect a single
       ; vfwmaccbf16.vf taking the scalar from fa0 at e16/m1. The +zvfbfmin
       ; runs expect the scalar to be moved through a0 with a 16-bit left shift
       ; (fmv.x.w / slli / fmv.w.x), %c widened with vfwcvtbf16.f.f.v, and a
       ; vfmacc.vf at e32/m2.
 137   %b.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
 138   %b.splat = shufflevector <vscale x 4 x bfloat> %b.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
 139   %b.ext = fpext <vscale x 4 x bfloat> %b.splat to <vscale x 4 x float>
 140   %c.ext = fpext <vscale x 4 x bfloat> %c to <vscale x 4 x float>
 141   %res = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> %b.ext, <vscale x 4 x float> %c.ext, <vscale x 4 x float> %a)
 142   ret <vscale x 4 x float> %res
 143 }
 144
 145 define <vscale x 8 x float> @vfwmaccbf16_vv_nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) {
 146 ; ZVFBFWMA-LABEL: vfwmaccbf16_vv_nxv8f32:
 147 ; ZVFBFWMA:       # %bb.0:
 148 ; ZVFBFWMA-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 149 ; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v12, v14
 150 ; ZVFBFWMA-NEXT:    ret
 151 ;
 152 ; ZVFBFMIN-LABEL: vfwmaccbf16_vv_nxv8f32:
 153 ; ZVFBFMIN:       # %bb.0:
 154 ; ZVFBFMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 155 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v16, v12
 156 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v20, v14
 157 ; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 158 ; ZVFBFMIN-NEXT:    vfmacc.vv v8, v16, v20
 159 ; ZVFBFMIN-NEXT:    ret
       ; Vector-vector case, nxv8: widen both bf16 operands to f32 with fpext,
       ; then pass them as the first two llvm.fma operands with %a as the third.
       ; Per the assertions above, the +zvfbfwma runs expect a single
       ; vfwmaccbf16.vv at e16/m2, while the +zvfbfmin runs expect two
       ; vfwcvtbf16.f.f.v widenings followed by vfmacc.vv at e32/m4.
 160   %b.ext = fpext <vscale x 8 x bfloat> %b to <vscale x 8 x float>
 161   %c.ext = fpext <vscale x 8 x bfloat> %c to <vscale x 8 x float>
 162   %res = call <vscale x 8 x float> @llvm.fma.nxv8f32(<vscale x 8 x float> %b.ext, <vscale x 8 x float> %c.ext, <vscale x 8 x float> %a)
 163   ret <vscale x 8 x float> %res
 164 }
 165
 166 define <vscale x 8 x float> @vfwmaccbf16_vf_nxv8f32(<vscale x 8 x float> %a, bfloat %b, <vscale x 8 x bfloat> %c) {
 167 ; ZVFBFWMA-LABEL: vfwmaccbf16_vf_nxv8f32:
 168 ; ZVFBFWMA:       # %bb.0:
 169 ; ZVFBFWMA-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 170 ; ZVFBFWMA-NEXT:    vfwmaccbf16.vf v8, fa0, v12
 171 ; ZVFBFWMA-NEXT:    ret
 172 ;
 173 ; ZVFBFMIN-LABEL: vfwmaccbf16_vf_nxv8f32:
 174 ; ZVFBFMIN:       # %bb.0:
 175 ; ZVFBFMIN-NEXT:    fmv.x.w a0, fa0
 176 ; ZVFBFMIN-NEXT:    slli a0, a0, 16
 177 ; ZVFBFMIN-NEXT:    fmv.w.x fa5, a0
 178 ; ZVFBFMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 179 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v16, v12
 180 ; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 181 ; ZVFBFMIN-NEXT:    vfmacc.vf v8, fa5, v16
 182 ; ZVFBFMIN-NEXT:    ret
       ; Vector-scalar case, nxv8: splat scalar bf16 %b (insertelement into
       ; lane 0, then shufflevector with an all-zero mask), widen the splat and
       ; %c to f32, and pass them as the first two llvm.fma operands with %a as
       ; the third.
       ; Per the assertions above, the +zvfbfwma runs expect a single
       ; vfwmaccbf16.vf taking the scalar from fa0 at e16/m2. The +zvfbfmin
       ; runs expect the scalar to be moved through a0 with a 16-bit left shift
       ; (fmv.x.w / slli / fmv.w.x), %c widened with vfwcvtbf16.f.f.v, and a
       ; vfmacc.vf at e32/m4.
 183   %b.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
 184   %b.splat = shufflevector <vscale x 8 x bfloat> %b.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
 185   %b.ext = fpext <vscale x 8 x bfloat> %b.splat to <vscale x 8 x float>
 186   %c.ext = fpext <vscale x 8 x bfloat> %c to <vscale x 8 x float>
 187   %res = call <vscale x 8 x float> @llvm.fma.nxv8f32(<vscale x 8 x float> %b.ext, <vscale x 8 x float> %c.ext, <vscale x 8 x float> %a)
 188   ret <vscale x 8 x float> %res
 189 }
 190
 191 define <vscale x 16 x float> @vfwmaccbf16_vv_nxv16f32(<vscale x 16 x float> %a, <vscale x 16 x bfloat> %b, <vscale x 16 x bfloat> %c) {
 192 ; ZVFBFWMA-LABEL: vfwmaccbf16_vv_nxv16f32:
 193 ; ZVFBFWMA:       # %bb.0:
 194 ; ZVFBFWMA-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 195 ; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v16, v20
 196 ; ZVFBFWMA-NEXT:    ret
 197 ;
 198 ; ZVFBFMIN-LABEL: vfwmaccbf16_vv_nxv16f32:
 199 ; ZVFBFMIN:       # %bb.0:
 200 ; ZVFBFMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 201 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v24, v16
 202 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v0, v20
 203 ; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 204 ; ZVFBFMIN-NEXT:    vfmacc.vv v8, v24, v0
 205 ; ZVFBFMIN-NEXT:    ret
       ; Vector-vector case, nxv16: widen both bf16 operands to f32 with fpext,
       ; then pass them as the first two llvm.fma operands with %a as the third.
       ; Per the assertions above, the +zvfbfwma runs expect a single
       ; vfwmaccbf16.vv at e16/m4, while the +zvfbfmin runs expect two
       ; vfwcvtbf16.f.f.v widenings followed by vfmacc.vv at e32/m8. In the
       ; +zvfbfmin expectation the second widened operand is written to v0.
 206   %b.ext = fpext <vscale x 16 x bfloat> %b to <vscale x 16 x float>
 207   %c.ext = fpext <vscale x 16 x bfloat> %c to <vscale x 16 x float>
 208   %res = call <vscale x 16 x float> @llvm.fma.nxv16f32(<vscale x 16 x float> %b.ext, <vscale x 16 x float> %c.ext, <vscale x 16 x float> %a)
 209   ret <vscale x 16 x float> %res
 210 }
 211
 212 define <vscale x 16 x float> @vfwmaccbf16_vf_nxv16f32(<vscale x 16 x float> %a, bfloat %b, <vscale x 16 x bfloat> %c) {
 213 ; ZVFBFWMA-LABEL: vfwmaccbf16_vf_nxv16f32:
 214 ; ZVFBFWMA:       # %bb.0:
 215 ; ZVFBFWMA-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 216 ; ZVFBFWMA-NEXT:    vfwmaccbf16.vf v8, fa0, v16
 217 ; ZVFBFWMA-NEXT:    ret
 218 ;
 219 ; ZVFBFMIN-LABEL: vfwmaccbf16_vf_nxv16f32:
 220 ; ZVFBFMIN:       # %bb.0:
 221 ; ZVFBFMIN-NEXT:    fmv.x.w a0, fa0
 222 ; ZVFBFMIN-NEXT:    slli a0, a0, 16
 223 ; ZVFBFMIN-NEXT:    fmv.w.x fa5, a0
 224 ; ZVFBFMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 225 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v24, v16
 226 ; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 227 ; ZVFBFMIN-NEXT:    vfmacc.vf v8, fa5, v24
 228 ; ZVFBFMIN-NEXT:    ret
       ; Vector-scalar case, nxv16: splat scalar bf16 %b (insertelement into
       ; lane 0, then shufflevector with an all-zero mask), widen the splat and
       ; %c to f32, and pass them as the first two llvm.fma operands with %a as
       ; the third.
       ; Per the assertions above, the +zvfbfwma runs expect a single
       ; vfwmaccbf16.vf taking the scalar from fa0 at e16/m4. The +zvfbfmin
       ; runs expect the scalar to be moved through a0 with a 16-bit left shift
       ; (fmv.x.w / slli / fmv.w.x), %c widened with vfwcvtbf16.f.f.v, and a
       ; vfmacc.vf at e32/m8.
 229   %b.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
 230   %b.splat = shufflevector <vscale x 16 x bfloat> %b.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
 231   %b.ext = fpext <vscale x 16 x bfloat> %b.splat to <vscale x 16 x float>
 232   %c.ext = fpext <vscale x 16 x bfloat> %c to <vscale x 16 x float>
 233   %res = call <vscale x 16 x float> @llvm.fma.nxv16f32(<vscale x 16 x float> %b.ext, <vscale x 16 x float> %c.ext, <vscale x 16 x float> %a)
 234   ret <vscale x 16 x float> %res
 235 }