llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmaccbf16.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
   2 ; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfbfwma -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFWMA
   3 ; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfbfwma -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFWMA
   4 ; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFMIN
   5 ; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFMIN
   6
   ; Vector-vector widening bf16 multiply-accumulate, <1 x float> result.
   ; Both bfloat vector operands are widened with fpext and passed to llvm.fma
   ; with %a as the addend. With +zvfbfwma the checks expect a single
   ; vfwmaccbf16.vv at e16/mf4; with only +zvfbfmin they expect two
   ; vfwcvtbf16.f.f.v widens followed by vfmacc.vv at e32/mf2.
   7 define <1 x float> @vfwmaccbf16_vv_v1f32(<1 x float> %a, <1 x bfloat> %b, <1 x bfloat> %c) {
   8 ; ZVFBFWMA-LABEL: vfwmaccbf16_vv_v1f32:
   9 ; ZVFBFWMA:       # %bb.0:
  10 ; ZVFBFWMA-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
  11 ; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v9, v10
  12 ; ZVFBFWMA-NEXT:    ret
  13 ;
  14 ; ZVFBFMIN-LABEL: vfwmaccbf16_vv_v1f32:
  15 ; ZVFBFMIN:       # %bb.0:
  16 ; ZVFBFMIN-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
  17 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v11, v9
  18 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v9, v10
  19 ; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
  20 ; ZVFBFMIN-NEXT:    vfmacc.vv v8, v11, v9
  21 ; ZVFBFMIN-NEXT:    ret
  22   %b.ext = fpext <1 x bfloat> %b to <1 x float>
  23   %c.ext = fpext <1 x bfloat> %c to <1 x float>
       ; Multiplicands are the two widened operands; %a is the accumulator.
  24   %res = call <1 x float> @llvm.fma.v1f32(<1 x float> %b.ext, <1 x float> %c.ext, <1 x float> %a)
  25   ret <1 x float> %res
  26 }
  27
  ; Scalar-operand variant of the <1 x float> test: the bfloat scalar %b is
  ; turned into a vector via insertelement + shufflevector before the fpext.
  ; With +zvfbfwma the checks move the scalar to a GPR (fmv.x.h), place it in a
  ; vector register with vmv.s.x and use the .vv form of vfwmaccbf16. With only
  ; +zvfbfmin they use fmv.x.w, vmv.s.x, two vfwcvtbf16.f.f.v widens and an
  ; e32/mf2 vfmacc.vv.
  ; NOTE(review): no .vf instruction form appears in either check block; the
  ; checks record the current llc output rather than an ideal lowering.
  28 define <1 x float> @vfwmaccbf16_vf_v1f32(<1 x float> %a, bfloat %b, <1 x bfloat> %c) {
  29 ; ZVFBFWMA-LABEL: vfwmaccbf16_vf_v1f32:
  30 ; ZVFBFWMA:       # %bb.0:
  31 ; ZVFBFWMA-NEXT:    fmv.x.h a0, fa0
  32 ; ZVFBFWMA-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
  33 ; ZVFBFWMA-NEXT:    vmv.s.x v10, a0
  34 ; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v10, v9
  35 ; ZVFBFWMA-NEXT:    ret
  36 ;
  37 ; ZVFBFMIN-LABEL: vfwmaccbf16_vf_v1f32:
  38 ; ZVFBFMIN:       # %bb.0:
  39 ; ZVFBFMIN-NEXT:    fmv.x.w a0, fa0
  40 ; ZVFBFMIN-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
  41 ; ZVFBFMIN-NEXT:    vmv.s.x v10, a0
  42 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v11, v10
  43 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v10, v9
  44 ; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
  45 ; ZVFBFMIN-NEXT:    vfmacc.vv v8, v11, v10
  46 ; ZVFBFMIN-NEXT:    ret
       ; Splat idiom: insert %b into lane 0, then shuffle with an all-zero mask.
  47   %b.head = insertelement <1 x bfloat> poison, bfloat %b, i32 0
  48   %b.splat = shufflevector <1 x bfloat> %b.head, <1 x bfloat> poison, <1 x i32> zeroinitializer
  49   %b.ext = fpext <1 x bfloat> %b.splat to <1 x float>
  50   %c.ext = fpext <1 x bfloat> %c to <1 x float>
       ; Multiplicands are the widened splat and widened %c; %a is the accumulator.
  51   %res = call <1 x float> @llvm.fma.v1f32(<1 x float> %b.ext, <1 x float> %c.ext, <1 x float> %a)
  52   ret <1 x float> %res
  53 }
  54
  ; Vector-vector widening bf16 multiply-accumulate, <2 x float> result.
  ; Both bfloat vector operands are widened with fpext and passed to llvm.fma
  ; with %a as the addend. With +zvfbfwma the checks expect a single
  ; vfwmaccbf16.vv at e16/mf4; with only +zvfbfmin they expect two
  ; vfwcvtbf16.f.f.v widens followed by vfmacc.vv at e32/mf2.
  55 define <2 x float> @vfwmaccbf16_vv_v2f32(<2 x float> %a, <2 x bfloat> %b, <2 x bfloat> %c) {
  56 ; ZVFBFWMA-LABEL: vfwmaccbf16_vv_v2f32:
  57 ; ZVFBFWMA:       # %bb.0:
  58 ; ZVFBFWMA-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
  59 ; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v9, v10
  60 ; ZVFBFWMA-NEXT:    ret
  61 ;
  62 ; ZVFBFMIN-LABEL: vfwmaccbf16_vv_v2f32:
  63 ; ZVFBFMIN:       # %bb.0:
  64 ; ZVFBFMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
  65 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v11, v9
  66 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v9, v10
  67 ; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
  68 ; ZVFBFMIN-NEXT:    vfmacc.vv v8, v11, v9
  69 ; ZVFBFMIN-NEXT:    ret
  70   %b.ext = fpext <2 x bfloat> %b to <2 x float>
  71   %c.ext = fpext <2 x bfloat> %c to <2 x float>
       ; Multiplicands are the two widened operands; %a is the accumulator.
  72   %res = call <2 x float> @llvm.fma.v2f32(<2 x float> %b.ext, <2 x float> %c.ext, <2 x float> %a)
  73   ret <2 x float> %res
  74 }
  75
  ; Scalar-operand variant of the <2 x float> test: the bfloat scalar %b is
  ; broadcast to both lanes via insertelement + shufflevector before the fpext.
  ; With +zvfbfwma the checks move the scalar to a GPR (fmv.x.h), splat it with
  ; vmv.v.x and use the .vv form of vfwmaccbf16. With only +zvfbfmin they use
  ; fmv.x.w, vmv.v.x, two vfwcvtbf16.f.f.v widens and an e32/mf2 vfmacc.vv.
  ; NOTE(review): no .vf instruction form appears in either check block; the
  ; checks record the current llc output rather than an ideal lowering.
  76 define <2 x float> @vfwmaccbf16_vf_v2f32(<2 x float> %a, bfloat %b, <2 x bfloat> %c) {
  77 ; ZVFBFWMA-LABEL: vfwmaccbf16_vf_v2f32:
  78 ; ZVFBFWMA:       # %bb.0:
  79 ; ZVFBFWMA-NEXT:    fmv.x.h a0, fa0
  80 ; ZVFBFWMA-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
  81 ; ZVFBFWMA-NEXT:    vmv.v.x v10, a0
  82 ; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v10, v9
  83 ; ZVFBFWMA-NEXT:    ret
  84 ;
  85 ; ZVFBFMIN-LABEL: vfwmaccbf16_vf_v2f32:
  86 ; ZVFBFMIN:       # %bb.0:
  87 ; ZVFBFMIN-NEXT:    fmv.x.w a0, fa0
  88 ; ZVFBFMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
  89 ; ZVFBFMIN-NEXT:    vmv.v.x v10, a0
  90 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v11, v10
  91 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v10, v9
  92 ; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
  93 ; ZVFBFMIN-NEXT:    vfmacc.vv v8, v11, v10
  94 ; ZVFBFMIN-NEXT:    ret
       ; Splat idiom: insert %b into lane 0, then shuffle with an all-zero mask.
  95   %b.head = insertelement <2 x bfloat> poison, bfloat %b, i32 0
  96   %b.splat = shufflevector <2 x bfloat> %b.head, <2 x bfloat> poison, <2 x i32> zeroinitializer
  97   %b.ext = fpext <2 x bfloat> %b.splat to <2 x float>
  98   %c.ext = fpext <2 x bfloat> %c to <2 x float>
       ; Multiplicands are the widened splat and widened %c; %a is the accumulator.
  99   %res = call <2 x float> @llvm.fma.v2f32(<2 x float> %b.ext, <2 x float> %c.ext, <2 x float> %a)
 100   ret <2 x float> %res
 101 }
 102
 ; Vector-vector widening bf16 multiply-accumulate, <4 x float> result.
 ; Both bfloat vector operands are widened with fpext and passed to llvm.fma
 ; with %a as the addend. With +zvfbfwma the checks expect a single
 ; vfwmaccbf16.vv at e16/mf2; with only +zvfbfmin they expect two
 ; vfwcvtbf16.f.f.v widens followed by vfmacc.vv at e32/m1.
 103 define <4 x float> @vfwmaccbf16_vv_v4f32(<4 x float> %a, <4 x bfloat> %b, <4 x bfloat> %c) {
 104 ; ZVFBFWMA-LABEL: vfwmaccbf16_vv_v4f32:
 105 ; ZVFBFWMA:       # %bb.0:
 106 ; ZVFBFWMA-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 107 ; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v9, v10
 108 ; ZVFBFWMA-NEXT:    ret
 109 ;
 110 ; ZVFBFMIN-LABEL: vfwmaccbf16_vv_v4f32:
 111 ; ZVFBFMIN:       # %bb.0:
 112 ; ZVFBFMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 113 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v11, v9
 114 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v9, v10
 115 ; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 116 ; ZVFBFMIN-NEXT:    vfmacc.vv v8, v11, v9
 117 ; ZVFBFMIN-NEXT:    ret
 118   %b.ext = fpext <4 x bfloat> %b to <4 x float>
 119   %c.ext = fpext <4 x bfloat> %c to <4 x float>
       ; Multiplicands are the two widened operands; %a is the accumulator.
 120   %res = call <4 x float> @llvm.fma.v4f32(<4 x float> %b.ext, <4 x float> %c.ext, <4 x float> %a)
 121   ret <4 x float> %res
 122 }
 123
 ; Scalar-operand variant of the <4 x float> test: the bfloat scalar %b is
 ; broadcast to all lanes via insertelement + shufflevector before the fpext.
 ; With +zvfbfwma the checks move the scalar to a GPR (fmv.x.h), splat it with
 ; vmv.v.x and use the .vv form of vfwmaccbf16 at e16/mf2. With only +zvfbfmin
 ; they use fmv.x.w, vmv.v.x, two vfwcvtbf16.f.f.v widens and an e32/m1
 ; vfmacc.vv.
 ; NOTE(review): no .vf instruction form appears in either check block; the
 ; checks record the current llc output rather than an ideal lowering.
 124 define <4 x float> @vfwmaccbf16_vf_v4f32(<4 x float> %a, bfloat %b, <4 x bfloat> %c) {
 125 ; ZVFBFWMA-LABEL: vfwmaccbf16_vf_v4f32:
 126 ; ZVFBFWMA:       # %bb.0:
 127 ; ZVFBFWMA-NEXT:    fmv.x.h a0, fa0
 128 ; ZVFBFWMA-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 129 ; ZVFBFWMA-NEXT:    vmv.v.x v10, a0
 130 ; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v10, v9
 131 ; ZVFBFWMA-NEXT:    ret
 132 ;
 133 ; ZVFBFMIN-LABEL: vfwmaccbf16_vf_v4f32:
 134 ; ZVFBFMIN:       # %bb.0:
 135 ; ZVFBFMIN-NEXT:    fmv.x.w a0, fa0
 136 ; ZVFBFMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 137 ; ZVFBFMIN-NEXT:    vmv.v.x v10, a0
 138 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v11, v10
 139 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v10, v9
 140 ; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 141 ; ZVFBFMIN-NEXT:    vfmacc.vv v8, v11, v10
 142 ; ZVFBFMIN-NEXT:    ret
       ; Splat idiom: insert %b into lane 0, then shuffle with an all-zero mask.
 143   %b.head = insertelement <4 x bfloat> poison, bfloat %b, i32 0
 144   %b.splat = shufflevector <4 x bfloat> %b.head, <4 x bfloat> poison, <4 x i32> zeroinitializer
 145   %b.ext = fpext <4 x bfloat> %b.splat to <4 x float>
 146   %c.ext = fpext <4 x bfloat> %c to <4 x float>
       ; Multiplicands are the widened splat and widened %c; %a is the accumulator.
 147   %res = call <4 x float> @llvm.fma.v4f32(<4 x float> %b.ext, <4 x float> %c.ext, <4 x float> %a)
 148   ret <4 x float> %res
 149 }
 150
 ; Vector-vector widening bf16 multiply-accumulate, <8 x float> result.
 ; Both bfloat vector operands are widened with fpext and passed to llvm.fma
 ; with %a as the addend. With +zvfbfwma the checks expect a single
 ; vfwmaccbf16.vv at e16/m1 reading v10 and v11; with only +zvfbfmin they
 ; expect two vfwcvtbf16.f.f.v widens into v12 and v14 followed by vfmacc.vv
 ; at e32/m2.
 151 define <8 x float> @vfwmaccbf16_vv_v8f32(<8 x float> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
 152 ; ZVFBFWMA-LABEL: vfwmaccbf16_vv_v8f32:
 153 ; ZVFBFWMA:       # %bb.0:
 154 ; ZVFBFWMA-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 155 ; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v10, v11
 156 ; ZVFBFWMA-NEXT:    ret
 157 ;
 158 ; ZVFBFMIN-LABEL: vfwmaccbf16_vv_v8f32:
 159 ; ZVFBFMIN:       # %bb.0:
 160 ; ZVFBFMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 161 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v12, v10
 162 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v14, v11
 163 ; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 164 ; ZVFBFMIN-NEXT:    vfmacc.vv v8, v12, v14
 165 ; ZVFBFMIN-NEXT:    ret
 166   %b.ext = fpext <8 x bfloat> %b to <8 x float>
 167   %c.ext = fpext <8 x bfloat> %c to <8 x float>
       ; Multiplicands are the two widened operands; %a is the accumulator.
 168   %res = call <8 x float> @llvm.fma.v8f32(<8 x float> %b.ext, <8 x float> %c.ext, <8 x float> %a)
 169   ret <8 x float> %res
 170 }
 171
 ; Scalar-operand variant of the <8 x float> test: the bfloat scalar %b is
 ; broadcast to all lanes via insertelement + shufflevector before the fpext.
 ; With +zvfbfwma the checks move the scalar to a GPR (fmv.x.h), splat it with
 ; vmv.v.x and use the .vv form of vfwmaccbf16 at e16/m1. With only +zvfbfmin
 ; they use fmv.x.w, vmv.v.x, two vfwcvtbf16.f.f.v widens and an e32/m2
 ; vfmacc.vv.
 ; NOTE(review): no .vf instruction form appears in either check block; the
 ; checks record the current llc output rather than an ideal lowering.
 172 define <8 x float> @vfwmaccbf16_vf_v8f32(<8 x float> %a, bfloat %b, <8 x bfloat> %c) {
 173 ; ZVFBFWMA-LABEL: vfwmaccbf16_vf_v8f32:
 174 ; ZVFBFWMA:       # %bb.0:
 175 ; ZVFBFWMA-NEXT:    fmv.x.h a0, fa0
 176 ; ZVFBFWMA-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 177 ; ZVFBFWMA-NEXT:    vmv.v.x v11, a0
 178 ; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v11, v10
 179 ; ZVFBFWMA-NEXT:    ret
 180 ;
 181 ; ZVFBFMIN-LABEL: vfwmaccbf16_vf_v8f32:
 182 ; ZVFBFMIN:       # %bb.0:
 183 ; ZVFBFMIN-NEXT:    fmv.x.w a0, fa0
 184 ; ZVFBFMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 185 ; ZVFBFMIN-NEXT:    vmv.v.x v11, a0
 186 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v12, v11
 187 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v14, v10
 188 ; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 189 ; ZVFBFMIN-NEXT:    vfmacc.vv v8, v12, v14
 190 ; ZVFBFMIN-NEXT:    ret
       ; Splat idiom: insert %b into lane 0, then shuffle with an all-zero mask.
 191   %b.head = insertelement <8 x bfloat> poison, bfloat %b, i32 0
 192   %b.splat = shufflevector <8 x bfloat> %b.head, <8 x bfloat> poison, <8 x i32> zeroinitializer
 193   %b.ext = fpext <8 x bfloat> %b.splat to <8 x float>
 194   %c.ext = fpext <8 x bfloat> %c to <8 x float>
       ; Multiplicands are the widened splat and widened %c; %a is the accumulator.
 195   %res = call <8 x float> @llvm.fma.v8f32(<8 x float> %b.ext, <8 x float> %c.ext, <8 x float> %a)
 196   ret <8 x float> %res
 197 }
 198
 ; Vector-vector widening bf16 multiply-accumulate, <16 x float> result.
 ; Both bfloat vector operands are widened with fpext and passed to llvm.fma
 ; with %a as the addend. With +zvfbfwma the checks expect a single
 ; vfwmaccbf16.vv at e16/m2 reading v12 and v14; with only +zvfbfmin they
 ; expect two vfwcvtbf16.f.f.v widens into v16 and v20 followed by vfmacc.vv
 ; at e32/m4.
 199 define <16 x float> @vfwmaccbf16_vv_v16f32(<16 x float> %a, <16 x bfloat> %b, <16 x bfloat> %c) {
 200 ; ZVFBFWMA-LABEL: vfwmaccbf16_vv_v16f32:
 201 ; ZVFBFWMA:       # %bb.0:
 202 ; ZVFBFWMA-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 203 ; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v12, v14
 204 ; ZVFBFWMA-NEXT:    ret
 205 ;
 206 ; ZVFBFMIN-LABEL: vfwmaccbf16_vv_v16f32:
 207 ; ZVFBFMIN:       # %bb.0:
 208 ; ZVFBFMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 209 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v16, v12
 210 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v20, v14
 211 ; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 212 ; ZVFBFMIN-NEXT:    vfmacc.vv v8, v16, v20
 213 ; ZVFBFMIN-NEXT:    ret
 214   %b.ext = fpext <16 x bfloat> %b to <16 x float>
 215   %c.ext = fpext <16 x bfloat> %c to <16 x float>
       ; Multiplicands are the two widened operands; %a is the accumulator.
 216   %res = call <16 x float> @llvm.fma.v16f32(<16 x float> %b.ext, <16 x float> %c.ext, <16 x float> %a)
 217   ret <16 x float> %res
 218 }
 219
 ; Scalar-operand variant of the <16 x float> test: the bfloat scalar %b is
 ; broadcast to all lanes via insertelement + shufflevector before the fpext.
 ; With +zvfbfwma the checks move the scalar to a GPR (fmv.x.h), splat it with
 ; vmv.v.x and use the .vv form of vfwmaccbf16 at e16/m2. With only +zvfbfmin
 ; they use fmv.x.w, vmv.v.x, two vfwcvtbf16.f.f.v widens and an e32/m4
 ; vfmacc.vv.
 ; NOTE(review): no .vf instruction form appears in either check block; the
 ; checks record the current llc output rather than an ideal lowering.
 220 define <16 x float> @vfwmaccbf16_vf_v16f32(<16 x float> %a, bfloat %b, <16 x bfloat> %c) {
 221 ; ZVFBFWMA-LABEL: vfwmaccbf16_vf_v16f32:
 222 ; ZVFBFWMA:       # %bb.0:
 223 ; ZVFBFWMA-NEXT:    fmv.x.h a0, fa0
 224 ; ZVFBFWMA-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 225 ; ZVFBFWMA-NEXT:    vmv.v.x v14, a0
 226 ; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v14, v12
 227 ; ZVFBFWMA-NEXT:    ret
 228 ;
 229 ; ZVFBFMIN-LABEL: vfwmaccbf16_vf_v16f32:
 230 ; ZVFBFMIN:       # %bb.0:
 231 ; ZVFBFMIN-NEXT:    fmv.x.w a0, fa0
 232 ; ZVFBFMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 233 ; ZVFBFMIN-NEXT:    vmv.v.x v14, a0
 234 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v16, v14
 235 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v20, v12
 236 ; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 237 ; ZVFBFMIN-NEXT:    vfmacc.vv v8, v16, v20
 238 ; ZVFBFMIN-NEXT:    ret
       ; Splat idiom: insert %b into lane 0, then shuffle with an all-zero mask.
 239   %b.head = insertelement <16 x bfloat> poison, bfloat %b, i32 0
 240   %b.splat = shufflevector <16 x bfloat> %b.head, <16 x bfloat> poison, <16 x i32> zeroinitializer
 241   %b.ext = fpext <16 x bfloat> %b.splat to <16 x float>
 242   %c.ext = fpext <16 x bfloat> %c to <16 x float>
       ; Multiplicands are the widened splat and widened %c; %a is the accumulator.
 243   %res = call <16 x float> @llvm.fma.v16f32(<16 x float> %b.ext, <16 x float> %c.ext, <16 x float> %a)
 244   ret <16 x float> %res
 245 }
 246
 ; Vector-vector widening bf16 multiply-accumulate, <32 x float> result.
 ; Both bfloat vector operands are widened with fpext and passed to llvm.fma
 ; with %a as the addend. Unlike the smaller tests, the checks here load the
 ; vector length with "li a0, 32" and use the register form vsetvli instead of
 ; vsetivli (presumably because 32 does not fit vsetivli's immediate field;
 ; confirm against the RVV spec). With +zvfbfwma a single vfwmaccbf16.vv at
 ; e16/m4 is expected; with only +zvfbfmin two vfwcvtbf16.f.f.v widens into
 ; v24 and v0 followed by vfmacc.vv at e32/m8.
 ; NOTE(review): the symbol is spelled "vfwmaccbf32_" while the other tests in
 ; this file use "vfwmaccbf16_" -- looks like a typo. Renaming requires
 ; updating both -LABEL check lines in step.
 247 define <32 x float> @vfwmaccbf32_vv_v32f32(<32 x float> %a, <32 x bfloat> %b, <32 x bfloat> %c) {
 248 ; ZVFBFWMA-LABEL: vfwmaccbf32_vv_v32f32:
 249 ; ZVFBFWMA:       # %bb.0:
 250 ; ZVFBFWMA-NEXT:    li a0, 32
 251 ; ZVFBFWMA-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 252 ; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v16, v20
 253 ; ZVFBFWMA-NEXT:    ret
 254 ;
 255 ; ZVFBFMIN-LABEL: vfwmaccbf32_vv_v32f32:
 256 ; ZVFBFMIN:       # %bb.0:
 257 ; ZVFBFMIN-NEXT:    li a0, 32
 258 ; ZVFBFMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 259 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v24, v16
 260 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v0, v20
 261 ; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 262 ; ZVFBFMIN-NEXT:    vfmacc.vv v8, v24, v0
 263 ; ZVFBFMIN-NEXT:    ret
 264   %b.ext = fpext <32 x bfloat> %b to <32 x float>
 265   %c.ext = fpext <32 x bfloat> %c to <32 x float>
       ; Multiplicands are the two widened operands; %a is the accumulator.
 266   %res = call <32 x float> @llvm.fma.v32f32(<32 x float> %b.ext, <32 x float> %c.ext, <32 x float> %a)
 267   ret <32 x float> %res
 268 }
 269
 ; Scalar-operand variant of the <32 x float> test: the bfloat scalar %b is
 ; broadcast to all lanes via insertelement + shufflevector before the fpext.
 ; The checks load the vector length with "li a1, 32" and use the register
 ; form vsetvli (a0 already holds the scalar bits). With +zvfbfwma they expect
 ; fmv.x.h, vmv.v.x and the .vv form of vfwmaccbf16 at e16/m4; with only
 ; +zvfbfmin they expect fmv.x.w, vmv.v.x, two vfwcvtbf16.f.f.v widens into
 ; v24 and v0, and vfmacc.vv at e32/m8.
 ; NOTE(review): the symbol is spelled "vfwmaccbf32_" while the other tests in
 ; this file use "vfwmaccbf16_" -- looks like a typo. Renaming requires
 ; updating both -LABEL check lines in step.
 ; NOTE(review): no .vf instruction form appears in either check block; the
 ; checks record the current llc output rather than an ideal lowering.
 270 define <32 x float> @vfwmaccbf32_vf_v32f32(<32 x float> %a, bfloat %b, <32 x bfloat> %c) {
 271 ; ZVFBFWMA-LABEL: vfwmaccbf32_vf_v32f32:
 272 ; ZVFBFWMA:       # %bb.0:
 273 ; ZVFBFWMA-NEXT:    fmv.x.h a0, fa0
 274 ; ZVFBFWMA-NEXT:    li a1, 32
 275 ; ZVFBFWMA-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
 276 ; ZVFBFWMA-NEXT:    vmv.v.x v20, a0
 277 ; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v20, v16
 278 ; ZVFBFWMA-NEXT:    ret
 279 ;
 280 ; ZVFBFMIN-LABEL: vfwmaccbf32_vf_v32f32:
 281 ; ZVFBFMIN:       # %bb.0:
 282 ; ZVFBFMIN-NEXT:    fmv.x.w a0, fa0
 283 ; ZVFBFMIN-NEXT:    li a1, 32
 284 ; ZVFBFMIN-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
 285 ; ZVFBFMIN-NEXT:    vmv.v.x v20, a0
 286 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v24, v20
 287 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v0, v16
 288 ; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 289 ; ZVFBFMIN-NEXT:    vfmacc.vv v8, v24, v0
 290 ; ZVFBFMIN-NEXT:    ret
       ; Splat idiom: insert %b into lane 0, then shuffle with an all-zero mask.
 291   %b.head = insertelement <32 x bfloat> poison, bfloat %b, i32 0
 292   %b.splat = shufflevector <32 x bfloat> %b.head, <32 x bfloat> poison, <32 x i32> zeroinitializer
 293   %b.ext = fpext <32 x bfloat> %b.splat to <32 x float>
 294   %c.ext = fpext <32 x bfloat> %c to <32 x float>
       ; Multiplicands are the widened splat and widened %c; %a is the accumulator.
 295   %res = call <32 x float> @llvm.fma.v32f32(<32 x float> %b.ext, <32 x float> %c.ext, <32 x float> %a)
 296   ret <32 x float> %res
 297 }