llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512fp16 -mattr=+avx512vl | FileCheck %s
   3
   4 declare <4 x float> @llvm.x86.avx512fp16.mask.vfmul.csh(<4 x float>, <4 x float>, <4 x float>, i8, i32)
   5 declare <4 x float> @llvm.x86.avx512fp16.maskz.vfmul.csh(<4 x float>, <4 x float>, <4 x float>, i8, i32)
     ; NOTE(review): maskz.vfmul.csh is declared but none of the tests visible in
     ; this file call it; the maskz cfmul tests use mask.vfmul.csh with a
     ; zeroinitializer third operand instead. Confirm whether that is intended.
   6 declare <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.csh(<4 x float>, <4 x float>, <4 x float>, i8, i32)
   7 declare <4 x float> @llvm.x86.avx512fp16.maskz.vfcmul.csh(<4 x float>, <4 x float>, <4 x float>, i8, i32)
     ; NOTE(review): same as above, maskz.vfcmul.csh has no caller in the visible
     ; tests.
   8 declare <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.csh(<4 x float>, <4 x float>, <4 x float>, i8, i32)
   9 declare <4 x float> @llvm.x86.avx512fp16.maskz.vfmadd.csh(<4 x float>, <4 x float>, <4 x float>, i8, i32)
  10 declare <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.csh(<4 x float>, <4 x float>, <4 x float>, i8, i32)
  11 declare <4 x float> @llvm.x86.avx512fp16.maskz.vfcmadd.csh(<4 x float>, <4 x float>, <4 x float>, i8, i32)
     ; All eight intrinsics share one signature: three <4 x float> operands, an
     ; i8 and an i32. In the tests below the i8 is either -1 (expected output has
     ; no mask register) or a function argument (expected output uses %k1), and
     ; the i32 is either 4 (no rounding annotation expected) or 9 (expected
     ; output carries {rd-sae}).
  12
  13 ;; no mask, no rounding
  14
  15 define <4 x float> @test_nm_nr_int_x86_avx512fp16_mask_cfmul_sh(<4 x float> %x0, <4 x float> %x1) {
     ; mask.vfmul.csh with an undef third operand, i8 -1 and i32 4.
     ; Expected output: a plain vfmulcsh (no mask register, no rounding
     ; annotation) writing %xmm2, followed by a copy into %xmm0.
  16 ; CHECK-LABEL: test_nm_nr_int_x86_avx512fp16_mask_cfmul_sh:
  17 ; CHECK:       ## %bb.0:
  18 ; CHECK-NEXT:    vfmulcsh %xmm1, %xmm0, %xmm2
  19 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
  20 ; CHECK-NEXT:    retq
  21   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfmul.csh(<4 x float> %x0, <4 x float> %x1, <4 x float> undef, i8 -1, i32 4)
  22   ret <4 x float> %res
  23 }
  24
  25 define <4 x float> @test_nm_nr_int_x86_avx512fp16_mask_cfcmul_sh(<4 x float> %x0, <4 x float> %x1) {
     ; mask.vfcmul.csh with an undef third operand, i8 -1 and i32 4.
     ; Expected output: a plain vfcmulcsh (no mask register, no rounding
     ; annotation) writing %xmm2, followed by a copy into %xmm0.
  26 ; CHECK-LABEL: test_nm_nr_int_x86_avx512fp16_mask_cfcmul_sh:
  27 ; CHECK:       ## %bb.0:
  28 ; CHECK-NEXT:    vfcmulcsh %xmm1, %xmm0, %xmm2
  29 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
  30 ; CHECK-NEXT:    retq
  31   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.csh(<4 x float> %x0, <4 x float> %x1, <4 x float> undef, i8 -1, i32 4)
  32   ret <4 x float> %res
  33 }
  34
  35 define <4 x float> @test_nm_nr_int_x86_avx512fp16_cfmadd_sh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2) {
     ; mask.vfmadd.csh with %x2 as the third operand, i8 -1 and i32 4.
     ; Expected output: a plain vfmaddcsh (no mask register, no rounding
     ; annotation) writing %xmm2, followed by a copy into %xmm0.
     ; NOTE(review): unlike the sibling tests, this function name has no
     ; "_mask_" component even though it calls the mask.* intrinsic.
  36 ; CHECK-LABEL: test_nm_nr_int_x86_avx512fp16_cfmadd_sh:
  37 ; CHECK:       ## %bb.0:
  38 ; CHECK-NEXT:    vfmaddcsh %xmm1, %xmm0, %xmm2
  39 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
  40 ; CHECK-NEXT:    retq
  41   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.csh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
  42   ret <4 x float> %res
  43 }
  44
  45 define <4 x float> @test_nm_nr_int_x86_avx512fp16_cfcmadd_sh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2) {
     ; mask.vfcmadd.csh with %x2 as the third operand, i8 -1 and i32 4.
     ; Expected output: a plain vfcmaddcsh (no mask register, no rounding
     ; annotation) writing %xmm2, followed by a copy into %xmm0.
     ; NOTE(review): unlike the sibling tests, this function name has no
     ; "_mask_" component even though it calls the mask.* intrinsic.
  46 ; CHECK-LABEL: test_nm_nr_int_x86_avx512fp16_cfcmadd_sh:
  47 ; CHECK:       ## %bb.0:
  48 ; CHECK-NEXT:    vfcmaddcsh %xmm1, %xmm0, %xmm2
  49 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
  50 ; CHECK-NEXT:    retq
  51   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.csh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
  52   ret <4 x float> %res
  53 }
  54
  55 ;; no mask, rounding
  56
  57 define <4 x float> @test_nm_r_int_x86_avx512fp16_mask_cfmul_sh(<4 x float> %x0, <4 x float> %x1) {
     ; mask.vfmul.csh with an undef third operand, i8 -1 and i32 9.
     ; Expected output: vfmulcsh carrying {rd-sae} and no mask register,
     ; writing %xmm2, followed by a copy into %xmm0.
  58 ; CHECK-LABEL: test_nm_r_int_x86_avx512fp16_mask_cfmul_sh:
  59 ; CHECK:       ## %bb.0:
  60 ; CHECK-NEXT:    vfmulcsh {rd-sae}, %xmm1, %xmm0, %xmm2
  61 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
  62 ; CHECK-NEXT:    retq
  63   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfmul.csh(<4 x float> %x0, <4 x float> %x1, <4 x float> undef, i8 -1, i32 9)
  64   ret <4 x float> %res
  65 }
  66
  67 define <4 x float> @test_nm_r_int_x86_avx512fp16_mask_cfcmul_sh(<4 x float> %x0, <4 x float> %x1) {
     ; mask.vfcmul.csh with an undef third operand, i8 -1 and i32 9.
     ; Expected output: vfcmulcsh carrying {rd-sae} and no mask register,
     ; writing %xmm2, followed by a copy into %xmm0.
  68 ; CHECK-LABEL: test_nm_r_int_x86_avx512fp16_mask_cfcmul_sh:
  69 ; CHECK:       ## %bb.0:
  70 ; CHECK-NEXT:    vfcmulcsh {rd-sae}, %xmm1, %xmm0, %xmm2
  71 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
  72 ; CHECK-NEXT:    retq
  73   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.csh(<4 x float> %x0, <4 x float> %x1, <4 x float> undef, i8 -1, i32 9)
  74   ret <4 x float> %res
  75 }
  76
  77 define <4 x float> @test_nm_r_int_x86_avx512fp16_mask_cfmadd_sh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2) {
     ; mask.vfmadd.csh with %x2 as the third operand, i8 -1 and i32 9.
     ; Expected output: vfmaddcsh carrying {rd-sae} and no mask register,
     ; writing %xmm2, followed by a copy into %xmm0.
  78 ; CHECK-LABEL: test_nm_r_int_x86_avx512fp16_mask_cfmadd_sh:
  79 ; CHECK:       ## %bb.0:
  80 ; CHECK-NEXT:    vfmaddcsh {rd-sae}, %xmm1, %xmm0, %xmm2
  81 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
  82 ; CHECK-NEXT:    retq
  83   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.csh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 9)
  84   ret <4 x float> %res
  85 }
  86
  87 define <4 x float> @test_nm_r_int_x86_avx512fp16_mask_cfcmadd_sh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2) {
     ; mask.vfcmadd.csh with %x2 as the third operand, i8 -1 and i32 9.
     ; Expected output: vfcmaddcsh carrying {rd-sae} and no mask register,
     ; writing %xmm2, followed by a copy into %xmm0.
  88 ; CHECK-LABEL: test_nm_r_int_x86_avx512fp16_mask_cfcmadd_sh:
  89 ; CHECK:       ## %bb.0:
  90 ; CHECK-NEXT:    vfcmaddcsh {rd-sae}, %xmm1, %xmm0, %xmm2
  91 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
  92 ; CHECK-NEXT:    retq
  93   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.csh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 9)
  94   ret <4 x float> %res
  95 }
  96
  97 ;; mask, no rounding
  98
  99 define <4 x float> @test_m_nr_int_x86_avx512fp16_mask_cfmul_sh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
     ; mask.vfmul.csh with %x2 as the third operand, the i8 argument %x3 and
     ; i32 4. Expected output: %edi is moved into %k1 and vfmulcsh writes
     ; %xmm2 under {%k1} with no rounding annotation, then a copy into %xmm0.
 100 ; CHECK-LABEL: test_m_nr_int_x86_avx512fp16_mask_cfmul_sh:
 101 ; CHECK:       ## %bb.0:
 102 ; CHECK-NEXT:    kmovd %edi, %k1
 103 ; CHECK-NEXT:    vfmulcsh %xmm1, %xmm0, %xmm2 {%k1}
 104 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
 105 ; CHECK-NEXT:    retq
 106   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfmul.csh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
 107   ret <4 x float> %res
 108 }
 109
 110 define <4 x float> @test_m_nr_int_x86_avx512fp16_mask_cfcmul_sh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
     ; mask.vfcmul.csh with %x2 as the third operand, the i8 argument %x3 and
     ; i32 4. Expected output: %edi is moved into %k1 and vfcmulcsh writes
     ; %xmm2 under {%k1} with no rounding annotation, then a copy into %xmm0.
 111 ; CHECK-LABEL: test_m_nr_int_x86_avx512fp16_mask_cfcmul_sh:
 112 ; CHECK:       ## %bb.0:
 113 ; CHECK-NEXT:    kmovd %edi, %k1
 114 ; CHECK-NEXT:    vfcmulcsh %xmm1, %xmm0, %xmm2 {%k1}
 115 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
 116 ; CHECK-NEXT:    retq
 117   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.csh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
 118   ret <4 x float> %res
 119 }
 120
 121 define <4 x float> @test_m_nr_int_x86_avx512fp16_mask_cfmadd_sh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
     ; mask.vfmadd.csh with %x2 as the third operand, the i8 argument %x3 and
     ; i32 4. Expected output: %edi is moved into %k1 and vfmaddcsh writes
     ; %xmm2 under {%k1} with no rounding annotation, then a copy into %xmm0.
 122 ; CHECK-LABEL: test_m_nr_int_x86_avx512fp16_mask_cfmadd_sh:
 123 ; CHECK:       ## %bb.0:
 124 ; CHECK-NEXT:    kmovd %edi, %k1
 125 ; CHECK-NEXT:    vfmaddcsh %xmm1, %xmm0, %xmm2 {%k1}
 126 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
 127 ; CHECK-NEXT:    retq
 128   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.csh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
 129   ret <4 x float> %res
 130 }
 131
 132 define <4 x float> @test_m_nr_int_x86_avx512fp16_mask_cfcmadd_sh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
     ; mask.vfcmadd.csh with %x2 as the third operand, the i8 argument %x3 and
     ; i32 4. Expected output: %edi is moved into %k1 and vfcmaddcsh writes
     ; %xmm2 under {%k1} with no rounding annotation, then a copy into %xmm0.
 133 ; CHECK-LABEL: test_m_nr_int_x86_avx512fp16_mask_cfcmadd_sh:
 134 ; CHECK:       ## %bb.0:
 135 ; CHECK-NEXT:    kmovd %edi, %k1
 136 ; CHECK-NEXT:    vfcmaddcsh %xmm1, %xmm0, %xmm2 {%k1}
 137 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
 138 ; CHECK-NEXT:    retq
 139   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.csh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
 140   ret <4 x float> %res
 141 }
 142
 143 ;; mask, rounding
 144
 145 define <4 x float> @test_int_x86_avx512fp16_mask_cfmul_sh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
     ; mask.vfmul.csh with %x2 as the third operand, the i8 argument %x3 and
     ; i32 9. Expected output: %edi is moved into %k1 and vfmulcsh carries
     ; both {rd-sae} and {%k1}, writing %xmm2, then a copy into %xmm0.
 146 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cfmul_sh:
 147 ; CHECK:       ## %bb.0:
 148 ; CHECK-NEXT:    kmovd %edi, %k1
 149 ; CHECK-NEXT:    vfmulcsh {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
 150 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
 151 ; CHECK-NEXT:    retq
 152   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfmul.csh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 9)
 153   ret <4 x float> %res
 154 }
 155
 156 define <4 x float> @test_int_x86_avx512fp16_mask_cfcmul_sh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
     ; mask.vfcmul.csh with %x2 as the third operand, the i8 argument %x3 and
     ; i32 9. Expected output: %edi is moved into %k1 and vfcmulcsh carries
     ; both {rd-sae} and {%k1}, writing %xmm2, then a copy into %xmm0.
 157 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cfcmul_sh:
 158 ; CHECK:       ## %bb.0:
 159 ; CHECK-NEXT:    kmovd %edi, %k1
 160 ; CHECK-NEXT:    vfcmulcsh {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
 161 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
 162 ; CHECK-NEXT:    retq
 163   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.csh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 9)
 164   ret <4 x float> %res
 165 }
 166
 167 define <4 x float> @test_int_x86_avx512fp16_mask_cfmadd_sh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
     ; mask.vfmadd.csh with %x2 as the third operand, the i8 argument %x3 and
     ; i32 9. Expected output: %edi is moved into %k1 and vfmaddcsh carries
     ; both {rd-sae} and {%k1}, writing %xmm2, then a copy into %xmm0.
 168 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cfmadd_sh:
 169 ; CHECK:       ## %bb.0:
 170 ; CHECK-NEXT:    kmovd %edi, %k1
 171 ; CHECK-NEXT:    vfmaddcsh {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
 172 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
 173 ; CHECK-NEXT:    retq
 174   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.csh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 9)
 175   ret <4 x float> %res
 176 }
 177
 178 define <4 x float> @test_int_x86_avx512fp16_mask_cfcmadd_sh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
     ; mask.vfcmadd.csh with %x2 as the third operand, the i8 argument %x3 and
     ; i32 9. Expected output: %edi is moved into %k1 and vfcmaddcsh carries
     ; both {rd-sae} and {%k1}, writing %xmm2, then a copy into %xmm0.
 179 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cfcmadd_sh:
 180 ; CHECK:       ## %bb.0:
 181 ; CHECK-NEXT:    kmovd %edi, %k1
 182 ; CHECK-NEXT:    vfcmaddcsh {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
 183 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
 184 ; CHECK-NEXT:    retq
 185   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.csh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 9)
 186   ret <4 x float> %res
 187 }
 188
 189 ;; maskz, no rounding
 190
 191 define <4 x float> @test_m_nr_int_x86_avx512fp16_maskz_cfmul_sh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
     ; Zero-masking case expressed through mask.vfmul.csh: the third operand
     ; is zeroinitializer, the i8 is the argument %x3 and the i32 is 4.
     ; Expected output: vfmulcsh under {%k1} {z} with no rounding annotation.
     ; NOTE(review): %x2 is not referenced in the body, and the call targets
     ; the mask.* intrinsic rather than the declared maskz.vfmul.csh.
 192 ; CHECK-LABEL: test_m_nr_int_x86_avx512fp16_maskz_cfmul_sh:
 193 ; CHECK:       ## %bb.0:
 194 ; CHECK-NEXT:    kmovd %edi, %k1
 195 ; CHECK-NEXT:    vfmulcsh %xmm1, %xmm0, %xmm2 {%k1} {z}
 196 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
 197 ; CHECK-NEXT:    retq
 198   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfmul.csh(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 %x3, i32 4)
 199   ret <4 x float> %res
 200 }
 201
 202 define <4 x float> @test_m_nr_int_x86_avx512fp16_maskz_cfcmul_sh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
     ; Zero-masking case expressed through mask.vfcmul.csh: the third operand
     ; is zeroinitializer, the i8 is the argument %x3 and the i32 is 4.
     ; Expected output: vfcmulcsh under {%k1} {z} with no rounding annotation.
     ; NOTE(review): %x2 is not referenced in the body, and the call targets
     ; the mask.* intrinsic rather than the declared maskz.vfcmul.csh.
 203 ; CHECK-LABEL: test_m_nr_int_x86_avx512fp16_maskz_cfcmul_sh:
 204 ; CHECK:       ## %bb.0:
 205 ; CHECK-NEXT:    kmovd %edi, %k1
 206 ; CHECK-NEXT:    vfcmulcsh %xmm1, %xmm0, %xmm2 {%k1} {z}
 207 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
 208 ; CHECK-NEXT:    retq
 209   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.csh(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 %x3, i32 4)
 210   ret <4 x float> %res
 211 }
 212
 213 define <4 x float> @test_m_nr_int_x86_avx512fp16_maskz_cfmadd_sh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
     ; maskz.vfmadd.csh with %x2 as the third operand, the i8 argument %x3
     ; and i32 4. Expected output: %edi is moved into %k1 and vfmaddcsh
     ; writes %xmm2 under {%k1} {z} with no rounding annotation.
 214 ; CHECK-LABEL: test_m_nr_int_x86_avx512fp16_maskz_cfmadd_sh:
 215 ; CHECK:       ## %bb.0:
 216 ; CHECK-NEXT:    kmovd %edi, %k1
 217 ; CHECK-NEXT:    vfmaddcsh %xmm1, %xmm0, %xmm2 {%k1} {z}
 218 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
 219 ; CHECK-NEXT:    retq
 220   %res = call <4 x float> @llvm.x86.avx512fp16.maskz.vfmadd.csh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
 221   ret <4 x float> %res
 222 }
 223
 224 define <4 x float> @test_m_nr_int_x86_avx512fp16_maskz_cfcmadd_sh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
     ; maskz.vfcmadd.csh with %x2 as the third operand, the i8 argument %x3
     ; and i32 4. Expected output: %edi is moved into %k1 and vfcmaddcsh
     ; writes %xmm2 under {%k1} {z} with no rounding annotation.
 225 ; CHECK-LABEL: test_m_nr_int_x86_avx512fp16_maskz_cfcmadd_sh:
 226 ; CHECK:       ## %bb.0:
 227 ; CHECK-NEXT:    kmovd %edi, %k1
 228 ; CHECK-NEXT:    vfcmaddcsh %xmm1, %xmm0, %xmm2 {%k1} {z}
 229 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
 230 ; CHECK-NEXT:    retq
 231   %res = call <4 x float> @llvm.x86.avx512fp16.maskz.vfcmadd.csh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
 232   ret <4 x float> %res
 233 }
 234
 235 ;; maskz, rounding
 236
 237 define <4 x float> @test_int_x86_avx512fp16_maskz_cfmul_sh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
     ; Zero-masking case expressed through mask.vfmul.csh: the third operand
     ; is zeroinitializer, the i8 is the argument %x3 and the i32 is 9.
     ; Expected output: vfmulcsh carrying {rd-sae} under {%k1} {z}.
     ; NOTE(review): %x2 is not referenced in the body, and the call targets
     ; the mask.* intrinsic rather than the declared maskz.vfmul.csh.
 238 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_cfmul_sh:
 239 ; CHECK:       ## %bb.0:
 240 ; CHECK-NEXT:    kmovd %edi, %k1
 241 ; CHECK-NEXT:    vfmulcsh {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1} {z}
 242 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
 243 ; CHECK-NEXT:    retq
 244   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfmul.csh(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 %x3, i32 9)
 245   ret <4 x float> %res
 246 }
 247
 248 define <4 x float> @test_int_x86_avx512fp16_maskz_cfcmul_sh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
     ; Zero-masking case expressed through mask.vfcmul.csh: the third operand
     ; is zeroinitializer, the i8 is the argument %x3 and the i32 is 9.
     ; Expected output: vfcmulcsh carrying {rd-sae} under {%k1} {z}.
     ; NOTE(review): %x2 is not referenced in the body, and the call targets
     ; the mask.* intrinsic rather than the declared maskz.vfcmul.csh.
 249 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_cfcmul_sh:
 250 ; CHECK:       ## %bb.0:
 251 ; CHECK-NEXT:    kmovd %edi, %k1
 252 ; CHECK-NEXT:    vfcmulcsh {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1} {z}
 253 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
 254 ; CHECK-NEXT:    retq
 255   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.csh(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 %x3, i32 9)
 256   ret <4 x float> %res
 257 }
 258
 259 define <4 x float> @test_int_x86_avx512fp16_maskz_cfmadd_sh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
     ; maskz.vfmadd.csh with %x2 as the third operand, the i8 argument %x3
     ; and i32 9. Expected output: %edi is moved into %k1 and vfmaddcsh
     ; carries {rd-sae} under {%k1} {z}, writing %xmm2, then a copy to %xmm0.
 260 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_cfmadd_sh:
 261 ; CHECK:       ## %bb.0:
 262 ; CHECK-NEXT:    kmovd %edi, %k1
 263 ; CHECK-NEXT:    vfmaddcsh {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1} {z}
 264 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
 265 ; CHECK-NEXT:    retq
 266   %res = call <4 x float> @llvm.x86.avx512fp16.maskz.vfmadd.csh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 9)
 267   ret <4 x float> %res
 268 }
 269
 270 define <4 x float> @test_int_x86_avx512fp16_maskz_cfcmadd_sh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
     ; maskz.vfcmadd.csh with %x2 as the third operand, the i8 argument %x3
     ; and i32 9. Expected output: %edi is moved into %k1 and vfcmaddcsh
     ; carries {rd-sae} under {%k1} {z}, writing %xmm2, then a copy to %xmm0.
 271 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_cfcmadd_sh:
 272 ; CHECK:       ## %bb.0:
 273 ; CHECK-NEXT:    kmovd %edi, %k1
 274 ; CHECK-NEXT:    vfcmaddcsh {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1} {z}
 275 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
 276 ; CHECK-NEXT:    retq
 277   %res = call <4 x float> @llvm.x86.avx512fp16.maskz.vfcmadd.csh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 9)
 278   ret <4 x float> %res
 279 }