llvm/test/CodeGen/X86/avx512cfmul-intrinsics.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512fp16 -mattr=+avx512vl | FileCheck %s
   3
   4 declare <4 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.128(<4 x float>, <4 x float>, <4 x float>, i8)
   5
   6 define <4 x float> @test_int_x86_avx512fp8_mask_cfmul_ph_bst(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3){
     ; Masked 128-bit vfmul.cph call whose second operand is a constant splat of
     ; 1.0. The CHECK lines expect the splat to be folded into vfmulcph as a
     ; {1to4} broadcast load from a constant-pool label, with xmm2 (%x2) as the
     ; destination written under {%k1}; xmm2 is then copied to xmm0 for the return.
     ; %x1 is not used by the call.
     ; NOTE(review): the name says "avx512fp8" while the intrinsic called is
     ; llvm.x86.avx512fp16.* -- looks like a typo in the test name; renaming would
     ; also require updating the CHECK-LABEL line. Confirm before changing.
   7 ; CHECK-LABEL: test_int_x86_avx512fp8_mask_cfmul_ph_bst:
   8 ; CHECK:       ## %bb.0:
   9 ; CHECK-NEXT:    kmovd %edi, %k1
  10 ; CHECK-NEXT:    vfmulcph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm2 {%k1}
  11 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
  12 ; CHECK-NEXT:    retq
  13   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.128(<4 x float> %x0, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %x2, i8 %x3)
  14   ret <4 x float> %res
  15 }
  16
  17 define <4 x float> @test_int_x86_avx512fp8_mask_cfmul_ph_bst2(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3){
     ; Masked 128-bit vfmul.cph call with the constant 1.0 splat as the FIRST
     ; operand and %x0 as the second. The CHECK lines still expect the splat as
     ; the {1to4} broadcast memory operand and %xmm0 as the register source, i.e.
     ; the two multiplicands are expected to be swapped so the constant can fold.
     ; %x1 is not used by the call.
  18 ; CHECK-LABEL: test_int_x86_avx512fp8_mask_cfmul_ph_bst2:
  19 ; CHECK:       ## %bb.0:
  20 ; CHECK-NEXT:    kmovd %edi, %k1
  21 ; CHECK-NEXT:    vfmulcph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm2 {%k1}
  22 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
  23 ; CHECK-NEXT:    retq
  24   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.128(<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %x0, <4 x float> %x2, i8 %x3)
  25   ret <4 x float> %res
  26 }
  27
  28 define <4 x float> @test_int_x86_avx512fp8_mask_cfmul_ph_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3){
     ; Masked 128-bit vfmul.cph with all-register operands. Expected code: the i8
     ; mask %x3 (edi) is moved into %k1, vfmulcph of xmm0/xmm1 writes xmm2 (%x2,
     ; the third intrinsic operand) under {%k1}, and xmm2 is copied to xmm0 for
     ; the return.
  29 ; CHECK-LABEL: test_int_x86_avx512fp8_mask_cfmul_ph_128:
  30 ; CHECK:       ## %bb.0:
  31 ; CHECK-NEXT:    kmovd %edi, %k1
  32 ; CHECK-NEXT:    vfmulcph %xmm1, %xmm0, %xmm2 {%k1}
  33 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
  34 ; CHECK-NEXT:    retq
  35   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  36   ret <4 x float> %res
  37 }
  38
  39 define <4 x float> @test_int_x86_avx512fp8_maskz_cfmul_ph_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3){
     ; Zero-masking variant: the third intrinsic operand is zeroinitializer and
     ; the CHECK lines expect the {%k1} {z} form of vfmulcph. %x2 is not used by
     ; the call. The expected code still writes xmm2 and then copies it to xmm0
     ; rather than writing xmm0 directly -- presumably because the destination
     ; may not overlap a source register for this instruction; confirm against
     ; the instruction definition.
  40 ; CHECK-LABEL: test_int_x86_avx512fp8_maskz_cfmul_ph_128:
  41 ; CHECK:       ## %bb.0:
  42 ; CHECK-NEXT:    kmovd %edi, %k1
  43 ; CHECK-NEXT:    vfmulcph %xmm1, %xmm0, %xmm2 {%k1} {z}
  44 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
  45 ; CHECK-NEXT:    retq
  46   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.128(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 %x3)
  47   ret <4 x float> %res
  48 }
  49
  50 define <4 x float> @test_int_x86_avx512fp8_cfmul_ph_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2){
     ; Unmasked 128-bit case: the mask operand is the constant -1 (all bits set),
     ; and the CHECK lines expect no kmov and no {%k} annotation. The call passes
     ; (%x2, %x1, %x0), so the expected vfmulcph reads xmm2/xmm1 and writes xmm0,
     ; the return register, directly.
  51 ; CHECK-LABEL: test_int_x86_avx512fp8_cfmul_ph_128:
  52 ; CHECK:       ## %bb.0:
  53 ; CHECK-NEXT:    vfmulcph %xmm1, %xmm2, %xmm0
  54 ; CHECK-NEXT:    retq
  55   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.128(<4 x float> %x2, <4 x float> %x1, <4 x float> %x0, i8 -1)
  56   ret <4 x float> %res
  57 }
  58
  59 declare <8 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.256(<8 x float>, <8 x float>, <8 x float>, i8)
  60
  61 define <8 x float> @test_int_x86_avx512fp16_mask_cfmul_ph_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3){
     ; Masked 256-bit vfmul.cph with all-register operands. Expected code: the i8
     ; mask %x3 (edi) is moved into %k1, vfmulcph of ymm0/ymm1 writes ymm2 (%x2,
     ; the third intrinsic operand) under {%k1}, and ymm2 is copied to ymm0 for
     ; the return.
  62 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cfmul_ph_256:
  63 ; CHECK:       ## %bb.0:
  64 ; CHECK-NEXT:    kmovd %edi, %k1
  65 ; CHECK-NEXT:    vfmulcph %ymm1, %ymm0, %ymm2 {%k1}
  66 ; CHECK-NEXT:    vmovaps %ymm2, %ymm0
  67 ; CHECK-NEXT:    retq
  68   %res = call <8 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  69   ret <8 x float> %res
  70 }
  71
  72 define <8 x float> @test_int_x86_avx512fp16_maskz_cfmul_ph_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3){
     ; Zero-masking 256-bit variant: the third intrinsic operand is
     ; zeroinitializer and the CHECK lines expect the {%k1} {z} form of vfmulcph.
     ; %x2 is not used by the call; the expected code writes ymm2 and then copies
     ; it to ymm0 for the return.
  73 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_cfmul_ph_256:
  74 ; CHECK:       ## %bb.0:
  75 ; CHECK-NEXT:    kmovd %edi, %k1
  76 ; CHECK-NEXT:    vfmulcph %ymm1, %ymm0, %ymm2 {%k1} {z}
  77 ; CHECK-NEXT:    vmovaps %ymm2, %ymm0
  78 ; CHECK-NEXT:    retq
  79   %res = call <8 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.256(<8 x float> %x0, <8 x float> %x1, <8 x float> zeroinitializer, i8 %x3)
  80   ret <8 x float> %res
  81 }
  82
  83 define <8 x float> @test_int_x86_avx512fp16_cfmul_ph_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2){
     ; Unmasked 256-bit case: the mask operand is the constant -1 (all bits set),
     ; and the CHECK lines expect no kmov and no {%k} annotation. The call passes
     ; (%x2, %x1, %x0), so the expected vfmulcph reads ymm2/ymm1 and writes ymm0,
     ; the return register, directly.
  84 ; CHECK-LABEL: test_int_x86_avx512fp16_cfmul_ph_256:
  85 ; CHECK:       ## %bb.0:
  86 ; CHECK-NEXT:    vfmulcph %ymm1, %ymm2, %ymm0
  87 ; CHECK-NEXT:    retq
  88   %res = call <8 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.256(<8 x float> %x2, <8 x float> %x1, <8 x float> %x0, i8 -1)
  89   ret <8 x float> %res
  90 }
  91
  92 declare <16 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
  93
  94 define <16 x float> @test_int_x86_avx512fp16_mask_cfmul_ph_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
     ; Masked 512-bit vfmul.cph. The 512-bit intrinsic takes an i16 mask plus a
     ; trailing i32 operand; with that operand set to 4 the CHECK lines expect no
     ; rounding/SAE annotation on the instruction. Expected code: mask (edi) is
     ; moved into %k1, vfmulcph of zmm0/zmm1 writes zmm2 (%x2) under {%k1}, and
     ; zmm2 is copied to zmm0 for the return.
  95 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cfmul_ph_512:
  96 ; CHECK:       ## %bb.0:
  97 ; CHECK-NEXT:    kmovd %edi, %k1
  98 ; CHECK-NEXT:    vfmulcph %zmm1, %zmm0, %zmm2 {%k1}
  99 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0
 100 ; CHECK-NEXT:    retq
 101   %res = call <16 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
 102   ret <16 x float> %res
 103 }
 104
 105 define <16 x float> @test_int_x86_avx512fp16_maskz_cfmul_ph_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
     ; Zero-masking 512-bit variant: the third intrinsic operand is
     ; zeroinitializer and the trailing i32 operand is 4. The CHECK lines expect
     ; the {%k1} {z} form of vfmulcph with no rounding/SAE annotation. %x2 is not
     ; used by the call; the expected code writes zmm2 and then copies it to zmm0.
 106 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_cfmul_ph_512:
 107 ; CHECK:       ## %bb.0:
 108 ; CHECK-NEXT:    kmovd %edi, %k1
 109 ; CHECK-NEXT:    vfmulcph %zmm1, %zmm0, %zmm2 {%k1} {z}
 110 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0
 111 ; CHECK-NEXT:    retq
 112   %res = call <16 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.512(<16 x float> %x0, <16 x float> %x1, <16 x float> zeroinitializer, i16 %x3, i32 4)
 113   ret <16 x float> %res
 114 }
 115
 116 define <16 x float> @test_int_x86_avx512fp16_cfmul_ph_512_rn(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
     ; Unmasked 512-bit case with an explicit rounding operand: mask is the
     ; constant -1 and the trailing i32 operand is 11, for which the CHECK lines
     ; expect the {rz-sae} annotation on vfmulcph. The call passes
     ; (%x2, %x1, %x0), so the expected instruction reads zmm2/zmm1 and writes
     ; zmm0 directly. %x3 is not used.
     ; NOTE(review): the "_rn" suffix in the name does not match the expected
     ; {rz-sae} annotation -- the name looks misleading; confirm intent.
 117 ; CHECK-LABEL: test_int_x86_avx512fp16_cfmul_ph_512_rn:
 118 ; CHECK:       ## %bb.0:
 119 ; CHECK-NEXT:    vfmulcph {rz-sae}, %zmm1, %zmm2, %zmm0
 120 ; CHECK-NEXT:    retq
 121   %res = call <16 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.512(<16 x float> %x2, <16 x float> %x1, <16 x float> %x0, i16 -1, i32 11)
 122   ret <16 x float> %res
 123 }
 124
 125 define <16 x float> @test_int_x86_avx512fp16_cfmul_ph_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
     ; Unmasked 512-bit case: mask is the constant -1 and the trailing i32
     ; operand is 4; the CHECK lines expect a plain vfmulcph with neither a {%k}
     ; nor a rounding/SAE annotation. The call passes (%x2, %x1, %x0), so the
     ; expected instruction reads zmm2/zmm1 and writes zmm0 directly. %x3 is not
     ; used.
 126 ; CHECK-LABEL: test_int_x86_avx512fp16_cfmul_ph_512:
 127 ; CHECK:       ## %bb.0:
 128 ; CHECK-NEXT:    vfmulcph %zmm1, %zmm2, %zmm0
 129 ; CHECK-NEXT:    retq
 130   %res = call <16 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.512(<16 x float> %x2, <16 x float> %x1, <16 x float> %x0, i16 -1, i32 4)
 131   ret <16 x float> %res
 132 }
 133
 134 declare <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.128(<4 x float>, <4 x float>, <4 x float>, i8)
 135
 136 define <4 x float> @test_int_x86_avx512fp8_mask_cfcmul_ph_bst(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3){
     ; Masked 128-bit vfcmul.cph call whose second operand is a constant splat of
     ; 1.0. The CHECK lines expect the splat to be folded into vfcmulcph as a
     ; {1to4} broadcast load from a constant-pool label, with xmm2 (%x2) as the
     ; destination written under {%k1}; xmm2 is then copied to xmm0 for the return.
     ; %x1 is not used by the call.
     ; NOTE(review): the name says "avx512fp8" while the intrinsic called is
     ; llvm.x86.avx512fp16.* -- looks like a typo in the test name; renaming would
     ; also require updating the CHECK-LABEL line. Confirm before changing.
 137 ; CHECK-LABEL: test_int_x86_avx512fp8_mask_cfcmul_ph_bst:
 138 ; CHECK:       ## %bb.0:
 139 ; CHECK-NEXT:    kmovd %edi, %k1
 140 ; CHECK-NEXT:    vfcmulcph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm2 {%k1}
 141 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
 142 ; CHECK-NEXT:    retq
 143   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.128(<4 x float> %x0, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %x2, i8 %x3)
 144   ret <4 x float> %res
 145 }
 146
 147 ; Check conjugate complex FMUL is not commutable.
     ; Here the constant 1.0 splat is the FIRST operand and %x0 the second. The
     ; CHECK lines expect the splat to be materialized into xmm1 with
     ; vbroadcastss and passed as a register source, keeping the original operand
     ; order (vfcmulcph %xmm0, %xmm1, ...), instead of being folded as a {1to4}
     ; memory operand with the operands swapped. %x1 is not used by the call.
 148 define <4 x float> @test_int_x86_avx512fp8_mask_cfcmul_ph_bst2(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3){
 149 ; CHECK-LABEL: test_int_x86_avx512fp8_mask_cfcmul_ph_bst2:
 150 ; CHECK:       ## %bb.0:
 151 ; CHECK-NEXT:    kmovd %edi, %k1
 152 ; CHECK-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
 153 ; CHECK-NEXT:    vfcmulcph %xmm0, %xmm1, %xmm2 {%k1}
 154 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
 155 ; CHECK-NEXT:    retq
 156   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.128(<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %x0, <4 x float> %x2, i8 %x3)
 157   ret <4 x float> %res
 158 }
 159
 160 define <4 x float> @test_int_x86_avx512fp8_mask_cfcmul_ph_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3){
     ; Masked 128-bit vfcmul.cph with all-register operands. Expected code: the
     ; i8 mask %x3 (edi) is moved into %k1, vfcmulcph of xmm0/xmm1 writes xmm2
     ; (%x2, the third intrinsic operand) under {%k1}, and xmm2 is copied to xmm0
     ; for the return.
 161 ; CHECK-LABEL: test_int_x86_avx512fp8_mask_cfcmul_ph_128:
 162 ; CHECK:       ## %bb.0:
 163 ; CHECK-NEXT:    kmovd %edi, %k1
 164 ; CHECK-NEXT:    vfcmulcph %xmm1, %xmm0, %xmm2 {%k1}
 165 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
 166 ; CHECK-NEXT:    retq
 167   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
 168   ret <4 x float> %res
 169 }
 170
 171 define <4 x float> @test_int_x86_avx512fp8_maskz_cfcmul_ph_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3){
     ; Zero-masking 128-bit variant: the third intrinsic operand is
     ; zeroinitializer and the CHECK lines expect the {%k1} {z} form of
     ; vfcmulcph. %x2 is not used by the call; the expected code writes xmm2 and
     ; then copies it to xmm0 for the return.
 172 ; CHECK-LABEL: test_int_x86_avx512fp8_maskz_cfcmul_ph_128:
 173 ; CHECK:       ## %bb.0:
 174 ; CHECK-NEXT:    kmovd %edi, %k1
 175 ; CHECK-NEXT:    vfcmulcph %xmm1, %xmm0, %xmm2 {%k1} {z}
 176 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
 177 ; CHECK-NEXT:    retq
 178   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.128(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 %x3)
 179   ret <4 x float> %res
 180 }
 181
 182 define <4 x float> @test_int_x86_avx512fp8_cfcmul_ph_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2){
     ; Unmasked 128-bit case: the mask operand is the constant -1 (all bits set),
     ; and the CHECK lines expect no kmov and no {%k} annotation. The call passes
     ; (%x2, %x1, %x0), so the expected vfcmulcph reads xmm2/xmm1 and writes xmm0,
     ; the return register, directly.
 183 ; CHECK-LABEL: test_int_x86_avx512fp8_cfcmul_ph_128:
 184 ; CHECK:       ## %bb.0:
 185 ; CHECK-NEXT:    vfcmulcph %xmm1, %xmm2, %xmm0
 186 ; CHECK-NEXT:    retq
 187   %res = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.128(<4 x float> %x2, <4 x float> %x1, <4 x float> %x0, i8 -1)
 188   ret <4 x float> %res
 189 }
 190
 191 declare <8 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.256(<8 x float>, <8 x float>, <8 x float>, i8)
 192
 193 define <8 x float> @test_int_x86_avx512fp16_mask_cfcmul_ph_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3){
     ; Masked 256-bit vfcmul.cph with all-register operands. Expected code: the
     ; i8 mask %x3 (edi) is moved into %k1, vfcmulcph of ymm0/ymm1 writes ymm2
     ; (%x2, the third intrinsic operand) under {%k1}, and ymm2 is copied to ymm0
     ; for the return.
 194 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cfcmul_ph_256:
 195 ; CHECK:       ## %bb.0:
 196 ; CHECK-NEXT:    kmovd %edi, %k1
 197 ; CHECK-NEXT:    vfcmulcph %ymm1, %ymm0, %ymm2 {%k1}
 198 ; CHECK-NEXT:    vmovaps %ymm2, %ymm0
 199 ; CHECK-NEXT:    retq
 200   %res = call <8 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
 201   ret <8 x float> %res
 202 }
 203
 204 define <8 x float> @test_int_x86_avx512fp16_maskz_cfcmul_ph_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3){
     ; Zero-masking 256-bit variant: the third intrinsic operand is
     ; zeroinitializer and the CHECK lines expect the {%k1} {z} form of
     ; vfcmulcph. %x2 is not used by the call; the expected code writes ymm2 and
     ; then copies it to ymm0 for the return.
 205 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_cfcmul_ph_256:
 206 ; CHECK:       ## %bb.0:
 207 ; CHECK-NEXT:    kmovd %edi, %k1
 208 ; CHECK-NEXT:    vfcmulcph %ymm1, %ymm0, %ymm2 {%k1} {z}
 209 ; CHECK-NEXT:    vmovaps %ymm2, %ymm0
 210 ; CHECK-NEXT:    retq
 211   %res = call <8 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.256(<8 x float> %x0, <8 x float> %x1, <8 x float> zeroinitializer, i8 %x3)
 212   ret <8 x float> %res
 213 }
 214
 215 define <8 x float> @test_int_x86_avx512fp16_cfcmul_ph_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2){
     ; Unmasked 256-bit case: the mask operand is the constant -1 (all bits set),
     ; and the CHECK lines expect no kmov and no {%k} annotation. The call passes
     ; (%x2, %x1, %x0), so the expected vfcmulcph reads ymm2/ymm1 and writes ymm0,
     ; the return register, directly.
 216 ; CHECK-LABEL: test_int_x86_avx512fp16_cfcmul_ph_256:
 217 ; CHECK:       ## %bb.0:
 218 ; CHECK-NEXT:    vfcmulcph %ymm1, %ymm2, %ymm0
 219 ; CHECK-NEXT:    retq
 220   %res = call <8 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.256(<8 x float> %x2, <8 x float> %x1, <8 x float> %x0, i8 -1)
 221   ret <8 x float> %res
 222 }
 223
 224 declare <16 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
 225
 226 define <16 x float> @test_int_x86_avx512fp16_mask_cfcmul_ph_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
     ; Masked 512-bit vfcmul.cph. The 512-bit intrinsic takes an i16 mask plus a
     ; trailing i32 operand; with that operand set to 4 the CHECK lines expect no
     ; rounding/SAE annotation on the instruction. Expected code: mask (edi) is
     ; moved into %k1, vfcmulcph of zmm0/zmm1 writes zmm2 (%x2) under {%k1}, and
     ; zmm2 is copied to zmm0 for the return.
 227 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cfcmul_ph_512:
 228 ; CHECK:       ## %bb.0:
 229 ; CHECK-NEXT:    kmovd %edi, %k1
 230 ; CHECK-NEXT:    vfcmulcph %zmm1, %zmm0, %zmm2 {%k1}
 231 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0
 232 ; CHECK-NEXT:    retq
 233   %res = call <16 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
 234   ret <16 x float> %res
 235 }
 236
 237 define <16 x float> @test_int_x86_avx512fp16_maskz_cfcmul_ph_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
     ; Zero-masking 512-bit variant: the third intrinsic operand is
     ; zeroinitializer and the trailing i32 operand is 4. The CHECK lines expect
     ; the {%k1} {z} form of vfcmulcph with no rounding/SAE annotation. %x2 is
     ; not used by the call; the expected code writes zmm2 and then copies it to
     ; zmm0.
 238 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_cfcmul_ph_512:
 239 ; CHECK:       ## %bb.0:
 240 ; CHECK-NEXT:    kmovd %edi, %k1
 241 ; CHECK-NEXT:    vfcmulcph %zmm1, %zmm0, %zmm2 {%k1} {z}
 242 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0
 243 ; CHECK-NEXT:    retq
 244   %res = call <16 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.512(<16 x float> %x0, <16 x float> %x1, <16 x float> zeroinitializer, i16 %x3, i32 4)
 245   ret <16 x float> %res
 246 }
 247
 248 define <16 x float> @test_int_x86_avx512fp16_cfcmul_ph_512_rn(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
     ; Unmasked 512-bit case with an explicit rounding operand: mask is the
     ; constant -1 and the trailing i32 operand is 11, for which the CHECK lines
     ; expect the {rz-sae} annotation on vfcmulcph. The call passes
     ; (%x2, %x1, %x0), so the expected instruction reads zmm2/zmm1 and writes
     ; zmm0 directly. %x3 is not used.
     ; NOTE(review): the "_rn" suffix in the name does not match the expected
     ; {rz-sae} annotation -- the name looks misleading; confirm intent.
 249 ; CHECK-LABEL: test_int_x86_avx512fp16_cfcmul_ph_512_rn:
 250 ; CHECK:       ## %bb.0:
 251 ; CHECK-NEXT:    vfcmulcph {rz-sae}, %zmm1, %zmm2, %zmm0
 252 ; CHECK-NEXT:    retq
 253   %res = call <16 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.512(<16 x float> %x2, <16 x float> %x1, <16 x float> %x0, i16 -1, i32 11)
 254   ret <16 x float> %res
 255 }
 256
 257 define <16 x float> @test_int_x86_avx512fp16_cfcmul_ph_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
     ; Unmasked 512-bit case: mask is the constant -1 and the trailing i32
     ; operand is 4; the CHECK lines expect a plain vfcmulcph with neither a {%k}
     ; nor a rounding/SAE annotation. The call passes (%x2, %x1, %x0), so the
     ; expected instruction reads zmm2/zmm1 and writes zmm0 directly. %x3 is not
     ; used.
 258 ; CHECK-LABEL: test_int_x86_avx512fp16_cfcmul_ph_512:
 259 ; CHECK:       ## %bb.0:
 260 ; CHECK-NEXT:    vfcmulcph %zmm1, %zmm2, %zmm0
 261 ; CHECK-NEXT:    retq
 262   %res = call <16 x float> @llvm.x86.avx512fp16.mask.vfcmul.cph.512(<16 x float> %x2, <16 x float> %x1, <16 x float> %x0, i16 -1, i32 4)
 263   ret <16 x float> %res
 264 }