test/CodeGen/X86/avx512dqvl-intrinsics.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
   3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
   4
   5 declare <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double>, <2 x i64>, i8)
   6
     ; Masked vcvtpd2qq, 128-bit (<2 x double> -> <2 x i64>).
     ; The intrinsic is called twice on the same operands: once with the variable
     ; mask %x2 and once with an all-ones mask (i8 -1); the results are added.
     ; Expected code: one {%k1}-masked conversion into %xmm1, one unmasked
     ; conversion, then vpaddq. The mask is loaded with kmovb from the stack on
     ; X86 and with kmovw from %edi on X64.
   7 define <2 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
   8 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_128:
   9 ; X86:       # %bb.0:
  10 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
  11 ; X86-NEXT:    vcvtpd2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7b,0xc8]
  12 ; X86-NEXT:    vcvtpd2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x7b,0xc0]
  13 ; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
  14 ; X86-NEXT:    retl # encoding: [0xc3]
  15 ;
  16 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_128:
  17 ; X64:       # %bb.0:
  18 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
  19 ; X64-NEXT:    vcvtpd2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7b,0xc8]
  20 ; X64-NEXT:    vcvtpd2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x7b,0xc0]
  21 ; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
  22 ; X64-NEXT:    retq # encoding: [0xc3]
  23   %res = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
  24   %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
  25   %res2 = add <2 x i64> %res, %res1
  26   ret <2 x i64> %res2
  27 }
  28
  29 declare <4 x i64> @llvm.x86.avx512.mask.cvtpd2qq.256(<4 x double>, <4 x i64>, i8)
  30
     ; Masked vcvtpd2qq, 256-bit (<4 x double> -> <4 x i64>).
     ; Calls the intrinsic with the variable mask %x2 and again with an all-ones
     ; mask (i8 -1), then adds the two results.
     ; Expected code: a {%k1}-masked conversion into %ymm1, an unmasked
     ; conversion, and a vpaddq on ymm registers.
  31 define <4 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
  32 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_256:
  33 ; X86:       # %bb.0:
  34 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
  35 ; X86-NEXT:    vcvtpd2qq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7b,0xc8]
  36 ; X86-NEXT:    vcvtpd2qq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x7b,0xc0]
  37 ; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
  38 ; X86-NEXT:    retl # encoding: [0xc3]
  39 ;
  40 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_256:
  41 ; X64:       # %bb.0:
  42 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
  43 ; X64-NEXT:    vcvtpd2qq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7b,0xc8]
  44 ; X64-NEXT:    vcvtpd2qq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x7b,0xc0]
  45 ; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
  46 ; X64-NEXT:    retq # encoding: [0xc3]
  47   %res = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
  48   %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
  49   %res2 = add <4 x i64> %res, %res1
  50   ret <4 x i64> %res2
  51 }
  52
  53 declare <2 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.128(<2 x double>, <2 x i64>, i8)
  54
     ; Masked vcvtpd2uqq, 128-bit (<2 x double> -> <2 x i64>).
     ; Calls the intrinsic with the variable mask %x2 and again with an all-ones
     ; mask (i8 -1), then adds the two results.
     ; Expected code: a {%k1}-masked conversion into %xmm1, an unmasked
     ; conversion, and a vpaddq.
  55 define <2 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
  56 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_128:
  57 ; X86:       # %bb.0:
  58 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
  59 ; X86-NEXT:    vcvtpd2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x79,0xc8]
  60 ; X86-NEXT:    vcvtpd2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x79,0xc0]
  61 ; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
  62 ; X86-NEXT:    retl # encoding: [0xc3]
  63 ;
  64 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_128:
  65 ; X64:       # %bb.0:
  66 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
  67 ; X64-NEXT:    vcvtpd2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x79,0xc8]
  68 ; X64-NEXT:    vcvtpd2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x79,0xc0]
  69 ; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
  70 ; X64-NEXT:    retq # encoding: [0xc3]
  71   %res = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
  72   %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
  73   %res2 = add <2 x i64> %res, %res1
  74   ret <2 x i64> %res2
  75 }
  76
  77 declare <4 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.256(<4 x double>, <4 x i64>, i8)
  78
     ; Masked vcvtpd2uqq, 256-bit (<4 x double> -> <4 x i64>).
     ; Calls the intrinsic with the variable mask %x2 and again with an all-ones
     ; mask (i8 -1), then adds the two results.
     ; Expected code: a {%k1}-masked conversion into %ymm1, an unmasked
     ; conversion, and a vpaddq on ymm registers.
  79 define <4 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
  80 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_256:
  81 ; X86:       # %bb.0:
  82 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
  83 ; X86-NEXT:    vcvtpd2uqq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x79,0xc8]
  84 ; X86-NEXT:    vcvtpd2uqq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x79,0xc0]
  85 ; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
  86 ; X86-NEXT:    retl # encoding: [0xc3]
  87 ;
  88 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_256:
  89 ; X64:       # %bb.0:
  90 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
  91 ; X64-NEXT:    vcvtpd2uqq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x79,0xc8]
  92 ; X64-NEXT:    vcvtpd2uqq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x79,0xc0]
  93 ; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
  94 ; X64-NEXT:    retq # encoding: [0xc3]
  95   %res = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
  96   %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
  97   %res2 = add <4 x i64> %res, %res1
  98   ret <4 x i64> %res2
  99 }
 100
 101 declare <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float>, <2 x i64>, i8)
 102
     ; Masked vcvtps2qq, 128-bit: the intrinsic takes a <4 x float> source and
     ; returns <2 x i64>.
     ; Calls the intrinsic with the variable mask %x2 and again with an all-ones
     ; mask (i8 -1), then adds the two results.
     ; Expected code: a {%k1}-masked conversion into %xmm1, an unmasked
     ; conversion, and a vpaddq.
 103 define <2 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
 104 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128:
 105 ; X86:       # %bb.0:
 106 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 107 ; X86-NEXT:    vcvtps2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0xc8]
 108 ; X86-NEXT:    vcvtps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0xc0]
 109 ; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
 110 ; X86-NEXT:    retl # encoding: [0xc3]
 111 ;
 112 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128:
 113 ; X64:       # %bb.0:
 114 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 115 ; X64-NEXT:    vcvtps2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0xc8]
 116 ; X64-NEXT:    vcvtps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0xc0]
 117 ; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
 118 ; X64-NEXT:    retq # encoding: [0xc3]
 119   %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
 120   %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
 121   %res2 = add <2 x i64> %res, %res1
 122   ret <2 x i64> %res2
 123 }
 124
 125 declare <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float>, <4 x i64>, i8)
 126
     ; Masked vcvtps2qq, 256-bit result (<4 x float> -> <4 x i64>).
     ; Calls the intrinsic with the variable mask %x2 and again with an all-ones
     ; mask (i8 -1), then adds the two results.
     ; Expected code: the conversions read %xmm0 and write ymm registers (the
     ; masked one into %ymm1 under {%k1}), followed by a ymm vpaddq.
 127 define <4 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
 128 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_256:
 129 ; X86:       # %bb.0:
 130 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 131 ; X86-NEXT:    vcvtps2qq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7b,0xc8]
 132 ; X86-NEXT:    vcvtps2qq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x7b,0xc0]
 133 ; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
 134 ; X86-NEXT:    retl # encoding: [0xc3]
 135 ;
 136 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_256:
 137 ; X64:       # %bb.0:
 138 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 139 ; X64-NEXT:    vcvtps2qq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7b,0xc8]
 140 ; X64-NEXT:    vcvtps2qq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x7b,0xc0]
 141 ; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
 142 ; X64-NEXT:    retq # encoding: [0xc3]
 143   %res = call <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
 144   %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
 145   %res2 = add <4 x i64> %res, %res1
 146   ret <4 x i64> %res2
 147 }
 148
 149 declare <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float>, <2 x i64>, i8)
 150
     ; Masked vcvtps2uqq, 128-bit: the intrinsic takes a <4 x float> source and
     ; returns <2 x i64>.
     ; Calls the intrinsic with the variable mask %x2 and again with an all-ones
     ; mask (i8 -1), then adds the two results.
     ; Expected code: a {%k1}-masked conversion into %xmm1, an unmasked
     ; conversion, and a vpaddq.
 151 define <2 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
 152 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128:
 153 ; X86:       # %bb.0:
 154 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 155 ; X86-NEXT:    vcvtps2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0xc8]
 156 ; X86-NEXT:    vcvtps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0xc0]
 157 ; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
 158 ; X86-NEXT:    retl # encoding: [0xc3]
 159 ;
 160 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128:
 161 ; X64:       # %bb.0:
 162 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 163 ; X64-NEXT:    vcvtps2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0xc8]
 164 ; X64-NEXT:    vcvtps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0xc0]
 165 ; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
 166 ; X64-NEXT:    retq # encoding: [0xc3]
 167   %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
 168   %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
 169   %res2 = add <2 x i64> %res, %res1
 170   ret <2 x i64> %res2
 171 }
 172
 173 declare <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float>, <4 x i64>, i8)
 174
     ; Masked vcvtps2uqq, 256-bit result (<4 x float> -> <4 x i64>).
     ; Calls the intrinsic with the variable mask %x2 and again with an all-ones
     ; mask (i8 -1), then adds the two results.
     ; Expected code: the conversions read %xmm0 and write ymm registers (the
     ; masked one into %ymm1 under {%k1}), followed by a ymm vpaddq.
 175 define <4 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
 176 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_256:
 177 ; X86:       # %bb.0:
 178 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 179 ; X86-NEXT:    vcvtps2uqq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x79,0xc8]
 180 ; X86-NEXT:    vcvtps2uqq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x79,0xc0]
 181 ; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
 182 ; X86-NEXT:    retl # encoding: [0xc3]
 183 ;
 184 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_256:
 185 ; X64:       # %bb.0:
 186 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 187 ; X64-NEXT:    vcvtps2uqq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x79,0xc8]
 188 ; X64-NEXT:    vcvtps2uqq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x79,0xc0]
 189 ; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
 190 ; X64-NEXT:    retq # encoding: [0xc3]
 191   %res = call <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
 192   %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
 193   %res2 = add <4 x i64> %res, %res1
 194   ret <4 x i64> %res2
 195 }
 196
 197 declare <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64>, <4 x float>, i8)
 198
     ; Masked vcvtqq2ps, 128-bit: the intrinsic takes a <2 x i64> source and
     ; returns <4 x float>.
     ; Calls the intrinsic with the variable mask %x2 and again with an all-ones
     ; mask (i8 -1), then combines the two results with fadd.
     ; Expected code: a {%k1}-masked conversion into %xmm1, an unmasked
     ; conversion, and a vaddps.
 199 define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_128(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
 200 ; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_128:
 201 ; X86:       # %bb.0:
 202 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 203 ; X86-NEXT:    vcvtqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8]
 204 ; X86-NEXT:    vcvtqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x5b,0xc0]
 205 ; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
 206 ; X86-NEXT:    retl # encoding: [0xc3]
 207 ;
 208 ; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_128:
 209 ; X64:       # %bb.0:
 210 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 211 ; X64-NEXT:    vcvtqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8]
 212 ; X64-NEXT:    vcvtqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x5b,0xc0]
 213 ; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
 214 ; X64-NEXT:    retq # encoding: [0xc3]
 215   %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2)
 216   %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1)
 217   %res2 = fadd <4 x float> %res, %res1
 218   ret <4 x float> %res2
 219 }
 220
     ; Variant of the 128-bit masked vcvtqq2ps test in which each intrinsic
     ; result is passed through a shufflevector that keeps lanes 0-1 and takes
     ; lanes 2-3 from zeroinitializer (indices 4 and 5) before the fadd.
     ; The expected assembly contains no separate instruction for those
     ; shuffles: only the masked conversion, the unmasked conversion and vaddps.
 221 define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_128_zext(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
 222 ; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_128_zext:
 223 ; X86:       # %bb.0:
 224 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 225 ; X86-NEXT:    vcvtqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8]
 226 ; X86-NEXT:    vcvtqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x5b,0xc0]
 227 ; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
 228 ; X86-NEXT:    retl # encoding: [0xc3]
 229 ;
 230 ; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_128_zext:
 231 ; X64:       # %bb.0:
 232 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 233 ; X64-NEXT:    vcvtqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8]
 234 ; X64-NEXT:    vcvtqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x5b,0xc0]
 235 ; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
 236 ; X64-NEXT:    retq # encoding: [0xc3]
 237   %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2)
 238   %res1 = shufflevector <4 x float> %res, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 239   %res2 = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1)
 240   %res3 = shufflevector <4 x float> %res2, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 241   %res4 = fadd <4 x float> %res1, %res3
 242   ret <4 x float> %res4
 243 }
 244
     ; 256-bit source vcvtqq2ps expressed with generic IR instead of an
     ; intrinsic: sitofp <4 x i64> -> <4 x float>, with masking written as a
     ; select between the converted value and %x1.
     ; Expected code: an unmasked conversion into %xmm2, a {%k1}-masked
     ; conversion into %xmm1, vaddps, and a vzeroupper before returning.
 245 define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) {
 246 ; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_256:
 247 ; X86:       # %bb.0:
 248 ; X86-NEXT:    vcvtqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xfc,0x28,0x5b,0xd0]
 249 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 250 ; X86-NEXT:    vcvtqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x5b,0xc8]
 251 ; X86-NEXT:    vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
 252 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 253 ; X86-NEXT:    retl # encoding: [0xc3]
 254 ;
 255 ; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_256:
 256 ; X64:       # %bb.0:
 257 ; X64-NEXT:    vcvtqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xfc,0x28,0x5b,0xd0]
 258 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 259 ; X64-NEXT:    vcvtqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x5b,0xc8]
 260 ; X64-NEXT:    vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
 261 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 262 ; X64-NEXT:    retq # encoding: [0xc3]
 263   %cvt1 = sitofp <4 x i64> %x0 to <4 x float>
     ; Reinterpret the i8 mask as 8 lane bits and keep only bits 0-3, one per
     ; result element.
 264   %1 = bitcast i8 %x2 to <8 x i1>
 265   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 266   %2 = select <4 x i1> %extract, <4 x float> %cvt1, <4 x float> %x1
     ; Second, unmasked conversion of the same input.
 267   %cvt = sitofp <4 x i64> %x0 to <4 x float>
 268   %res2 = fadd <4 x float> %2, %cvt
 269   ret <4 x float> %res2
 270 }
 271
 272 declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double>, <2 x i64>, i8)
 273
     ; Masked vcvttpd2qq, 128-bit (<2 x double> -> <2 x i64>).
     ; Calls the intrinsic with the variable mask %x2 and again with an all-ones
     ; mask (i8 -1), then adds the two results.
     ; Expected code: a {%k1}-masked conversion into %xmm1, an unmasked
     ; conversion, and a vpaddq.
 274 define <2 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
 275 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_128:
 276 ; X86:       # %bb.0:
 277 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 278 ; X86-NEXT:    vcvttpd2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7a,0xc8]
 279 ; X86-NEXT:    vcvttpd2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x7a,0xc0]
 280 ; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
 281 ; X86-NEXT:    retl # encoding: [0xc3]
 282 ;
 283 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_128:
 284 ; X64:       # %bb.0:
 285 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 286 ; X64-NEXT:    vcvttpd2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7a,0xc8]
 287 ; X64-NEXT:    vcvttpd2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x7a,0xc0]
 288 ; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
 289 ; X64-NEXT:    retq # encoding: [0xc3]
 290   %res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
 291   %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
 292   %res2 = add <2 x i64> %res, %res1
 293   ret <2 x i64> %res2
 294 }
 295
 296 declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double>, <4 x i64>, i8)
 297
     ; Masked vcvttpd2qq, 256-bit (<4 x double> -> <4 x i64>).
     ; Calls the intrinsic with the variable mask %x2 and again with an all-ones
     ; mask (i8 -1), then adds the two results.
     ; Expected code: a {%k1}-masked conversion into %ymm1, an unmasked
     ; conversion, and a vpaddq on ymm registers.
 298 define <4 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
 299 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_256:
 300 ; X86:       # %bb.0:
 301 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 302 ; X86-NEXT:    vcvttpd2qq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7a,0xc8]
 303 ; X86-NEXT:    vcvttpd2qq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x7a,0xc0]
 304 ; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
 305 ; X86-NEXT:    retl # encoding: [0xc3]
 306 ;
 307 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_256:
 308 ; X64:       # %bb.0:
 309 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 310 ; X64-NEXT:    vcvttpd2qq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7a,0xc8]
 311 ; X64-NEXT:    vcvttpd2qq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x7a,0xc0]
 312 ; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
 313 ; X64-NEXT:    retq # encoding: [0xc3]
 314   %res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
 315   %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
 316   %res2 = add <4 x i64> %res, %res1
 317   ret <4 x i64> %res2
 318 }
 319
 320 declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double>, <2 x i64>, i8)
 321
     ; Masked vcvttpd2uqq, 128-bit (<2 x double> -> <2 x i64>).
     ; Calls the intrinsic with the variable mask %x2 and again with an all-ones
     ; mask (i8 -1), then adds the two results.
     ; Expected code: a {%k1}-masked conversion into %xmm1, an unmasked
     ; conversion, and a vpaddq.
 322 define <2 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
 323 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_128:
 324 ; X86:       # %bb.0:
 325 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 326 ; X86-NEXT:    vcvttpd2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x78,0xc8]
 327 ; X86-NEXT:    vcvttpd2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x78,0xc0]
 328 ; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
 329 ; X86-NEXT:    retl # encoding: [0xc3]
 330 ;
 331 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_128:
 332 ; X64:       # %bb.0:
 333 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 334 ; X64-NEXT:    vcvttpd2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x78,0xc8]
 335 ; X64-NEXT:    vcvttpd2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x78,0xc0]
 336 ; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
 337 ; X64-NEXT:    retq # encoding: [0xc3]
 338   %res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
 339   %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
 340   %res2 = add <2 x i64> %res, %res1
 341   ret <2 x i64> %res2
 342 }
 343
 344 declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double>, <4 x i64>, i8)
 345
     ; Masked vcvttpd2uqq, 256-bit (<4 x double> -> <4 x i64>).
     ; Calls the intrinsic with the variable mask %x2 and again with an all-ones
     ; mask (i8 -1), then adds the two results.
     ; Expected code: a {%k1}-masked conversion into %ymm1, an unmasked
     ; conversion, and a vpaddq on ymm registers.
 346 define <4 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
 347 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_256:
 348 ; X86:       # %bb.0:
 349 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 350 ; X86-NEXT:    vcvttpd2uqq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x78,0xc8]
 351 ; X86-NEXT:    vcvttpd2uqq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x78,0xc0]
 352 ; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
 353 ; X86-NEXT:    retl # encoding: [0xc3]
 354 ;
 355 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_256:
 356 ; X64:       # %bb.0:
 357 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 358 ; X64-NEXT:    vcvttpd2uqq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x78,0xc8]
 359 ; X64-NEXT:    vcvttpd2uqq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x78,0xc0]
 360 ; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
 361 ; X64-NEXT:    retq # encoding: [0xc3]
 362   %res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
 363   %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
 364   %res2 = add <4 x i64> %res, %res1
 365   ret <4 x i64> %res2
 366 }
 367
 368 declare <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float>, <2 x i64>, i8)
 369
     ; Masked vcvttps2qq, 128-bit: the intrinsic takes a <4 x float> source and
     ; returns <2 x i64>.
     ; Calls the intrinsic with the variable mask %x2 and again with an all-ones
     ; mask (i8 -1), then adds the two results.
     ; Expected code: a {%k1}-masked conversion into %xmm1, an unmasked
     ; conversion, and a vpaddq.
 370 define <2 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
 371 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128:
 372 ; X86:       # %bb.0:
 373 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 374 ; X86-NEXT:    vcvttps2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc8]
 375 ; X86-NEXT:    vcvttps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0]
 376 ; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
 377 ; X86-NEXT:    retl # encoding: [0xc3]
 378 ;
 379 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128:
 380 ; X64:       # %bb.0:
 381 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 382 ; X64-NEXT:    vcvttps2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc8]
 383 ; X64-NEXT:    vcvttps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0]
 384 ; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
 385 ; X64-NEXT:    retq # encoding: [0xc3]
 386   %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
 387   %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
 388   %res2 = add <2 x i64> %res, %res1
 389   ret <2 x i64> %res2
 390 }
 391
 392 declare <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float>, <4 x i64>, i8)
 393
     ; Masked vcvttps2qq, 256-bit result (<4 x float> -> <4 x i64>).
     ; Calls the intrinsic with the variable mask %x2 and again with an all-ones
     ; mask (i8 -1), then adds the two results.
     ; Expected code: the conversions read %xmm0 and write ymm registers (the
     ; masked one into %ymm1 under {%k1}), followed by a ymm vpaddq.
 394 define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
 395 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_256:
 396 ; X86:       # %bb.0:
 397 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 398 ; X86-NEXT:    vcvttps2qq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7a,0xc8]
 399 ; X86-NEXT:    vcvttps2qq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x7a,0xc0]
 400 ; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
 401 ; X86-NEXT:    retl # encoding: [0xc3]
 402 ;
 403 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_256:
 404 ; X64:       # %bb.0:
 405 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 406 ; X64-NEXT:    vcvttps2qq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7a,0xc8]
 407 ; X64-NEXT:    vcvttps2qq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x7a,0xc0]
 408 ; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
 409 ; X64-NEXT:    retq # encoding: [0xc3]
 410   %res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
 411   %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
 412   %res2 = add <4 x i64> %res, %res1
 413   ret <4 x i64> %res2
 414 }
 415
 416 declare <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64>, <4 x float>, i8)
 417
     ; Masked vcvtuqq2ps, 128-bit: the intrinsic takes a <2 x i64> source and
     ; returns <4 x float>.
     ; Calls the intrinsic with the variable mask %x2 and again with an all-ones
     ; mask (i8 -1), then combines the two results with fadd.
     ; Expected code: a {%k1}-masked conversion into %xmm1, an unmasked
     ; conversion, and a vaddps.
 418 define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_128(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
 419 ; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_128:
 420 ; X86:       # %bb.0:
 421 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 422 ; X86-NEXT:    vcvtuqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
 423 ; X86-NEXT:    vcvtuqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x08,0x7a,0xc0]
 424 ; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
 425 ; X86-NEXT:    retl # encoding: [0xc3]
 426 ;
 427 ; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_128:
 428 ; X64:       # %bb.0:
 429 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 430 ; X64-NEXT:    vcvtuqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
 431 ; X64-NEXT:    vcvtuqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x08,0x7a,0xc0]
 432 ; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
 433 ; X64-NEXT:    retq # encoding: [0xc3]
 434   %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2)
 435   %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1)
 436   %res2 = fadd <4 x float> %res, %res1
 437   ret <4 x float> %res2
 438 }
 439
     ; Variant of the 128-bit masked vcvtuqq2ps test in which each intrinsic
     ; result is passed through a shufflevector that keeps lanes 0-1 and takes
     ; lanes 2-3 from zeroinitializer (indices 4 and 5) before the fadd.
     ; The expected assembly contains no separate instruction for those
     ; shuffles: only the masked conversion, the unmasked conversion and vaddps.
 440 define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_128_zext(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
 441 ; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_128_zext:
 442 ; X86:       # %bb.0:
 443 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 444 ; X86-NEXT:    vcvtuqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
 445 ; X86-NEXT:    vcvtuqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x08,0x7a,0xc0]
 446 ; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
 447 ; X86-NEXT:    retl # encoding: [0xc3]
 448 ;
 449 ; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_128_zext:
 450 ; X64:       # %bb.0:
 451 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 452 ; X64-NEXT:    vcvtuqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
 453 ; X64-NEXT:    vcvtuqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x08,0x7a,0xc0]
 454 ; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
 455 ; X64-NEXT:    retq # encoding: [0xc3]
 456   %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2)
 457   %res1 = shufflevector <4 x float> %res, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 458   %res2 = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1)
 459   %res3 = shufflevector <4 x float> %res2, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 460   %res4 = fadd <4 x float> %res1, %res3
 461   ret <4 x float> %res4
 462 }
 463
 464 declare <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64>, <4 x float>, i8)
 465
     ; NOTE(review): the declaration above is not called by the function below,
     ; which uses generic uitofp + select instead; it may be a leftover or be
     ; referenced elsewhere in the file - confirm before removing.
     ;
     ; 256-bit source vcvtuqq2ps expressed with generic IR: uitofp <4 x i64> ->
     ; <4 x float>, with masking written as a select between the converted
     ; value and %x1.
     ; Expected code: an unmasked conversion into %xmm2, a {%k1}-masked
     ; conversion into %xmm1, vaddps, and a vzeroupper before returning.
 466 define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) {
 467 ; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_256:
 468 ; X86:       # %bb.0:
 469 ; X86-NEXT:    vcvtuqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xff,0x28,0x7a,0xd0]
 470 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 471 ; X86-NEXT:    vcvtuqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7a,0xc8]
 472 ; X86-NEXT:    vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
 473 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 474 ; X86-NEXT:    retl # encoding: [0xc3]
 475 ;
 476 ; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_256:
 477 ; X64:       # %bb.0:
 478 ; X64-NEXT:    vcvtuqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xff,0x28,0x7a,0xd0]
 479 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 480 ; X64-NEXT:    vcvtuqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7a,0xc8]
 481 ; X64-NEXT:    vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
 482 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 483 ; X64-NEXT:    retq # encoding: [0xc3]
 484   %cvt1 = uitofp <4 x i64> %x0 to <4 x float>
     ; Reinterpret the i8 mask as 8 lane bits and keep only bits 0-3, one per
     ; result element.
 485   %1 = bitcast i8 %x2 to <8 x i1>
 486   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 487   %2 = select <4 x i1> %extract, <4 x float> %cvt1, <4 x float> %x1
     ; Second, unmasked conversion of the same input.
 488   %cvt = uitofp <4 x i64> %x0 to <4 x float>
 489   %res2 = fadd <4 x float> %2, %cvt
 490   ret <4 x float> %res2
 491 }
 492
 493 declare <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float>, <2 x i64>, i8)
 494
     ; Masked vcvttps2uqq, 128-bit: the intrinsic takes a <4 x float> source and
     ; returns <2 x i64>.
     ; Calls the intrinsic with the variable mask %x2 and again with an all-ones
     ; mask (i8 -1), then adds the two results.
     ; Expected code: a {%k1}-masked conversion into %xmm1, an unmasked
     ; conversion, and a vpaddq.
 495 define <2 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
 496 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128:
 497 ; X86:       # %bb.0:
 498 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 499 ; X86-NEXT:    vcvttps2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc8]
 500 ; X86-NEXT:    vcvttps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0]
 501 ; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
 502 ; X86-NEXT:    retl # encoding: [0xc3]
 503 ;
 504 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128:
 505 ; X64:       # %bb.0:
 506 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 507 ; X64-NEXT:    vcvttps2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc8]
 508 ; X64-NEXT:    vcvttps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0]
 509 ; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
 510 ; X64-NEXT:    retq # encoding: [0xc3]
 511   %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
 512   %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
 513   %res2 = add <2 x i64> %res, %res1
 514   ret <2 x i64> %res2
 515 }
 516
 517 declare <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float>, <4 x i64>, i8)
 518
     ; 256-bit-result variant of the cvttps2uqq test: the source is a
     ; <4 x float> (%xmm0 in the expected assembly) and the result is a
     ; <4 x i64> (%ymm). One call uses the variable mask %x2, the other an
     ; all-ones mask (i8 -1); the results are added with a <4 x i64> add, which
     ; shows up as vpaddq on %ymm registers.
 519 define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
 520 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_256:
 521 ; X86:       # %bb.0:
 522 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 523 ; X86-NEXT:    vcvttps2uqq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x78,0xc8]
 524 ; X86-NEXT:    vcvttps2uqq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x78,0xc0]
 525 ; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
 526 ; X86-NEXT:    retl # encoding: [0xc3]
 527 ;
 528 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_256:
 529 ; X64:       # %bb.0:
 530 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 531 ; X64-NEXT:    vcvttps2uqq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x78,0xc8]
 532 ; X64-NEXT:    vcvttps2uqq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x78,0xc0]
 533 ; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
 534 ; X64-NEXT:    retq # encoding: [0xc3]
 535   %res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
 536   %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
 537   %res2 = add <4 x i64> %res, %res1
 538   ret <4 x i64> %res2
 539 }
 540
 541 declare <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double>, i32, <2 x double>, i8)
 542
     ; Calls llvm.x86.avx512.mask.reduce.pd.128 twice on %x0: with immediate 4
     ; under the variable mask %x3, and with immediate 8 under an all-ones mask
     ; (i8 -1). The results are combined with fadd. The expected assembly
     ; carries each i32 immediate through unchanged ($4 on the write-masked
     ; vreducepd, $8 on the unmasked one) and ends with a vaddpd.
 543 define <2 x double>@test_int_x86_avx512_mask_reduce_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
 544 ; X86-LABEL: test_int_x86_avx512_mask_reduce_pd_128:
 545 ; X86:       # %bb.0:
 546 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 547 ; X86-NEXT:    vreducepd $4, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x56,0xc8,0x04]
 548 ; X86-NEXT:    vreducepd $8, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x56,0xc0,0x08]
 549 ; X86-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
 550 ; X86-NEXT:    retl # encoding: [0xc3]
 551 ;
 552 ; X64-LABEL: test_int_x86_avx512_mask_reduce_pd_128:
 553 ; X64:       # %bb.0:
 554 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 555 ; X64-NEXT:    vreducepd $4, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x56,0xc8,0x04]
 556 ; X64-NEXT:    vreducepd $8, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x56,0xc0,0x08]
 557 ; X64-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
 558 ; X64-NEXT:    retq # encoding: [0xc3]
 559   %res = call <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double> %x0, i32 4, <2 x double> %x2, i8 %x3)
 560   %res1 = call <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double> %x0, i32 8, <2 x double> %x2, i8 -1)
 561   %res2 = fadd <2 x double> %res, %res1
 562   ret <2 x double> %res2
 563 }
 564
 565 declare <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double>, i32, <4 x double>, i8)
 566
     ; 256-bit variant of the reduce.pd test. The masked call (mask %x3) uses
     ; immediate 4 and the all-ones-mask call (i8 -1) uses immediate 0; the
     ; results are combined with fadd. The expected assembly shows vreducepd $4
     ; with a {%k1} write mask, vreducepd $0 without a mask, then vaddpd, all on
     ; %ymm registers.
 567 define <4 x double>@test_int_x86_avx512_mask_reduce_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
 568 ; X86-LABEL: test_int_x86_avx512_mask_reduce_pd_256:
 569 ; X86:       # %bb.0:
 570 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 571 ; X86-NEXT:    vreducepd $4, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x56,0xc8,0x04]
 572 ; X86-NEXT:    vreducepd $0, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x56,0xc0,0x00]
 573 ; X86-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
 574 ; X86-NEXT:    retl # encoding: [0xc3]
 575 ;
 576 ; X64-LABEL: test_int_x86_avx512_mask_reduce_pd_256:
 577 ; X64:       # %bb.0:
 578 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 579 ; X64-NEXT:    vreducepd $4, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x56,0xc8,0x04]
 580 ; X64-NEXT:    vreducepd $0, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x56,0xc0,0x00]
 581 ; X64-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
 582 ; X64-NEXT:    retq # encoding: [0xc3]
 583   %res = call <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double> %x0, i32 4, <4 x double> %x2, i8 %x3)
 584   %res1 = call <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double> %x0, i32 0, <4 x double> %x2, i8 -1)
 585   %res2 = fadd <4 x double> %res, %res1
 586   ret <4 x double> %res2
 587 }
 588
 589 declare <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float>, i32, <4 x float>, i8)
 590
     ; Single-precision 128-bit reduce test. The call under mask %x3 passes
     ; immediate 4; the call under an all-ones mask (i8 -1) passes immediate 88,
     ; which appears as $88 and as the trailing encoding byte 0x58 in the
     ; expected assembly. The two results are combined with fadd (vaddps).
 591 define <4 x float>@test_int_x86_avx512_mask_reduce_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
 592 ; X86-LABEL: test_int_x86_avx512_mask_reduce_ps_128:
 593 ; X86:       # %bb.0:
 594 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 595 ; X86-NEXT:    vreduceps $4, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x56,0xc8,0x04]
 596 ; X86-NEXT:    vreduceps $88, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x56,0xc0,0x58]
 597 ; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
 598 ; X86-NEXT:    retl # encoding: [0xc3]
 599 ;
 600 ; X64-LABEL: test_int_x86_avx512_mask_reduce_ps_128:
 601 ; X64:       # %bb.0:
 602 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 603 ; X64-NEXT:    vreduceps $4, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x56,0xc8,0x04]
 604 ; X64-NEXT:    vreduceps $88, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x56,0xc0,0x58]
 605 ; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
 606 ; X64-NEXT:    retq # encoding: [0xc3]
 607   %res = call <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float> %x0, i32 4, <4 x float> %x2, i8 %x3)
 608   %res1 = call <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float> %x0, i32 88, <4 x float> %x2, i8 -1)
 609   %res2 = fadd <4 x float> %res, %res1
 610   ret <4 x float> %res2
 611 }
 612
 613 declare <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float>, i32, <8 x float>, i8)
 614
     ; Single-precision 256-bit reduce test. Unlike the other reduce tests in
     ; this group, both calls pass the same immediate (11); they differ only in
     ; the mask operand (%x3 versus all-ones i8 -1). The expected assembly still
     ; contains two vreduceps $11 instructions, one write-masked with {%k1} and
     ; one unmasked, followed by vaddps on %ymm registers.
 615 define <8 x float>@test_int_x86_avx512_mask_reduce_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
 616 ; X86-LABEL: test_int_x86_avx512_mask_reduce_ps_256:
 617 ; X86:       # %bb.0:
 618 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 619 ; X86-NEXT:    vreduceps $11, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x56,0xc8,0x0b]
 620 ; X86-NEXT:    vreduceps $11, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x56,0xc0,0x0b]
 621 ; X86-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
 622 ; X86-NEXT:    retl # encoding: [0xc3]
 623 ;
 624 ; X64-LABEL: test_int_x86_avx512_mask_reduce_ps_256:
 625 ; X64:       # %bb.0:
 626 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 627 ; X64-NEXT:    vreduceps $11, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x56,0xc8,0x0b]
 628 ; X64-NEXT:    vreduceps $11, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x56,0xc0,0x0b]
 629 ; X64-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
 630 ; X64-NEXT:    retq # encoding: [0xc3]
 631   %res = call <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 %x3)
 632   %res1 = call <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 -1)
 633   %res2 = fadd <8 x float> %res, %res1
 634   ret <8 x float> %res2
 635 }
 636
 637 declare <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double>, <2 x double>, i32, <2 x double>, i8)
 638
     ; Calls llvm.x86.avx512.mask.range.pd.128 twice on the operand pair
     ; (%x0, %x1): with immediate 4 under the variable mask %x4, and with
     ; immediate 8 under an all-ones mask (i8 -1). The results are combined with
     ; fadd. In the expected assembly the masked vrangepd $4 writes %xmm2 under
     ; {%k1}, the unmasked vrangepd $8 writes %xmm0, and vaddpd sums them.
 639 define <2 x double>@test_int_x86_avx512_mask_range_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
 640 ; X86-LABEL: test_int_x86_avx512_mask_range_pd_128:
 641 ; X86:       # %bb.0:
 642 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 643 ; X86-NEXT:    vrangepd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x50,0xd1,0x04]
 644 ; X86-NEXT:    vrangepd $8, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x50,0xc1,0x08]
 645 ; X86-NEXT:    vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
 646 ; X86-NEXT:    retl # encoding: [0xc3]
 647 ;
 648 ; X64-LABEL: test_int_x86_avx512_mask_range_pd_128:
 649 ; X64:       # %bb.0:
 650 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 651 ; X64-NEXT:    vrangepd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x50,0xd1,0x04]
 652 ; X64-NEXT:    vrangepd $8, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x50,0xc1,0x08]
 653 ; X64-NEXT:    vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
 654 ; X64-NEXT:    retq # encoding: [0xc3]
 655   %res = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %x0, <2 x double> %x1, i32 4, <2 x double> %x3, i8 %x4)
 656   %res1 = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %x0, <2 x double> %x1, i32 8, <2 x double> %x3, i8 -1)
 657   %res2 = fadd <2 x double> %res, %res1
 658   ret <2 x double> %res2
 659 }
 660
 661 declare <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)
 662
     ; 256-bit variant of the range.pd test. The masked call (mask %x4) passes
     ; immediate 4 and the all-ones-mask call (i8 -1) passes immediate 88; the
     ; results are combined with fadd. The expected assembly shows vrangepd $4
     ; into %ymm2 under {%k1}, vrangepd $88 into %ymm0 unmasked, then vaddpd.
 663 define <4 x double>@test_int_x86_avx512_mask_range_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
 664 ; X86-LABEL: test_int_x86_avx512_mask_range_pd_256:
 665 ; X86:       # %bb.0:
 666 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 667 ; X86-NEXT:    vrangepd $4, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x50,0xd1,0x04]
 668 ; X86-NEXT:    vrangepd $88, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x50,0xc1,0x58]
 669 ; X86-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
 670 ; X86-NEXT:    retl # encoding: [0xc3]
 671 ;
 672 ; X64-LABEL: test_int_x86_avx512_mask_range_pd_256:
 673 ; X64:       # %bb.0:
 674 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 675 ; X64-NEXT:    vrangepd $4, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x50,0xd1,0x04]
 676 ; X64-NEXT:    vrangepd $88, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x50,0xc1,0x58]
 677 ; X64-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
 678 ; X64-NEXT:    retq # encoding: [0xc3]
 679   %res = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %x0, <4 x double> %x1, i32 4, <4 x double> %x3, i8 %x4)
 680   %res1 = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %x0, <4 x double> %x1, i32 88, <4 x double> %x3, i8 -1)
 681   %res2 = fadd <4 x double> %res, %res1
 682   ret <4 x double> %res2
 683 }
 684
 685 declare <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float>, <4 x float>, i32, <4 x float>, i8)
 686
     ; Single-precision 128-bit range test on the operand pair (%x0, %x1).
     ; The call under mask %x4 passes immediate 4; the call under an all-ones
     ; mask (i8 -1) passes immediate 88. Their results are combined with fadd.
     ; The expected assembly is a {%k1}-masked vrangeps $4 into %xmm2, an
     ; unmasked vrangeps $88 into %xmm0, and a vaddps.
 687 define <4 x float>@test_int_x86_avx512_mask_range_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
 688 ; X86-LABEL: test_int_x86_avx512_mask_range_ps_128:
 689 ; X86:       # %bb.0:
 690 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 691 ; X86-NEXT:    vrangeps $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x50,0xd1,0x04]
 692 ; X86-NEXT:    vrangeps $88, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x50,0xc1,0x58]
 693 ; X86-NEXT:    vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
 694 ; X86-NEXT:    retl # encoding: [0xc3]
 695 ;
 696 ; X64-LABEL: test_int_x86_avx512_mask_range_ps_128:
 697 ; X64:       # %bb.0:
 698 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 699 ; X64-NEXT:    vrangeps $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x50,0xd1,0x04]
 700 ; X64-NEXT:    vrangeps $88, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x50,0xc1,0x58]
 701 ; X64-NEXT:    vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
 702 ; X64-NEXT:    retq # encoding: [0xc3]
 703   %res = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %x0, <4 x float> %x1, i32 4, <4 x float> %x3, i8 %x4)
 704   %res1 = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %x0, <4 x float> %x1, i32 88, <4 x float> %x3, i8 -1)
 705   %res2 = fadd <4 x float> %res, %res1
 706   ret <4 x float> %res2
 707 }
 708
 709 declare <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)
 710
     ; Single-precision 256-bit range test on the operand pair (%x0, %x1).
     ; One call uses immediate 4 with the variable mask %x4, the other uses
     ; immediate 88 with an all-ones mask (i8 -1); the results are combined
     ; with fadd. The expected assembly mirrors the 128-bit test but operates
     ; on %ymm registers.
 711 define <8 x float>@test_int_x86_avx512_mask_range_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
 712 ; X86-LABEL: test_int_x86_avx512_mask_range_ps_256:
 713 ; X86:       # %bb.0:
 714 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 715 ; X86-NEXT:    vrangeps $4, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x50,0xd1,0x04]
 716 ; X86-NEXT:    vrangeps $88, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x50,0xc1,0x58]
 717 ; X86-NEXT:    vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
 718 ; X86-NEXT:    retl # encoding: [0xc3]
 719 ;
 720 ; X64-LABEL: test_int_x86_avx512_mask_range_ps_256:
 721 ; X64:       # %bb.0:
 722 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 723 ; X64-NEXT:    vrangeps $4, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x50,0xd1,0x04]
 724 ; X64-NEXT:    vrangeps $88, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x50,0xc1,0x58]
 725 ; X64-NEXT:    vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
 726 ; X64-NEXT:    retq # encoding: [0xc3]
 727   %res = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %x0, <8 x float> %x1, i32 4, <8 x float> %x3, i8 %x4)
 728   %res1 = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %x0, <8 x float> %x1, i32 88, <8 x float> %x3, i8 -1)
 729   %res2 = fadd <8 x float> %res, %res1
 730   ret <8 x float> %res2
 731 }
 732
 733 declare <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float>, i32)
 734
     ; Calls llvm.x86.avx512.fpclass.ps.128 on %x0 with immediates 2 and 4 and
     ; ANDs the two <4 x i1> results. The shufflevector widens the result to
     ; <8 x i1>: indices 0-3 select the AND result and indices 4-7 select lanes
     ; of the zeroinitializer operand, so the upper four bits of the returned
     ; i8 are zero. In the expected assembly there is no separate AND: the $4
     ; compare writes %k1, which is then used as the write mask of the $2
     ; compare. Both run lines share one set of assertions, with the return
     ; matched by a regex covering retl and retq.
 735 define i8 @test_int_x86_avx512_fpclass_ps_128(<4 x float> %x0) {
 736 ; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_128:
 737 ; CHECK:       # %bb.0:
 738 ; CHECK-NEXT:    vfpclassps $4, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x66,0xc8,0x04]
 739 ; CHECK-NEXT:    vfpclassps $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x66,0xc0,0x02]
 740 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 741 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 742 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 743   %res = call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %x0, i32 2)
 744   %res1 = call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %x0, i32 4)
 745   %1 = and <4 x i1> %res1, %res
 746   %2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 747   %3 = bitcast <8 x i1> %2 to i8
 748   ret i8 %3
 749 }
 750
 751 declare <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float>, i32)
 752
     ; 256-bit single-precision fpclass test. The intrinsic already returns
     ; <8 x i1>, so the AND of the immediate-2 and immediate-4 results is
     ; bitcast straight to i8 with no widening shuffle. The expected assembly
     ; uses the $4 compare result (%k1) as the write mask of the $2 compare,
     ; and includes a vzeroupper before the return.
 753 define i8 @test_int_x86_avx512_fpclass_ps_256(<8 x float> %x0) {
 754 ; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_256:
 755 ; CHECK:       # %bb.0:
 756 ; CHECK-NEXT:    vfpclassps $4, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x66,0xc8,0x04]
 757 ; CHECK-NEXT:    vfpclassps $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x66,0xc0,0x02]
 758 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 759 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 760 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 761 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 762   %res = call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %x0, i32 2)
 763   %res1 = call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %x0, i32 4)
 764   %1 = and <8 x i1> %res1, %res
 765   %2 = bitcast <8 x i1> %1 to i8
 766   ret i8 %2
 767 }
 768
 769 declare <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double>, i32)
 770
     ; 128-bit double-precision fpclass test. Note the immediates are assigned
     ; in the opposite order from the neighbouring fpclass tests: %res uses 4
     ; and %res1 uses 2, so the expected assembly has the $2 compare writing
     ; %k1 and the $4 compare masked by it. The <2 x i1> AND result is widened
     ; to <8 x i1> by a shufflevector whose indices 0-1 select the AND result
     ; and whose remaining indices (2, 3, 2, 3, 2, 3) all select lanes of the
     ; zeroinitializer operand, leaving the upper six bits of the i8 zero.
 771 define i8 @test_int_x86_avx512_fpclass_pd_128(<2 x double> %x0) {
 772 ; CHECK-LABEL: test_int_x86_avx512_fpclass_pd_128:
 773 ; CHECK:       # %bb.0:
 774 ; CHECK-NEXT:    vfpclasspd $2, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x66,0xc8,0x02]
 775 ; CHECK-NEXT:    vfpclasspd $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x66,0xc0,0x04]
 776 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 777 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 778 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 779   %res = call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %x0, i32 4)
 780   %res1 = call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %x0, i32 2)
 781   %1 = and <2 x i1> %res1, %res
 782   %2 = shufflevector <2 x i1> %1, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
 783   %3 = bitcast <8 x i1> %2 to i8
 784   ret i8 %3
 785 }
 786
 787 declare <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double>, i32)
 788
     ; 256-bit double-precision fpclass test. The immediate-2 and immediate-4
     ; results (<4 x i1> each) are ANDed, widened to <8 x i1> by a
     ; shufflevector whose indices 4-7 select lanes of the zeroinitializer
     ; operand, and bitcast to i8. The expected assembly uses the $4 compare
     ; result (%k1) as the write mask of the $2 compare and includes a
     ; vzeroupper before the return.
 789 define i8 @test_int_x86_avx512_fpclass_pd_256(<4 x double> %x0) {
 790 ; CHECK-LABEL: test_int_x86_avx512_fpclass_pd_256:
 791 ; CHECK:       # %bb.0:
 792 ; CHECK-NEXT:    vfpclasspd $4, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x66,0xc8,0x04]
 793 ; CHECK-NEXT:    vfpclasspd $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x66,0xc0,0x02]
 794 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 795 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 796 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 797 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 798   %res = call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %x0, i32 2)
 799   %res1 = call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %x0, i32 4)
 800   %1 = and <4 x i1> %res1, %res
 801   %2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 802   %3 = bitcast <8 x i1> %2 to i8
 803   ret i8 %3
 804 }