test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
   3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
   4
   ; Unmasked register-register form, 128-bit: the call passes an all-ones mask
   ; (i8 -1) and a zeroinitializer passthru; the expected output is one vandnps
   ; with no {%k} write-mask operand, emitted with the VEX-compressed encoding.
   5 define <4 x float> @test_mask_andnot_ps_rr_128(<4 x float> %a, <4 x float> %b) {
   6 ; CHECK-LABEL: test_mask_andnot_ps_rr_128:
   7 ; CHECK:       # %bb.0:
   8 ; CHECK-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x55,0xc1]
   9 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  10   %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  11   ret <4 x float> %res
  12 }
  13
  ; Merge-masking register-register form, 128-bit: %passThru and %mask are
  ; forwarded to the intrinsic. The expected vandnps writes %xmm2 under {%k1} and
  ; vmovaps then copies %xmm2 into %xmm0. The i686 run loads the mask from the
  ; stack with kmovb; the x86_64 run moves it from %edi with kmovw.
  14 define <4 x float> @test_mask_andnot_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
  15 ; X86-LABEL: test_mask_andnot_ps_rrk_128:
  16 ; X86:       # %bb.0:
  17 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
  18 ; X86-NEXT:    vandnps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x55,0xd1]
  19 ; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
  20 ; X86-NEXT:    retl # encoding: [0xc3]
  21 ;
  22 ; X64-LABEL: test_mask_andnot_ps_rrk_128:
  23 ; X64:       # %bb.0:
  24 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
  25 ; X64-NEXT:    vandnps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x55,0xd1]
  26 ; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
  27 ; X64-NEXT:    retq # encoding: [0xc3]
  28   %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  29   ret <4 x float> %res
  30 }
  31
  ; Zero-masking register-register form, 128-bit: the passthru operand is
  ; zeroinitializer and %mask is variable, so the expected vandnps carries
  ; {%k1} {z} and writes %xmm0 directly (no trailing vmovaps).
  32 define <4 x float> @test_mask_andnot_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
  33 ; X86-LABEL: test_mask_andnot_ps_rrkz_128:
  34 ; X86:       # %bb.0:
  35 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
  36 ; X86-NEXT:    vandnps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x55,0xc1]
  37 ; X86-NEXT:    retl # encoding: [0xc3]
  38 ;
  39 ; X64-LABEL: test_mask_andnot_ps_rrkz_128:
  40 ; X64:       # %bb.0:
  41 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
  42 ; X64-NEXT:    vandnps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x55,0xc1]
  43 ; X64-NEXT:    retq # encoding: [0xc3]
  44   %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  45   ret <4 x float> %res
  46 }
  47
  ; Unmasked register-memory form, 128-bit: %b is loaded from %ptr_b and the
  ; expected vandnps takes that address as its memory operand, with no separate
  ; vector load instruction. The i686 run first loads the pointer from the stack.
  48 define <4 x float> @test_mask_andnot_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) {
  49 ; X86-LABEL: test_mask_andnot_ps_rm_128:
  50 ; X86:       # %bb.0:
  51 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
  52 ; X86-NEXT:    vandnps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x55,0x00]
  53 ; X86-NEXT:    retl # encoding: [0xc3]
  54 ;
  55 ; X64-LABEL: test_mask_andnot_ps_rm_128:
  56 ; X64:       # %bb.0:
  57 ; X64-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x55,0x07]
  58 ; X64-NEXT:    retq # encoding: [0xc3]
  59   %b = load <4 x float>, <4 x float>* %ptr_b
  60   %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  61   ret <4 x float> %res
  62 }
  63
  ; Merge-masking register-memory form, 128-bit: %b is loaded from %ptr_b and
  ; %passThru/%mask are forwarded to the intrinsic. The expected vandnps reads the
  ; memory operand directly, writes %xmm1 under {%k1}, and vmovaps copies %xmm1
  ; into %xmm0.
  64 define <4 x float> @test_mask_andnot_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
  65 ; X86-LABEL: test_mask_andnot_ps_rmk_128:
  66 ; X86:       # %bb.0:
  67 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
  68 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
  69 ; X86-NEXT:    vandnps (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x55,0x08]
  70 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
  71 ; X86-NEXT:    retl # encoding: [0xc3]
  72 ;
  73 ; X64-LABEL: test_mask_andnot_ps_rmk_128:
  74 ; X64:       # %bb.0:
  75 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
  76 ; X64-NEXT:    vandnps (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x55,0x0f]
  77 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
  78 ; X64-NEXT:    retq # encoding: [0xc3]
  79   %b = load <4 x float>, <4 x float>* %ptr_b
  80   %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  81   ret <4 x float> %res
  82 }
  83
  ; Zero-masking register-memory form, 128-bit: %b is loaded from %ptr_b, the
  ; passthru is zeroinitializer and %mask is variable. The expected vandnps reads
  ; the memory operand directly and carries {%k1} {z}.
  84 define <4 x float> @test_mask_andnot_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) {
  85 ; X86-LABEL: test_mask_andnot_ps_rmkz_128:
  86 ; X86:       # %bb.0:
  87 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
  88 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
  89 ; X86-NEXT:    vandnps (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x55,0x00]
  90 ; X86-NEXT:    retl # encoding: [0xc3]
  91 ;
  92 ; X64-LABEL: test_mask_andnot_ps_rmkz_128:
  93 ; X64:       # %bb.0:
  94 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
  95 ; X64-NEXT:    vandnps (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x55,0x07]
  96 ; X64-NEXT:    retq # encoding: [0xc3]
  97   %b = load <4 x float>, <4 x float>* %ptr_b
  98   %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  99   ret <4 x float> %res
 100 }
 101
 ; Unmasked broadcast form, 128-bit: one float is loaded from %ptr_b and splatted
 ; to every lane (insertelement into lane 0, then shufflevector with an all-zero
 ; shuffle mask). The expected vandnps uses the {1to4} broadcast memory operand
 ; and keeps the EVEX encoding (no "EVEX TO VEX Compression" note).
 102 define <4 x float> @test_mask_andnot_ps_rmb_128(<4 x float> %a, float* %ptr_b) {
 103 ; X86-LABEL: test_mask_andnot_ps_rmb_128:
 104 ; X86:       # %bb.0:
 105 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 106 ; X86-NEXT:    vandnps (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x55,0x00]
 107 ; X86-NEXT:    retl # encoding: [0xc3]
 108 ;
 109 ; X64-LABEL: test_mask_andnot_ps_rmb_128:
 110 ; X64:       # %bb.0:
 111 ; X64-NEXT:    vandnps (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x55,0x07]
 112 ; X64-NEXT:    retq # encoding: [0xc3]
 113   %q = load float, float* %ptr_b
 114   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
 115   %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
 116   %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
 117   ret <4 x float> %res
 118 }
 119
 ; Merge-masking broadcast form, 128-bit: one float from %ptr_b is splatted to
 ; every lane and %passThru/%mask are forwarded to the intrinsic. The expected
 ; vandnps uses the {1to4} broadcast operand, writes %xmm1 under {%k1}, and
 ; vmovaps copies %xmm1 into %xmm0.
 120 define <4 x float> @test_mask_andnot_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) {
 121 ; X86-LABEL: test_mask_andnot_ps_rmbk_128:
 122 ; X86:       # %bb.0:
 123 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 124 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
 125 ; X86-NEXT:    vandnps (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x55,0x08]
 126 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
 127 ; X86-NEXT:    retl # encoding: [0xc3]
 128 ;
 129 ; X64-LABEL: test_mask_andnot_ps_rmbk_128:
 130 ; X64:       # %bb.0:
 131 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
 132 ; X64-NEXT:    vandnps (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x55,0x0f]
 133 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
 134 ; X64-NEXT:    retq # encoding: [0xc3]
 135   %q = load float, float* %ptr_b
 136   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
 137   %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
 138   %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
 139   ret <4 x float> %res
 140 }
 141
 ; Zero-masking broadcast form, 128-bit: one float from %ptr_b is splatted to
 ; every lane, the passthru is zeroinitializer and %mask is variable. The
 ; expected vandnps uses the {1to4} broadcast operand with {%k1} {z}.
 142 define <4 x float> @test_mask_andnot_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) {
 143 ; X86-LABEL: test_mask_andnot_ps_rmbkz_128:
 144 ; X86:       # %bb.0:
 145 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 146 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
 147 ; X86-NEXT:    vandnps (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x55,0x00]
 148 ; X86-NEXT:    retl # encoding: [0xc3]
 149 ;
 150 ; X64-LABEL: test_mask_andnot_ps_rmbkz_128:
 151 ; X64:       # %bb.0:
 152 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
 153 ; X64-NEXT:    vandnps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x55,0x07]
 154 ; X64-NEXT:    retq # encoding: [0xc3]
 155   %q = load float, float* %ptr_b
 156   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
 157   %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
 158   %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
 159   ret <4 x float> %res
 160 }
 161
 ; Declaration of the masked 128-bit ANDN intrinsic exercised by the
 ; test_mask_andnot_ps_*_128 functions. At the call sites the operands are, in
 ; order: %a, %b, the passthru vector, and the i8 mask.
 162 declare <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
 163
 ; Unmasked register-register form, 256-bit: the call passes an all-ones mask
 ; (i8 -1) and a zeroinitializer passthru; the expected output is one vandnps on
 ; %ymm registers with no {%k} operand, emitted with the VEX-compressed encoding.
 164 define <8 x float> @test_mask_andnot_ps_rr_256(<8 x float> %a, <8 x float> %b) {
 165 ; CHECK-LABEL: test_mask_andnot_ps_rr_256:
 166 ; CHECK:       # %bb.0:
 167 ; CHECK-NEXT:    vandnps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x55,0xc1]
 168 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 169   %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
 170   ret <8 x float> %res
 171 }
 172
 ; Merge-masking register-register form, 256-bit: %passThru and %mask are
 ; forwarded to the intrinsic. The expected vandnps writes %ymm2 under {%k1} and
 ; vmovaps then copies %ymm2 into %ymm0.
 173 define <8 x float> @test_mask_andnot_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
 174 ; X86-LABEL: test_mask_andnot_ps_rrk_256:
 175 ; X86:       # %bb.0:
 176 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 177 ; X86-NEXT:    vandnps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x55,0xd1]
 178 ; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
 179 ; X86-NEXT:    retl # encoding: [0xc3]
 180 ;
 181 ; X64-LABEL: test_mask_andnot_ps_rrk_256:
 182 ; X64:       # %bb.0:
 183 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 184 ; X64-NEXT:    vandnps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x55,0xd1]
 185 ; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
 186 ; X64-NEXT:    retq # encoding: [0xc3]
 187   %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
 188   ret <8 x float> %res
 189 }
 190
 ; Zero-masking register-register form, 256-bit: the passthru operand is
 ; zeroinitializer and %mask is variable, so the expected vandnps carries
 ; {%k1} {z} and writes %ymm0 directly.
 191 define <8 x float> @test_mask_andnot_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
 192 ; X86-LABEL: test_mask_andnot_ps_rrkz_256:
 193 ; X86:       # %bb.0:
 194 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 195 ; X86-NEXT:    vandnps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x55,0xc1]
 196 ; X86-NEXT:    retl # encoding: [0xc3]
 197 ;
 198 ; X64-LABEL: test_mask_andnot_ps_rrkz_256:
 199 ; X64:       # %bb.0:
 200 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 201 ; X64-NEXT:    vandnps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x55,0xc1]
 202 ; X64-NEXT:    retq # encoding: [0xc3]
 203   %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
 204   ret <8 x float> %res
 205 }
 206
 ; Unmasked register-memory form, 256-bit: %b is loaded from %ptr_b and the
 ; expected vandnps takes that address as its memory operand, with no separate
 ; vector load instruction.
 207 define <8 x float> @test_mask_andnot_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) {
 208 ; X86-LABEL: test_mask_andnot_ps_rm_256:
 209 ; X86:       # %bb.0:
 210 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 211 ; X86-NEXT:    vandnps (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x55,0x00]
 212 ; X86-NEXT:    retl # encoding: [0xc3]
 213 ;
 214 ; X64-LABEL: test_mask_andnot_ps_rm_256:
 215 ; X64:       # %bb.0:
 216 ; X64-NEXT:    vandnps (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x55,0x07]
 217 ; X64-NEXT:    retq # encoding: [0xc3]
 218   %b = load <8 x float>, <8 x float>* %ptr_b
 219   %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
 220   ret <8 x float> %res
 221 }
 222
 ; Merge-masking register-memory form, 256-bit: %b is loaded from %ptr_b and
 ; %passThru/%mask are forwarded to the intrinsic. The expected vandnps reads the
 ; memory operand directly, writes %ymm1 under {%k1}, and vmovaps copies %ymm1
 ; into %ymm0.
 223 define <8 x float> @test_mask_andnot_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) {
 224 ; X86-LABEL: test_mask_andnot_ps_rmk_256:
 225 ; X86:       # %bb.0:
 226 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 227 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
 228 ; X86-NEXT:    vandnps (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x55,0x08]
 229 ; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
 230 ; X86-NEXT:    retl # encoding: [0xc3]
 231 ;
 232 ; X64-LABEL: test_mask_andnot_ps_rmk_256:
 233 ; X64:       # %bb.0:
 234 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
 235 ; X64-NEXT:    vandnps (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x55,0x0f]
 236 ; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
 237 ; X64-NEXT:    retq # encoding: [0xc3]
 238   %b = load <8 x float>, <8 x float>* %ptr_b
 239   %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
 240   ret <8 x float> %res
 241 }
 242
 ; Zero-masking register-memory form, 256-bit: %b is loaded from %ptr_b, the
 ; passthru is zeroinitializer and %mask is variable. The expected vandnps reads
 ; the memory operand directly and carries {%k1} {z}.
 243 define <8 x float> @test_mask_andnot_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) {
 244 ; X86-LABEL: test_mask_andnot_ps_rmkz_256:
 245 ; X86:       # %bb.0:
 246 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 247 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
 248 ; X86-NEXT:    vandnps (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x55,0x00]
 249 ; X86-NEXT:    retl # encoding: [0xc3]
 250 ;
 251 ; X64-LABEL: test_mask_andnot_ps_rmkz_256:
 252 ; X64:       # %bb.0:
 253 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
 254 ; X64-NEXT:    vandnps (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x55,0x07]
 255 ; X64-NEXT:    retq # encoding: [0xc3]
 256   %b = load <8 x float>, <8 x float>* %ptr_b
 257   %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
 258   ret <8 x float> %res
 259 }
 260
 ; Unmasked broadcast form, 256-bit: one float is loaded from %ptr_b and splatted
 ; to every lane (insertelement into lane 0, then shufflevector with an all-zero
 ; shuffle mask). The expected vandnps uses the {1to8} broadcast memory operand
 ; and keeps the EVEX encoding.
 261 define <8 x float> @test_mask_andnot_ps_rmb_256(<8 x float> %a, float* %ptr_b) {
 262 ; X86-LABEL: test_mask_andnot_ps_rmb_256:
 263 ; X86:       # %bb.0:
 264 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 265 ; X86-NEXT:    vandnps (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x55,0x00]
 266 ; X86-NEXT:    retl # encoding: [0xc3]
 267 ;
 268 ; X64-LABEL: test_mask_andnot_ps_rmb_256:
 269 ; X64:       # %bb.0:
 270 ; X64-NEXT:    vandnps (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x55,0x07]
 271 ; X64-NEXT:    retq # encoding: [0xc3]
 272   %q = load float, float* %ptr_b
 273   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
 274   %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
 275   %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
 276   ret <8 x float> %res
 277 }
 278
 ; Merge-masking broadcast form, 256-bit: one float from %ptr_b is splatted to
 ; every lane and %passThru/%mask are forwarded to the intrinsic. The expected
 ; vandnps uses the {1to8} broadcast operand, writes %ymm1 under {%k1}, and
 ; vmovaps copies %ymm1 into %ymm0.
 279 define <8 x float> @test_mask_andnot_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) {
 280 ; X86-LABEL: test_mask_andnot_ps_rmbk_256:
 281 ; X86:       # %bb.0:
 282 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 283 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
 284 ; X86-NEXT:    vandnps (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x55,0x08]
 285 ; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
 286 ; X86-NEXT:    retl # encoding: [0xc3]
 287 ;
 288 ; X64-LABEL: test_mask_andnot_ps_rmbk_256:
 289 ; X64:       # %bb.0:
 290 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
 291 ; X64-NEXT:    vandnps (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x55,0x0f]
 292 ; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
 293 ; X64-NEXT:    retq # encoding: [0xc3]
 294   %q = load float, float* %ptr_b
 295   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
 296   %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
 297   %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
 298   ret <8 x float> %res
 299 }
 300
 ; Zero-masking broadcast form, 256-bit: one float from %ptr_b is splatted to
 ; every lane, the passthru is zeroinitializer and %mask is variable. The
 ; expected vandnps uses the {1to8} broadcast operand with {%k1} {z}.
 301 define <8 x float> @test_mask_andnot_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) {
 302 ; X86-LABEL: test_mask_andnot_ps_rmbkz_256:
 303 ; X86:       # %bb.0:
 304 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 305 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
 306 ; X86-NEXT:    vandnps (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x55,0x00]
 307 ; X86-NEXT:    retl # encoding: [0xc3]
 308 ;
 309 ; X64-LABEL: test_mask_andnot_ps_rmbkz_256:
 310 ; X64:       # %bb.0:
 311 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
 312 ; X64-NEXT:    vandnps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x55,0x07]
 313 ; X64-NEXT:    retq # encoding: [0xc3]
 314   %q = load float, float* %ptr_b
 315   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
 316   %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
 317   %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
 318   ret <8 x float> %res
 319 }
 320
 ; Declaration of the masked 256-bit ANDN intrinsic exercised by the
 ; test_mask_andnot_ps_*_256 functions. At the call sites the operands are, in
 ; order: %a, %b, the passthru vector, and the i8 mask.
 321 declare <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
 322
 ; Unmasked register-register form, 512-bit: the call passes an all-ones mask
 ; (i16 -1) and a zeroinitializer passthru; the expected output is one vandnps on
 ; %zmm registers with no {%k} operand. The encoding stays EVEX (0x62 prefix);
 ; there is no "EVEX TO VEX Compression" note at this width.
 323 define <16 x float> @test_mask_andnot_ps_rr_512(<16 x float> %a, <16 x float> %b) {
 324 ; CHECK-LABEL: test_mask_andnot_ps_rr_512:
 325 ; CHECK:       # %bb.0:
 326 ; CHECK-NEXT:    vandnps %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x55,0xc1]
 327 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 328   %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
 329   ret <16 x float> %res
 330 }
 331
 ; Merge-masking register-register form, 512-bit: %passThru and the i16 %mask are
 ; forwarded to the intrinsic. The expected vandnps writes %zmm2 under {%k1} and
 ; vmovaps then copies %zmm2 into %zmm0. Both runs load the mask with kmovw (from
 ; the stack on i686, from %edi on x86_64).
 332 define <16 x float> @test_mask_andnot_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) {
 333 ; X86-LABEL: test_mask_andnot_ps_rrk_512:
 334 ; X86:       # %bb.0:
 335 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
 336 ; X86-NEXT:    vandnps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x55,0xd1]
 337 ; X86-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
 338 ; X86-NEXT:    retl # encoding: [0xc3]
 339 ;
 340 ; X64-LABEL: test_mask_andnot_ps_rrk_512:
 341 ; X64:       # %bb.0:
 342 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 343 ; X64-NEXT:    vandnps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x55,0xd1]
 344 ; X64-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
 345 ; X64-NEXT:    retq # encoding: [0xc3]
 346   %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
 347   ret <16 x float> %res
 348 }
 349
 ; Zero-masking register-register form, 512-bit: the passthru operand is
 ; zeroinitializer and the i16 %mask is variable, so the expected vandnps carries
 ; {%k1} {z} and writes %zmm0 directly.
 350 define <16 x float> @test_mask_andnot_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) {
 351 ; X86-LABEL: test_mask_andnot_ps_rrkz_512:
 352 ; X86:       # %bb.0:
 353 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
 354 ; X86-NEXT:    vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x55,0xc1]
 355 ; X86-NEXT:    retl # encoding: [0xc3]
 356 ;
 357 ; X64-LABEL: test_mask_andnot_ps_rrkz_512:
 358 ; X64:       # %bb.0:
 359 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 360 ; X64-NEXT:    vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x55,0xc1]
 361 ; X64-NEXT:    retq # encoding: [0xc3]
 362   %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
 363   ret <16 x float> %res
 364 }
 365
 ; Unmasked register-memory form, 512-bit: %b is loaded from %ptr_b and the
 ; expected vandnps takes that address as its memory operand, with no separate
 ; vector load instruction. The encoding stays EVEX.
 366 define <16 x float> @test_mask_andnot_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) {
 367 ; X86-LABEL: test_mask_andnot_ps_rm_512:
 368 ; X86:       # %bb.0:
 369 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 370 ; X86-NEXT:    vandnps (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x55,0x00]
 371 ; X86-NEXT:    retl # encoding: [0xc3]
 372 ;
 373 ; X64-LABEL: test_mask_andnot_ps_rm_512:
 374 ; X64:       # %bb.0:
 375 ; X64-NEXT:    vandnps (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x55,0x07]
 376 ; X64-NEXT:    retq # encoding: [0xc3]
 377   %b = load <16 x float>, <16 x float>* %ptr_b
 378   %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
 379   ret <16 x float> %res
 380 }
 381
 ; Merge-masking register-memory form, 512-bit: %b is loaded from %ptr_b and
 ; %passThru/%mask are forwarded to the intrinsic. The expected vandnps reads the
 ; memory operand directly, writes %zmm1 under {%k1}, and vmovaps copies %zmm1
 ; into %zmm0.
 382 define <16 x float> @test_mask_andnot_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) {
 383 ; X86-LABEL: test_mask_andnot_ps_rmk_512:
 384 ; X86:       # %bb.0:
 385 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 386 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
 387 ; X86-NEXT:    vandnps (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x55,0x08]
 388 ; X86-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
 389 ; X86-NEXT:    retl # encoding: [0xc3]
 390 ;
 391 ; X64-LABEL: test_mask_andnot_ps_rmk_512:
 392 ; X64:       # %bb.0:
 393 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
 394 ; X64-NEXT:    vandnps (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x55,0x0f]
 395 ; X64-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
 396 ; X64-NEXT:    retq # encoding: [0xc3]
 397   %b = load <16 x float>, <16 x float>* %ptr_b
 398   %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
 399   ret <16 x float> %res
 400 }
 401
 ; Zero-masking register-memory form, 512-bit: %b is loaded from %ptr_b, the
 ; passthru is zeroinitializer and the i16 %mask is variable. The expected
 ; vandnps reads the memory operand directly and carries {%k1} {z}.
 402 define <16 x float> @test_mask_andnot_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) {
 403 ; X86-LABEL: test_mask_andnot_ps_rmkz_512:
 404 ; X86:       # %bb.0:
 405 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 406 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
 407 ; X86-NEXT:    vandnps (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x55,0x00]
 408 ; X86-NEXT:    retl # encoding: [0xc3]
 409 ;
 410 ; X64-LABEL: test_mask_andnot_ps_rmkz_512:
 411 ; X64:       # %bb.0:
 412 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
 413 ; X64-NEXT:    vandnps (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x55,0x07]
 414 ; X64-NEXT:    retq # encoding: [0xc3]
 415   %b = load <16 x float>, <16 x float>* %ptr_b
 416   %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
 417   ret <16 x float> %res
 418 }
 419
 ; Unmasked broadcast form, 512-bit: one float is loaded from %ptr_b and splatted
 ; to every lane (insertelement into lane 0, then shufflevector with an all-zero
 ; shuffle mask). The expected vandnps uses the {1to16} broadcast memory operand.
 420 define <16 x float> @test_mask_andnot_ps_rmb_512(<16 x float> %a, float* %ptr_b) {
 421 ; X86-LABEL: test_mask_andnot_ps_rmb_512:
 422 ; X86:       # %bb.0:
 423 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 424 ; X86-NEXT:    vandnps (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x55,0x00]
 425 ; X86-NEXT:    retl # encoding: [0xc3]
 426 ;
 427 ; X64-LABEL: test_mask_andnot_ps_rmb_512:
 428 ; X64:       # %bb.0:
 429 ; X64-NEXT:    vandnps (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x55,0x07]
 430 ; X64-NEXT:    retq # encoding: [0xc3]
 431   %q = load float, float* %ptr_b
 432   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
 433   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
 434   %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
 435   ret <16 x float> %res
 436 }
 437
 ; Merge-masking broadcast form, 512-bit: one float from %ptr_b is splatted to
 ; every lane and %passThru/%mask are forwarded to the intrinsic. The expected
 ; vandnps uses the {1to16} broadcast operand, writes %zmm1 under {%k1}, and
 ; vmovaps copies %zmm1 into %zmm0.
 438 define <16 x float> @test_mask_andnot_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) {
 439 ; X86-LABEL: test_mask_andnot_ps_rmbk_512:
 440 ; X86:       # %bb.0:
 441 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 442 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
 443 ; X86-NEXT:    vandnps (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x55,0x08]
 444 ; X86-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
 445 ; X86-NEXT:    retl # encoding: [0xc3]
 446 ;
 447 ; X64-LABEL: test_mask_andnot_ps_rmbk_512:
 448 ; X64:       # %bb.0:
 449 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
 450 ; X64-NEXT:    vandnps (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x55,0x0f]
 451 ; X64-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
 452 ; X64-NEXT:    retq # encoding: [0xc3]
 453   %q = load float, float* %ptr_b
 454   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
 455   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
 456   %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
 457   ret <16 x float> %res
 458 }
 459
 ; Zero-masking broadcast form, 512-bit: one float from %ptr_b is splatted to
 ; every lane, the passthru is zeroinitializer and the i16 %mask is variable. The
 ; expected vandnps uses the {1to16} broadcast operand with {%k1} {z}.
 460 define <16 x float> @test_mask_andnot_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) {
 461 ; X86-LABEL: test_mask_andnot_ps_rmbkz_512:
 462 ; X86:       # %bb.0:
 463 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 464 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
 465 ; X86-NEXT:    vandnps (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x55,0x00]
 466 ; X86-NEXT:    retl # encoding: [0xc3]
 467 ;
 468 ; X64-LABEL: test_mask_andnot_ps_rmbkz_512:
 469 ; X64:       # %bb.0:
 470 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
 471 ; X64-NEXT:    vandnps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x55,0x07]
 472 ; X64-NEXT:    retq # encoding: [0xc3]
 473   %q = load float, float* %ptr_b
 474   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
 475   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
 476   %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
 477   ret <16 x float> %res
 478 }
 479
 ; Declaration of the masked 512-bit ANDN intrinsic exercised by the
 ; test_mask_andnot_ps_*_512 functions. At the call sites the operands are, in
 ; order: %a, %b, the passthru vector, and the i16 mask.
 480 declare <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
 481
 ; Unmasked register-register AND, 128-bit: the call passes an all-ones mask
 ; (i8 -1) and a zeroinitializer passthru; the expected output is one vandps
 ; with no {%k} write-mask operand, emitted with the VEX-compressed encoding.
 482 define <4 x float> @test_mask_and_ps_rr_128(<4 x float> %a, <4 x float> %b) {
 483 ; CHECK-LABEL: test_mask_and_ps_rr_128:
 484 ; CHECK:       # %bb.0:
 485 ; CHECK-NEXT:    vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1]
 486 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 487   %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
 488   ret <4 x float> %res
 489 }
 490
 ; Merge-masking register-register AND, 128-bit: %passThru and %mask are
 ; forwarded to the intrinsic. The expected vandps writes %xmm2 under {%k1} and
 ; vmovaps then copies %xmm2 into %xmm0.
 491 define <4 x float> @test_mask_and_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
 492 ; X86-LABEL: test_mask_and_ps_rrk_128:
 493 ; X86:       # %bb.0:
 494 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 495 ; X86-NEXT:    vandps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x54,0xd1]
 496 ; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
 497 ; X86-NEXT:    retl # encoding: [0xc3]
 498 ;
 499 ; X64-LABEL: test_mask_and_ps_rrk_128:
 500 ; X64:       # %bb.0:
 501 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 502 ; X64-NEXT:    vandps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x54,0xd1]
 503 ; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
 504 ; X64-NEXT:    retq # encoding: [0xc3]
 505   %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
 506   ret <4 x float> %res
 507 }
 508
 ; Zero-masking register-register AND, 128-bit: the passthru operand is
 ; zeroinitializer and %mask is variable, so the expected vandps carries
 ; {%k1} {z} and writes %xmm0 directly.
 509 define <4 x float> @test_mask_and_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
 510 ; X86-LABEL: test_mask_and_ps_rrkz_128:
 511 ; X86:       # %bb.0:
 512 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 513 ; X86-NEXT:    vandps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x54,0xc1]
 514 ; X86-NEXT:    retl # encoding: [0xc3]
 515 ;
 516 ; X64-LABEL: test_mask_and_ps_rrkz_128:
 517 ; X64:       # %bb.0:
 518 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 519 ; X64-NEXT:    vandps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x54,0xc1]
 520 ; X64-NEXT:    retq # encoding: [0xc3]
 521   %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
 522   ret <4 x float> %res
 523 }
 524
 ; Unmasked register-memory AND, 128-bit: %b is loaded from %ptr_b and the
 ; expected vandps takes that address as its memory operand, with no separate
 ; vector load instruction.
 525 define <4 x float> @test_mask_and_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) {
 526 ; X86-LABEL: test_mask_and_ps_rm_128:
 527 ; X86:       # %bb.0:
 528 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 529 ; X86-NEXT:    vandps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0x00]
 530 ; X86-NEXT:    retl # encoding: [0xc3]
 531 ;
 532 ; X64-LABEL: test_mask_and_ps_rm_128:
 533 ; X64:       # %bb.0:
 534 ; X64-NEXT:    vandps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0x07]
 535 ; X64-NEXT:    retq # encoding: [0xc3]
 536   %b = load <4 x float>, <4 x float>* %ptr_b
 537   %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
 538   ret <4 x float> %res
 539 }
 540
 ; Merge-masking register-memory AND, 128-bit: %b is loaded from %ptr_b and
 ; %passThru/%mask are forwarded to the intrinsic. The expected vandps reads the
 ; memory operand directly, writes %xmm1 under {%k1}, and vmovaps copies %xmm1
 ; into %xmm0.
 541 define <4 x float> @test_mask_and_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
 542 ; X86-LABEL: test_mask_and_ps_rmk_128:
 543 ; X86:       # %bb.0:
 544 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 545 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
 546 ; X86-NEXT:    vandps (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x54,0x08]
 547 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
 548 ; X86-NEXT:    retl # encoding: [0xc3]
 549 ;
 550 ; X64-LABEL: test_mask_and_ps_rmk_128:
 551 ; X64:       # %bb.0:
 552 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
 553 ; X64-NEXT:    vandps (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x54,0x0f]
 554 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
 555 ; X64-NEXT:    retq # encoding: [0xc3]
 556   %b = load <4 x float>, <4 x float>* %ptr_b
 557   %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
 558   ret <4 x float> %res
 559 }
 560
 561 define <4 x float> @test_mask_and_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) { ; AND of %a with a full vector loaded from %ptr_b, masked by %mask with a zeroinitializer pass-through
 562 ; X86-LABEL: test_mask_and_ps_rmkz_128:
 563 ; X86:       # %bb.0:
 564 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 565 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
 566 ; X86-NEXT:    vandps (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x54,0x00]
 567 ; X86-NEXT:    retl # encoding: [0xc3]
 568 ;
 569 ; X64-LABEL: test_mask_and_ps_rmkz_128:
 570 ; X64:       # %bb.0:
 571 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
 572 ; X64-NEXT:    vandps (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x54,0x07]
 573 ; X64-NEXT:    retq # encoding: [0xc3]
 574   %b = load <4 x float>, <4 x float>* %ptr_b ; expected to fold into the vandps memory operand
 575   %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask) ; expected vandps uses the k1 mask with the {z} modifier
 576   ret <4 x float> %res
 577 }
 578
 579 define <4 x float> @test_mask_and_ps_rmb_128(<4 x float> %a, float* %ptr_b) { ; AND of %a with a scalar float loaded from %ptr_b and splatted to all 4 lanes, all-ones mask
 580 ; X86-LABEL: test_mask_and_ps_rmb_128:
 581 ; X86:       # %bb.0:
 582 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 583 ; X86-NEXT:    vandps (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x54,0x00]
 584 ; X86-NEXT:    retl # encoding: [0xc3]
 585 ;
 586 ; X64-LABEL: test_mask_and_ps_rmb_128:
 587 ; X64:       # %bb.0:
 588 ; X64-NEXT:    vandps (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x54,0x07]
 589 ; X64-NEXT:    retq # encoding: [0xc3]
 590   %q = load float, float* %ptr_b
 591   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
 592   %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer ; all-zero shuffle mask copies element 0 into every lane; expected assembly shows a {1to4} memory operand
 593   %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1) ; mask -1: expected vandps has no mask register
 594   ret <4 x float> %res
 595 }
 596
 597 define <4 x float> @test_mask_and_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) { ; AND of %a with a splatted scalar float from %ptr_b, masked by %mask with pass-through %passThru
 598 ; X86-LABEL: test_mask_and_ps_rmbk_128:
 599 ; X86:       # %bb.0:
 600 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 601 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
 602 ; X86-NEXT:    vandps (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x54,0x08]
 603 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
 604 ; X86-NEXT:    retl # encoding: [0xc3]
 605 ;
 606 ; X64-LABEL: test_mask_and_ps_rmbk_128:
 607 ; X64:       # %bb.0:
 608 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
 609 ; X64-NEXT:    vandps (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x54,0x0f]
 610 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
 611 ; X64-NEXT:    retq # encoding: [0xc3]
 612   %q = load float, float* %ptr_b
 613   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
 614   %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer ; all-zero shuffle mask copies element 0 into every lane; expected assembly shows a {1to4} memory operand
 615   %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) ; expected vandps writes under the k1 mask and the result is then moved into xmm0
 616   ret <4 x float> %res
 617 }
 618
 619 define <4 x float> @test_mask_and_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) { ; AND of %a with a splatted scalar float from %ptr_b, masked by %mask with a zeroinitializer pass-through
 620 ; X86-LABEL: test_mask_and_ps_rmbkz_128:
 621 ; X86:       # %bb.0:
 622 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 623 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
 624 ; X86-NEXT:    vandps (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x54,0x00]
 625 ; X86-NEXT:    retl # encoding: [0xc3]
 626 ;
 627 ; X64-LABEL: test_mask_and_ps_rmbkz_128:
 628 ; X64:       # %bb.0:
 629 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
 630 ; X64-NEXT:    vandps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x54,0x07]
 631 ; X64-NEXT:    retq # encoding: [0xc3]
 632   %q = load float, float* %ptr_b
 633   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
 634   %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer ; all-zero shuffle mask copies element 0 into every lane; expected assembly shows a {1to4} memory operand
 635   %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask) ; expected vandps uses the k1 mask with the {z} modifier
 636   ret <4 x float> %res
 637 }
 638
 639 declare <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
 640
 641 define <8 x float> @test_mask_and_ps_rr_256(<8 x float> %a, <8 x float> %b) { ; AND of two register operands with an all-ones mask
 642 ; CHECK-LABEL: test_mask_and_ps_rr_256:
 643 ; CHECK:       # %bb.0:
 644 ; CHECK-NEXT:    vandps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x54,0xc1]
 645 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 646   %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1) ; mask -1: expected vandps has no mask register
 647   ret <8 x float> %res
 648 }
 649
 650 define <8 x float> @test_mask_and_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) { ; AND of two register operands, masked by %mask with pass-through %passThru
 651 ; X86-LABEL: test_mask_and_ps_rrk_256:
 652 ; X86:       # %bb.0:
 653 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 654 ; X86-NEXT:    vandps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x54,0xd1]
 655 ; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
 656 ; X86-NEXT:    retl # encoding: [0xc3]
 657 ;
 658 ; X64-LABEL: test_mask_and_ps_rrk_256:
 659 ; X64:       # %bb.0:
 660 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 661 ; X64-NEXT:    vandps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x54,0xd1]
 662 ; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
 663 ; X64-NEXT:    retq # encoding: [0xc3]
 664   %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) ; expected vandps writes under the k1 mask and the result is then moved into ymm0
 665   ret <8 x float> %res
 666 }
 667
 668 define <8 x float> @test_mask_and_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) { ; AND of two register operands, masked by %mask with a zeroinitializer pass-through
 669 ; X86-LABEL: test_mask_and_ps_rrkz_256:
 670 ; X86:       # %bb.0:
 671 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 672 ; X86-NEXT:    vandps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x54,0xc1]
 673 ; X86-NEXT:    retl # encoding: [0xc3]
 674 ;
 675 ; X64-LABEL: test_mask_and_ps_rrkz_256:
 676 ; X64:       # %bb.0:
 677 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 678 ; X64-NEXT:    vandps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x54,0xc1]
 679 ; X64-NEXT:    retq # encoding: [0xc3]
 680   %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask) ; expected vandps uses the k1 mask with the {z} modifier
 681   ret <8 x float> %res
 682 }
 683
 684 define <8 x float> @test_mask_and_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) { ; AND of %a with a full vector loaded from %ptr_b, all-ones mask
 685 ; X86-LABEL: test_mask_and_ps_rm_256:
 686 ; X86:       # %bb.0:
 687 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 688 ; X86-NEXT:    vandps (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x54,0x00]
 689 ; X86-NEXT:    retl # encoding: [0xc3]
 690 ;
 691 ; X64-LABEL: test_mask_and_ps_rm_256:
 692 ; X64:       # %bb.0:
 693 ; X64-NEXT:    vandps (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x54,0x07]
 694 ; X64-NEXT:    retq # encoding: [0xc3]
 695   %b = load <8 x float>, <8 x float>* %ptr_b ; expected to fold into the vandps memory operand
 696   %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1) ; mask -1: expected vandps has no mask register
 697   ret <8 x float> %res
 698 }
 699
 700 define <8 x float> @test_mask_and_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) { ; AND of %a with a full vector loaded from %ptr_b, masked by %mask with pass-through %passThru
 701 ; X86-LABEL: test_mask_and_ps_rmk_256:
 702 ; X86:       # %bb.0:
 703 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 704 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
 705 ; X86-NEXT:    vandps (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x54,0x08]
 706 ; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
 707 ; X86-NEXT:    retl # encoding: [0xc3]
 708 ;
 709 ; X64-LABEL: test_mask_and_ps_rmk_256:
 710 ; X64:       # %bb.0:
 711 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
 712 ; X64-NEXT:    vandps (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x54,0x0f]
 713 ; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
 714 ; X64-NEXT:    retq # encoding: [0xc3]
 715   %b = load <8 x float>, <8 x float>* %ptr_b ; expected to fold into the vandps memory operand
 716   %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) ; expected vandps writes under the k1 mask and the result is then moved into ymm0
 717   ret <8 x float> %res
 718 }
 719
 720 define <8 x float> @test_mask_and_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) { ; AND of %a with a full vector loaded from %ptr_b, masked by %mask with a zeroinitializer pass-through
 721 ; X86-LABEL: test_mask_and_ps_rmkz_256:
 722 ; X86:       # %bb.0:
 723 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 724 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
 725 ; X86-NEXT:    vandps (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x54,0x00]
 726 ; X86-NEXT:    retl # encoding: [0xc3]
 727 ;
 728 ; X64-LABEL: test_mask_and_ps_rmkz_256:
 729 ; X64:       # %bb.0:
 730 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
 731 ; X64-NEXT:    vandps (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x54,0x07]
 732 ; X64-NEXT:    retq # encoding: [0xc3]
 733   %b = load <8 x float>, <8 x float>* %ptr_b ; expected to fold into the vandps memory operand
 734   %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask) ; expected vandps uses the k1 mask with the {z} modifier
 735   ret <8 x float> %res
 736 }
 737
 738 define <8 x float> @test_mask_and_ps_rmb_256(<8 x float> %a, float* %ptr_b) { ; AND of %a with a scalar float loaded from %ptr_b and splatted to all 8 lanes, all-ones mask
 739 ; X86-LABEL: test_mask_and_ps_rmb_256:
 740 ; X86:       # %bb.0:
 741 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 742 ; X86-NEXT:    vandps (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x54,0x00]
 743 ; X86-NEXT:    retl # encoding: [0xc3]
 744 ;
 745 ; X64-LABEL: test_mask_and_ps_rmb_256:
 746 ; X64:       # %bb.0:
 747 ; X64-NEXT:    vandps (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x54,0x07]
 748 ; X64-NEXT:    retq # encoding: [0xc3]
 749   %q = load float, float* %ptr_b
 750   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
 751   %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer ; all-zero shuffle mask copies element 0 into every lane; expected assembly shows a {1to8} memory operand
 752   %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1) ; mask -1: expected vandps has no mask register
 753   ret <8 x float> %res
 754 }
 755
 756 define <8 x float> @test_mask_and_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) { ; AND of %a with a splatted scalar float from %ptr_b, masked by %mask with pass-through %passThru
 757 ; X86-LABEL: test_mask_and_ps_rmbk_256:
 758 ; X86:       # %bb.0:
 759 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 760 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
 761 ; X86-NEXT:    vandps (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x54,0x08]
 762 ; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
 763 ; X86-NEXT:    retl # encoding: [0xc3]
 764 ;
 765 ; X64-LABEL: test_mask_and_ps_rmbk_256:
 766 ; X64:       # %bb.0:
 767 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
 768 ; X64-NEXT:    vandps (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x54,0x0f]
 769 ; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
 770 ; X64-NEXT:    retq # encoding: [0xc3]
 771   %q = load float, float* %ptr_b
 772   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
 773   %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer ; all-zero shuffle mask copies element 0 into every lane; expected assembly shows a {1to8} memory operand
 774   %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) ; expected vandps writes under the k1 mask and the result is then moved into ymm0
 775   ret <8 x float> %res
 776 }
 777
 778 define <8 x float> @test_mask_and_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) { ; AND of %a with a splatted scalar float from %ptr_b, masked by %mask with a zeroinitializer pass-through
 779 ; X86-LABEL: test_mask_and_ps_rmbkz_256:
 780 ; X86:       # %bb.0:
 781 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 782 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
 783 ; X86-NEXT:    vandps (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x54,0x00]
 784 ; X86-NEXT:    retl # encoding: [0xc3]
 785 ;
 786 ; X64-LABEL: test_mask_and_ps_rmbkz_256:
 787 ; X64:       # %bb.0:
 788 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
 789 ; X64-NEXT:    vandps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x54,0x07]
 790 ; X64-NEXT:    retq # encoding: [0xc3]
 791   %q = load float, float* %ptr_b
 792   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
 793   %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer ; all-zero shuffle mask copies element 0 into every lane; expected assembly shows a {1to8} memory operand
 794   %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask) ; expected vandps uses the k1 mask with the {z} modifier
 795   ret <8 x float> %res
 796 }
 797
 798 declare <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
 799
 800 define <16 x float> @test_mask_and_ps_rr_512(<16 x float> %a, <16 x float> %b) { ; AND of two register operands with an all-ones mask
 801 ; CHECK-LABEL: test_mask_and_ps_rr_512:
 802 ; CHECK:       # %bb.0:
 803 ; CHECK-NEXT:    vandps %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x54,0xc1]
 804 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 805   %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1) ; mask -1: expected vandps has no mask register
 806   ret <16 x float> %res
 807 }
 808
 809 define <16 x float> @test_mask_and_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) { ; AND of two register operands, masked by %mask with pass-through %passThru
 810 ; X86-LABEL: test_mask_and_ps_rrk_512:
 811 ; X86:       # %bb.0:
 812 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
 813 ; X86-NEXT:    vandps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x54,0xd1]
 814 ; X86-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
 815 ; X86-NEXT:    retl # encoding: [0xc3]
 816 ;
 817 ; X64-LABEL: test_mask_and_ps_rrk_512:
 818 ; X64:       # %bb.0:
 819 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 820 ; X64-NEXT:    vandps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x54,0xd1]
 821 ; X64-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
 822 ; X64-NEXT:    retq # encoding: [0xc3]
 823   %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) ; expected vandps writes under the k1 mask and the result is then moved into zmm0
 824   ret <16 x float> %res
 825 }
 826
 827 define <16 x float> @test_mask_and_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) { ; AND of two register operands, masked by %mask with a zeroinitializer pass-through
 828 ; X86-LABEL: test_mask_and_ps_rrkz_512:
 829 ; X86:       # %bb.0:
 830 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
 831 ; X86-NEXT:    vandps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x54,0xc1]
 832 ; X86-NEXT:    retl # encoding: [0xc3]
 833 ;
 834 ; X64-LABEL: test_mask_and_ps_rrkz_512:
 835 ; X64:       # %bb.0:
 836 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 837 ; X64-NEXT:    vandps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x54,0xc1]
 838 ; X64-NEXT:    retq # encoding: [0xc3]
 839   %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask) ; expected vandps uses the k1 mask with the {z} modifier
 840   ret <16 x float> %res
 841 }
 842
 843 define <16 x float> @test_mask_and_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) { ; AND of %a with a full vector loaded from %ptr_b, all-ones mask
 844 ; X86-LABEL: test_mask_and_ps_rm_512:
 845 ; X86:       # %bb.0:
 846 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 847 ; X86-NEXT:    vandps (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x54,0x00]
 848 ; X86-NEXT:    retl # encoding: [0xc3]
 849 ;
 850 ; X64-LABEL: test_mask_and_ps_rm_512:
 851 ; X64:       # %bb.0:
 852 ; X64-NEXT:    vandps (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x54,0x07]
 853 ; X64-NEXT:    retq # encoding: [0xc3]
 854   %b = load <16 x float>, <16 x float>* %ptr_b ; expected to fold into the vandps memory operand
 855   %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1) ; mask -1: expected vandps has no mask register
 856   ret <16 x float> %res
 857 }
 858
 859 define <16 x float> @test_mask_and_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) { ; AND of %a with a full vector loaded from %ptr_b, masked by %mask with pass-through %passThru
 860 ; X86-LABEL: test_mask_and_ps_rmk_512:
 861 ; X86:       # %bb.0:
 862 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 863 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
 864 ; X86-NEXT:    vandps (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x54,0x08]
 865 ; X86-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
 866 ; X86-NEXT:    retl # encoding: [0xc3]
 867 ;
 868 ; X64-LABEL: test_mask_and_ps_rmk_512:
 869 ; X64:       # %bb.0:
 870 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
 871 ; X64-NEXT:    vandps (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x54,0x0f]
 872 ; X64-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
 873 ; X64-NEXT:    retq # encoding: [0xc3]
 874   %b = load <16 x float>, <16 x float>* %ptr_b ; expected to fold into the vandps memory operand
 875   %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) ; expected vandps writes under the k1 mask and the result is then moved into zmm0
 876   ret <16 x float> %res
 877 }
 878
 879 define <16 x float> @test_mask_and_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) { ; AND of %a with a full vector loaded from %ptr_b, masked by %mask with a zeroinitializer pass-through
 880 ; X86-LABEL: test_mask_and_ps_rmkz_512:
 881 ; X86:       # %bb.0:
 882 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 883 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
 884 ; X86-NEXT:    vandps (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x54,0x00]
 885 ; X86-NEXT:    retl # encoding: [0xc3]
 886 ;
 887 ; X64-LABEL: test_mask_and_ps_rmkz_512:
 888 ; X64:       # %bb.0:
 889 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
 890 ; X64-NEXT:    vandps (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x54,0x07]
 891 ; X64-NEXT:    retq # encoding: [0xc3]
 892   %b = load <16 x float>, <16 x float>* %ptr_b ; expected to fold into the vandps memory operand
 893   %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask) ; expected vandps uses the k1 mask with the {z} modifier
 894   ret <16 x float> %res
 895 }
 896
 897 define <16 x float> @test_mask_and_ps_rmb_512(<16 x float> %a, float* %ptr_b) { ; AND of %a with a scalar float loaded from %ptr_b and splatted to all 16 lanes, all-ones mask
 898 ; X86-LABEL: test_mask_and_ps_rmb_512:
 899 ; X86:       # %bb.0:
 900 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 901 ; X86-NEXT:    vandps (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x54,0x00]
 902 ; X86-NEXT:    retl # encoding: [0xc3]
 903 ;
 904 ; X64-LABEL: test_mask_and_ps_rmb_512:
 905 ; X64:       # %bb.0:
 906 ; X64-NEXT:    vandps (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x54,0x07]
 907 ; X64-NEXT:    retq # encoding: [0xc3]
 908   %q = load float, float* %ptr_b
 909   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
 910   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer ; all-zero shuffle mask copies element 0 into every lane; expected assembly shows a {1to16} memory operand
 911   %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1) ; mask -1: expected vandps has no mask register
 912   ret <16 x float> %res
 913 }
 914
 915 define <16 x float> @test_mask_and_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) { ; AND of %a with a splatted scalar float from %ptr_b, masked by %mask with pass-through %passThru
 916 ; X86-LABEL: test_mask_and_ps_rmbk_512:
 917 ; X86:       # %bb.0:
 918 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 919 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
 920 ; X86-NEXT:    vandps (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x54,0x08]
 921 ; X86-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
 922 ; X86-NEXT:    retl # encoding: [0xc3]
 923 ;
 924 ; X64-LABEL: test_mask_and_ps_rmbk_512:
 925 ; X64:       # %bb.0:
 926 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
 927 ; X64-NEXT:    vandps (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x54,0x0f]
 928 ; X64-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
 929 ; X64-NEXT:    retq # encoding: [0xc3]
 930   %q = load float, float* %ptr_b
 931   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
 932   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer ; all-zero shuffle mask copies element 0 into every lane; expected assembly shows a {1to16} memory operand
 933   %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) ; expected vandps writes under the k1 mask and the result is then moved into zmm0
 934   ret <16 x float> %res
 935 }
 936
 937 define <16 x float> @test_mask_and_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) { ; AND of %a with a splatted scalar float from %ptr_b, masked by %mask with a zeroinitializer pass-through
 938 ; X86-LABEL: test_mask_and_ps_rmbkz_512:
 939 ; X86:       # %bb.0:
 940 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 941 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
 942 ; X86-NEXT:    vandps (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x54,0x00]
 943 ; X86-NEXT:    retl # encoding: [0xc3]
 944 ;
 945 ; X64-LABEL: test_mask_and_ps_rmbkz_512:
 946 ; X64:       # %bb.0:
 947 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
 948 ; X64-NEXT:    vandps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x54,0x07]
 949 ; X64-NEXT:    retq # encoding: [0xc3]
 950   %q = load float, float* %ptr_b
 951   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
 952   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer ; all-zero shuffle mask copies element 0 into every lane; expected assembly shows a {1to16} memory operand
 953   %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask) ; expected vandps uses the k1 mask with the {z} modifier
 954   ret <16 x float> %res
 955 }
 956
 957 declare <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
 958
 959 define <4 x float> @test_mask_or_ps_rr_128(<4 x float> %a, <4 x float> %b) { ; OR of two register operands with an all-ones mask
 960 ; CHECK-LABEL: test_mask_or_ps_rr_128:
 961 ; CHECK:       # %bb.0:
 962 ; CHECK-NEXT:    vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1]
 963 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 964   %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1) ; mask -1: expected vorps has no mask register
 965   ret <4 x float> %res
 966 }
 967
 968 define <4 x float> @test_mask_or_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) { ; OR of two register operands, masked by %mask with pass-through %passThru
 969 ; X86-LABEL: test_mask_or_ps_rrk_128:
 970 ; X86:       # %bb.0:
 971 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 972 ; X86-NEXT:    vorps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x56,0xd1]
 973 ; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
 974 ; X86-NEXT:    retl # encoding: [0xc3]
 975 ;
 976 ; X64-LABEL: test_mask_or_ps_rrk_128:
 977 ; X64:       # %bb.0:
 978 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 979 ; X64-NEXT:    vorps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x56,0xd1]
 980 ; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
 981 ; X64-NEXT:    retq # encoding: [0xc3]
 982   %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) ; expected vorps writes under the k1 mask and the result is then moved into xmm0
 983   ret <4 x float> %res
 984 }
 985
 986 define <4 x float> @test_mask_or_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) { ; OR of two register operands, masked by %mask with a zeroinitializer pass-through
 987 ; X86-LABEL: test_mask_or_ps_rrkz_128:
 988 ; X86:       # %bb.0:
 989 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 990 ; X86-NEXT:    vorps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x56,0xc1]
 991 ; X86-NEXT:    retl # encoding: [0xc3]
 992 ;
 993 ; X64-LABEL: test_mask_or_ps_rrkz_128:
 994 ; X64:       # %bb.0:
 995 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 996 ; X64-NEXT:    vorps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x56,0xc1]
 997 ; X64-NEXT:    retq # encoding: [0xc3]
 998   %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask) ; expected vorps uses the k1 mask with the {z} modifier
 999   ret <4 x float> %res
1000 }
1001
1002 define <4 x float> @test_mask_or_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) { ; OR of %a with a full vector loaded from %ptr_b, all-ones mask
1003 ; X86-LABEL: test_mask_or_ps_rm_128:
1004 ; X86:       # %bb.0:
1005 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1006 ; X86-NEXT:    vorps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0x00]
1007 ; X86-NEXT:    retl # encoding: [0xc3]
1008 ;
1009 ; X64-LABEL: test_mask_or_ps_rm_128:
1010 ; X64:       # %bb.0:
1011 ; X64-NEXT:    vorps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0x07]
1012 ; X64-NEXT:    retq # encoding: [0xc3]
1013   %b = load <4 x float>, <4 x float>* %ptr_b ; expected to fold into the vorps memory operand
1014   %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1) ; mask -1: expected vorps has no mask register
1015   ret <4 x float> %res
1016 }
1017
1018 define <4 x float> @test_mask_or_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) { ; OR of %a with a full vector loaded from %ptr_b, masked by %mask with pass-through %passThru
1019 ; X86-LABEL: test_mask_or_ps_rmk_128:
1020 ; X86:       # %bb.0:
1021 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1022 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1023 ; X86-NEXT:    vorps (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x56,0x08]
1024 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1025 ; X86-NEXT:    retl # encoding: [0xc3]
1026 ;
1027 ; X64-LABEL: test_mask_or_ps_rmk_128:
1028 ; X64:       # %bb.0:
1029 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1030 ; X64-NEXT:    vorps (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x56,0x0f]
1031 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1032 ; X64-NEXT:    retq # encoding: [0xc3]
1033   %b = load <4 x float>, <4 x float>* %ptr_b ; expected to fold into the vorps memory operand
1034   %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) ; expected vorps writes under the k1 mask and the result is then moved into xmm0
1035   ret <4 x float> %res
1036 }
1037
1038 define <4 x float> @test_mask_or_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) { ; OR of %a with a full vector loaded from %ptr_b, masked by %mask with a zeroinitializer pass-through
1039 ; X86-LABEL: test_mask_or_ps_rmkz_128:
1040 ; X86:       # %bb.0:
1041 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1042 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1043 ; X86-NEXT:    vorps (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x56,0x00]
1044 ; X86-NEXT:    retl # encoding: [0xc3]
1045 ;
1046 ; X64-LABEL: test_mask_or_ps_rmkz_128:
1047 ; X64:       # %bb.0:
1048 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1049 ; X64-NEXT:    vorps (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x56,0x07]
1050 ; X64-NEXT:    retq # encoding: [0xc3]
1051   %b = load <4 x float>, <4 x float>* %ptr_b ; expected to fold into the vorps memory operand
1052   %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask) ; expected vorps uses the k1 mask with the {z} modifier
1053   ret <4 x float> %res
1054 }
1055
1056 define <4 x float> @test_mask_or_ps_rmb_128(<4 x float> %a, float* %ptr_b) { ; OR of %a with a scalar float loaded from %ptr_b and splatted to all 4 lanes, all-ones mask
1057 ; X86-LABEL: test_mask_or_ps_rmb_128:
1058 ; X86:       # %bb.0:
1059 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1060 ; X86-NEXT:    vorps (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x56,0x00]
1061 ; X86-NEXT:    retl # encoding: [0xc3]
1062 ;
1063 ; X64-LABEL: test_mask_or_ps_rmb_128:
1064 ; X64:       # %bb.0:
1065 ; X64-NEXT:    vorps (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x56,0x07]
1066 ; X64-NEXT:    retq # encoding: [0xc3]
1067   %q = load float, float* %ptr_b
1068   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
1069   %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer ; all-zero shuffle mask copies element 0 into every lane; expected assembly shows a {1to4} memory operand
1070   %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1) ; mask -1: expected vorps has no mask register
1071   ret <4 x float> %res
1072 }
1073
1074 define <4 x float> @test_mask_or_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) { ; OR of %a with a splatted scalar float from %ptr_b, masked by %mask with pass-through %passThru
1075 ; X86-LABEL: test_mask_or_ps_rmbk_128:
1076 ; X86:       # %bb.0:
1077 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1078 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1079 ; X86-NEXT:    vorps (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x56,0x08]
1080 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1081 ; X86-NEXT:    retl # encoding: [0xc3]
1082 ;
1083 ; X64-LABEL: test_mask_or_ps_rmbk_128:
1084 ; X64:       # %bb.0:
1085 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1086 ; X64-NEXT:    vorps (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x56,0x0f]
1087 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1088 ; X64-NEXT:    retq # encoding: [0xc3]
1089   %q = load float, float* %ptr_b
1090   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
1091   %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer ; all-zero shuffle mask copies element 0 into every lane; expected assembly shows a {1to4} memory operand
1092   %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) ; expected vorps writes under the k1 mask and the result is then moved into xmm0
1093   ret <4 x float> %res
1094 }
1095
1096 define <4 x float> @test_mask_or_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) { ; OR of %a with a splatted scalar float from %ptr_b, masked by %mask with a zeroinitializer pass-through
1097 ; X86-LABEL: test_mask_or_ps_rmbkz_128:
1098 ; X86:       # %bb.0:
1099 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1100 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1101 ; X86-NEXT:    vorps (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x56,0x00]
1102 ; X86-NEXT:    retl # encoding: [0xc3]
1103 ;
1104 ; X64-LABEL: test_mask_or_ps_rmbkz_128:
1105 ; X64:       # %bb.0:
1106 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1107 ; X64-NEXT:    vorps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x56,0x07]
1108 ; X64-NEXT:    retq # encoding: [0xc3]
1109   %q = load float, float* %ptr_b
1110   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
1111   %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer ; all-zero shuffle mask copies element 0 into every lane; expected assembly shows a {1to4} memory operand
1112   %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask) ; expected vorps uses the k1 mask with the {z} modifier
1113   ret <4 x float> %res
1114 }
1115
; Declaration of the 128-bit masked OR intrinsic called by the
; test_mask_or_ps_*_128 functions; call sites pass the two <4 x float> sources,
; a <4 x float> passthru vector, and an i8 mask, in that order.
1116 declare <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
1117
; 256-bit OR of two register operands with passthru zeroinitializer and the
; constant mask i8 -1. Both RUN lines share one set of assertions, which expect
; a plain unmasked vorps on ymm registers annotated as EVEX-to-VEX compressed.
1118 define <8 x float> @test_mask_or_ps_rr_256(<8 x float> %a, <8 x float> %b) {
1119 ; CHECK-LABEL: test_mask_or_ps_rr_256:
1120 ; CHECK:       # %bb.0:
1121 ; CHECK-NEXT:    vorps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x56,0xc1]
1122 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1123   %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
1124   ret <8 x float> %res
1125 }
1126
; 256-bit OR of two register operands with a caller-supplied %passThru vector
; and a variable i8 %mask.
; The assertions expect a merge-masked vorps ({%k1}, no {z}) writing %ymm2
; (presumably the register holding %passThru), followed by a vmovaps copy of
; %ymm2 into %ymm0.
1127 define <8 x float> @test_mask_or_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
1128 ; X86-LABEL: test_mask_or_ps_rrk_256:
1129 ; X86:       # %bb.0:
1130 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
1131 ; X86-NEXT:    vorps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x56,0xd1]
1132 ; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
1133 ; X86-NEXT:    retl # encoding: [0xc3]
1134 ;
1135 ; X64-LABEL: test_mask_or_ps_rrk_256:
1136 ; X64:       # %bb.0:
1137 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1138 ; X64-NEXT:    vorps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x56,0xd1]
1139 ; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
1140 ; X64-NEXT:    retq # encoding: [0xc3]
1141   %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
1142   ret <8 x float> %res
1143 }
1144
; 256-bit OR of two register operands with passthru zeroinitializer and a
; variable i8 %mask. The assertions expect a single zero-masked vorps
; ({%k1} {z}) that writes %ymm0 directly, with no trailing vmovaps.
1145 define <8 x float> @test_mask_or_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
1146 ; X86-LABEL: test_mask_or_ps_rrkz_256:
1147 ; X86:       # %bb.0:
1148 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
1149 ; X86-NEXT:    vorps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x56,0xc1]
1150 ; X86-NEXT:    retl # encoding: [0xc3]
1151 ;
1152 ; X64-LABEL: test_mask_or_ps_rrkz_256:
1153 ; X64:       # %bb.0:
1154 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1155 ; X64-NEXT:    vorps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x56,0xc1]
1156 ; X64-NEXT:    retq # encoding: [0xc3]
1157   %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
1158   ret <8 x float> %res
1159 }
1160
; 256-bit OR where %b is a full <8 x float> vector loaded from %ptr_b; passthru
; is zeroinitializer and the mask is the constant i8 -1.
; The assertions expect the load folded into an unmasked vorps memory operand
; (annotated as EVEX-to-VEX compressed); on i686 the pointer is first loaded
; from the stack into %eax.
1161 define <8 x float> @test_mask_or_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) {
1162 ; X86-LABEL: test_mask_or_ps_rm_256:
1163 ; X86:       # %bb.0:
1164 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1165 ; X86-NEXT:    vorps (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x56,0x00]
1166 ; X86-NEXT:    retl # encoding: [0xc3]
1167 ;
1168 ; X64-LABEL: test_mask_or_ps_rm_256:
1169 ; X64:       # %bb.0:
1170 ; X64-NEXT:    vorps (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x56,0x07]
1171 ; X64-NEXT:    retq # encoding: [0xc3]
1172   %b = load <8 x float>, <8 x float>* %ptr_b
1173   %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
1174   ret <8 x float> %res
1175 }
1176
; 256-bit OR where %b is a full vector loaded from %ptr_b, with a
; caller-supplied %passThru and a variable i8 %mask.
; The assertions expect a merge-masked vorps ({%k1}) with the load folded as
; its memory operand, writing %ymm1, then a vmovaps copy of %ymm1 into %ymm0.
1177 define <8 x float> @test_mask_or_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) {
1178 ; X86-LABEL: test_mask_or_ps_rmk_256:
1179 ; X86:       # %bb.0:
1180 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1181 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1182 ; X86-NEXT:    vorps (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x56,0x08]
1183 ; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
1184 ; X86-NEXT:    retl # encoding: [0xc3]
1185 ;
1186 ; X64-LABEL: test_mask_or_ps_rmk_256:
1187 ; X64:       # %bb.0:
1188 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1189 ; X64-NEXT:    vorps (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x56,0x0f]
1190 ; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
1191 ; X64-NEXT:    retq # encoding: [0xc3]
1192   %b = load <8 x float>, <8 x float>* %ptr_b
1193   %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
1194   ret <8 x float> %res
1195 }
1196
; 256-bit OR where %b is a full vector loaded from %ptr_b, with passthru
; zeroinitializer and a variable i8 %mask.
; The assertions expect one zero-masked vorps ({%k1} {z}) with the load folded
; as its memory operand, writing %ymm0 directly.
1197 define <8 x float> @test_mask_or_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) {
1198 ; X86-LABEL: test_mask_or_ps_rmkz_256:
1199 ; X86:       # %bb.0:
1200 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1201 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1202 ; X86-NEXT:    vorps (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x56,0x00]
1203 ; X86-NEXT:    retl # encoding: [0xc3]
1204 ;
1205 ; X64-LABEL: test_mask_or_ps_rmkz_256:
1206 ; X64:       # %bb.0:
1207 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1208 ; X64-NEXT:    vorps (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x56,0x07]
1209 ; X64-NEXT:    retq # encoding: [0xc3]
1210   %b = load <8 x float>, <8 x float>* %ptr_b
1211   %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
1212   ret <8 x float> %res
1213 }
1214
; 256-bit OR where %b is a scalar float loaded from %ptr_b and splatted to all
; 8 lanes; passthru is zeroinitializer and the mask is the constant i8 -1.
; The assertions expect the splat folded into an unmasked vorps as a {1to8}
; operand; the expected encoding is the 0x62-prefixed form and carries no
; EVEX-to-VEX compression note.
1215 define <8 x float> @test_mask_or_ps_rmb_256(<8 x float> %a, float* %ptr_b) {
1216 ; X86-LABEL: test_mask_or_ps_rmb_256:
1217 ; X86:       # %bb.0:
1218 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1219 ; X86-NEXT:    vorps (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x56,0x00]
1220 ; X86-NEXT:    retl # encoding: [0xc3]
1221 ;
1222 ; X64-LABEL: test_mask_or_ps_rmb_256:
1223 ; X64:       # %bb.0:
1224 ; X64-NEXT:    vorps (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x56,0x07]
1225 ; X64-NEXT:    retq # encoding: [0xc3]
1226   %q = load float, float* %ptr_b
1227   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
1228   %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
1229   %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
1230   ret <8 x float> %res
1231 }
1232
; 256-bit OR where %b is a scalar float loaded from %ptr_b and splatted to all
; 8 lanes, with a caller-supplied %passThru and a variable i8 %mask.
; The assertions expect a merge-masked vorps ({%k1}) with a {1to8} operand
; writing %ymm1, then a vmovaps copy of %ymm1 into %ymm0.
1233 define <8 x float> @test_mask_or_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) {
1234 ; X86-LABEL: test_mask_or_ps_rmbk_256:
1235 ; X86:       # %bb.0:
1236 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1237 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1238 ; X86-NEXT:    vorps (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x56,0x08]
1239 ; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
1240 ; X86-NEXT:    retl # encoding: [0xc3]
1241 ;
1242 ; X64-LABEL: test_mask_or_ps_rmbk_256:
1243 ; X64:       # %bb.0:
1244 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1245 ; X64-NEXT:    vorps (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x56,0x0f]
1246 ; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
1247 ; X64-NEXT:    retq # encoding: [0xc3]
1248   %q = load float, float* %ptr_b
1249   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
1250   %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
1251   %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
1252   ret <8 x float> %res
1253 }
1254
; 256-bit OR where %b is a scalar float loaded from %ptr_b and splatted to all
; 8 lanes, with passthru zeroinitializer and a variable i8 %mask.
; The assertions expect one zero-masked vorps ({%k1} {z}) with a {1to8}
; operand, writing %ymm0 directly.
1255 define <8 x float> @test_mask_or_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) {
1256 ; X86-LABEL: test_mask_or_ps_rmbkz_256:
1257 ; X86:       # %bb.0:
1258 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1259 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1260 ; X86-NEXT:    vorps (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x56,0x00]
1261 ; X86-NEXT:    retl # encoding: [0xc3]
1262 ;
1263 ; X64-LABEL: test_mask_or_ps_rmbkz_256:
1264 ; X64:       # %bb.0:
1265 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1266 ; X64-NEXT:    vorps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x56,0x07]
1267 ; X64-NEXT:    retq # encoding: [0xc3]
1268   %q = load float, float* %ptr_b
1269   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
1270   %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
1271   %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
1272   ret <8 x float> %res
1273 }
1274
; Declaration of the 256-bit masked OR intrinsic called by the
; test_mask_or_ps_*_256 functions; call sites pass the two <8 x float> sources,
; an <8 x float> passthru vector, and an i8 mask, in that order.
1275 declare <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
1276
; 512-bit OR of two register operands with passthru zeroinitializer and the
; constant mask i16 -1. Both RUN lines share one set of assertions, which
; expect an unmasked vorps on zmm registers in its 0x62-prefixed encoding.
1277 define <16 x float> @test_mask_or_ps_rr_512(<16 x float> %a, <16 x float> %b) {
1278 ; CHECK-LABEL: test_mask_or_ps_rr_512:
1279 ; CHECK:       # %bb.0:
1280 ; CHECK-NEXT:    vorps %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x56,0xc1]
1281 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1282   %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
1283   ret <16 x float> %res
1284 }
1285
; 512-bit OR of two register operands with a caller-supplied %passThru vector
; and a variable i16 %mask (moved into %k1 with kmovw on both targets).
; The assertions expect a merge-masked vorps ({%k1}) writing %zmm2, followed by
; a vmovaps copy of %zmm2 into %zmm0.
1286 define <16 x float> @test_mask_or_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) {
1287 ; X86-LABEL: test_mask_or_ps_rrk_512:
1288 ; X86:       # %bb.0:
1289 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1290 ; X86-NEXT:    vorps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x56,0xd1]
1291 ; X86-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1292 ; X86-NEXT:    retl # encoding: [0xc3]
1293 ;
1294 ; X64-LABEL: test_mask_or_ps_rrk_512:
1295 ; X64:       # %bb.0:
1296 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1297 ; X64-NEXT:    vorps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x56,0xd1]
1298 ; X64-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1299 ; X64-NEXT:    retq # encoding: [0xc3]
1300   %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
1301   ret <16 x float> %res
1302 }
1303
; 512-bit OR of two register operands with passthru zeroinitializer and a
; variable i16 %mask. The assertions expect a single zero-masked vorps
; ({%k1} {z}) that writes %zmm0 directly.
1304 define <16 x float> @test_mask_or_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) {
1305 ; X86-LABEL: test_mask_or_ps_rrkz_512:
1306 ; X86:       # %bb.0:
1307 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1308 ; X86-NEXT:    vorps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x56,0xc1]
1309 ; X86-NEXT:    retl # encoding: [0xc3]
1310 ;
1311 ; X64-LABEL: test_mask_or_ps_rrkz_512:
1312 ; X64:       # %bb.0:
1313 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1314 ; X64-NEXT:    vorps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x56,0xc1]
1315 ; X64-NEXT:    retq # encoding: [0xc3]
1316   %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
1317   ret <16 x float> %res
1318 }
1319
; 512-bit OR where %b is a full <16 x float> vector loaded from %ptr_b;
; passthru is zeroinitializer and the mask is the constant i16 -1.
; The assertions expect the load folded into an unmasked vorps memory operand;
; on i686 the pointer is first loaded from the stack into %eax.
1320 define <16 x float> @test_mask_or_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) {
1321 ; X86-LABEL: test_mask_or_ps_rm_512:
1322 ; X86:       # %bb.0:
1323 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1324 ; X86-NEXT:    vorps (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x56,0x00]
1325 ; X86-NEXT:    retl # encoding: [0xc3]
1326 ;
1327 ; X64-LABEL: test_mask_or_ps_rm_512:
1328 ; X64:       # %bb.0:
1329 ; X64-NEXT:    vorps (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x56,0x07]
1330 ; X64-NEXT:    retq # encoding: [0xc3]
1331   %b = load <16 x float>, <16 x float>* %ptr_b
1332   %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
1333   ret <16 x float> %res
1334 }
1335
; 512-bit OR where %b is a full vector loaded from %ptr_b, with a
; caller-supplied %passThru and a variable i16 %mask.
; The assertions expect a merge-masked vorps ({%k1}) with the load folded as
; its memory operand, writing %zmm1, then a vmovaps copy of %zmm1 into %zmm0.
1336 define <16 x float> @test_mask_or_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) {
1337 ; X86-LABEL: test_mask_or_ps_rmk_512:
1338 ; X86:       # %bb.0:
1339 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1340 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1341 ; X86-NEXT:    vorps (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x56,0x08]
1342 ; X86-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1343 ; X86-NEXT:    retl # encoding: [0xc3]
1344 ;
1345 ; X64-LABEL: test_mask_or_ps_rmk_512:
1346 ; X64:       # %bb.0:
1347 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1348 ; X64-NEXT:    vorps (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x56,0x0f]
1349 ; X64-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1350 ; X64-NEXT:    retq # encoding: [0xc3]
1351   %b = load <16 x float>, <16 x float>* %ptr_b
1352   %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
1353   ret <16 x float> %res
1354 }
1355
; 512-bit OR where %b is a full vector loaded from %ptr_b, with passthru
; zeroinitializer and a variable i16 %mask.
; The assertions expect one zero-masked vorps ({%k1} {z}) with the load folded
; as its memory operand, writing %zmm0 directly.
1356 define <16 x float> @test_mask_or_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) {
1357 ; X86-LABEL: test_mask_or_ps_rmkz_512:
1358 ; X86:       # %bb.0:
1359 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1360 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1361 ; X86-NEXT:    vorps (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x56,0x00]
1362 ; X86-NEXT:    retl # encoding: [0xc3]
1363 ;
1364 ; X64-LABEL: test_mask_or_ps_rmkz_512:
1365 ; X64:       # %bb.0:
1366 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1367 ; X64-NEXT:    vorps (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x56,0x07]
1368 ; X64-NEXT:    retq # encoding: [0xc3]
1369   %b = load <16 x float>, <16 x float>* %ptr_b
1370   %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
1371   ret <16 x float> %res
1372 }
1373
; 512-bit OR where %b is a scalar float loaded from %ptr_b and splatted to all
; 16 lanes; passthru is zeroinitializer and the mask is the constant i16 -1.
; The assertions expect the splat folded into an unmasked vorps as a {1to16}
; operand.
1374 define <16 x float> @test_mask_or_ps_rmb_512(<16 x float> %a, float* %ptr_b) {
1375 ; X86-LABEL: test_mask_or_ps_rmb_512:
1376 ; X86:       # %bb.0:
1377 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1378 ; X86-NEXT:    vorps (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x56,0x00]
1379 ; X86-NEXT:    retl # encoding: [0xc3]
1380 ;
1381 ; X64-LABEL: test_mask_or_ps_rmb_512:
1382 ; X64:       # %bb.0:
1383 ; X64-NEXT:    vorps (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x56,0x07]
1384 ; X64-NEXT:    retq # encoding: [0xc3]
1385   %q = load float, float* %ptr_b
1386   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
1387   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
1388   %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
1389   ret <16 x float> %res
1390 }
1391
; 512-bit OR where %b is a scalar float loaded from %ptr_b and splatted to all
; 16 lanes, with a caller-supplied %passThru and a variable i16 %mask.
; The assertions expect a merge-masked vorps ({%k1}) with a {1to16} operand
; writing %zmm1, then a vmovaps copy of %zmm1 into %zmm0.
1392 define <16 x float> @test_mask_or_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) {
1393 ; X86-LABEL: test_mask_or_ps_rmbk_512:
1394 ; X86:       # %bb.0:
1395 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1396 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1397 ; X86-NEXT:    vorps (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x56,0x08]
1398 ; X86-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1399 ; X86-NEXT:    retl # encoding: [0xc3]
1400 ;
1401 ; X64-LABEL: test_mask_or_ps_rmbk_512:
1402 ; X64:       # %bb.0:
1403 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1404 ; X64-NEXT:    vorps (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x56,0x0f]
1405 ; X64-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1406 ; X64-NEXT:    retq # encoding: [0xc3]
1407   %q = load float, float* %ptr_b
1408   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
1409   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
1410   %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
1411   ret <16 x float> %res
1412 }
1413
; 512-bit OR where %b is a scalar float loaded from %ptr_b and splatted to all
; 16 lanes, with passthru zeroinitializer and a variable i16 %mask.
; The assertions expect one zero-masked vorps ({%k1} {z}) with a {1to16}
; operand, writing %zmm0 directly.
1414 define <16 x float> @test_mask_or_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) {
1415 ; X86-LABEL: test_mask_or_ps_rmbkz_512:
1416 ; X86:       # %bb.0:
1417 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1418 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1419 ; X86-NEXT:    vorps (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x56,0x00]
1420 ; X86-NEXT:    retl # encoding: [0xc3]
1421 ;
1422 ; X64-LABEL: test_mask_or_ps_rmbkz_512:
1423 ; X64:       # %bb.0:
1424 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1425 ; X64-NEXT:    vorps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x56,0x07]
1426 ; X64-NEXT:    retq # encoding: [0xc3]
1427   %q = load float, float* %ptr_b
1428   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
1429   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
1430   %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
1431   ret <16 x float> %res
1432 }
1433
; Declaration of the 512-bit masked OR intrinsic called by the
; test_mask_or_ps_*_512 functions; call sites pass the two <16 x float>
; sources, a <16 x float> passthru vector, and an i16 mask, in that order.
1434 declare <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
1435
; 128-bit XOR of two register operands with passthru zeroinitializer and the
; constant mask i8 -1. Both RUN lines share one set of assertions, which expect
; a plain unmasked vxorps on xmm registers annotated as EVEX-to-VEX compressed.
1436 define <4 x float> @test_mask_xor_ps_rr_128(<4 x float> %a, <4 x float> %b) {
1437 ; CHECK-LABEL: test_mask_xor_ps_rr_128:
1438 ; CHECK:       # %bb.0:
1439 ; CHECK-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1]
1440 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1441   %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
1442   ret <4 x float> %res
1443 }
1444
; 128-bit XOR of two register operands with a caller-supplied %passThru vector
; and a variable i8 %mask.
; The assertions expect a merge-masked vxorps ({%k1}, no {z}) writing %xmm2
; (presumably the register holding %passThru), followed by a vmovaps copy of
; %xmm2 into %xmm0.
1445 define <4 x float> @test_mask_xor_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
1446 ; X86-LABEL: test_mask_xor_ps_rrk_128:
1447 ; X86:       # %bb.0:
1448 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
1449 ; X86-NEXT:    vxorps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x57,0xd1]
1450 ; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
1451 ; X86-NEXT:    retl # encoding: [0xc3]
1452 ;
1453 ; X64-LABEL: test_mask_xor_ps_rrk_128:
1454 ; X64:       # %bb.0:
1455 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1456 ; X64-NEXT:    vxorps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x57,0xd1]
1457 ; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
1458 ; X64-NEXT:    retq # encoding: [0xc3]
1459   %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
1460   ret <4 x float> %res
1461 }
1462
; 128-bit XOR of two register operands with passthru zeroinitializer and a
; variable i8 %mask. The assertions expect a single zero-masked vxorps
; ({%k1} {z}) that writes %xmm0 directly, with no trailing vmovaps.
1463 define <4 x float> @test_mask_xor_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
1464 ; X86-LABEL: test_mask_xor_ps_rrkz_128:
1465 ; X86:       # %bb.0:
1466 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
1467 ; X86-NEXT:    vxorps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x57,0xc1]
1468 ; X86-NEXT:    retl # encoding: [0xc3]
1469 ;
1470 ; X64-LABEL: test_mask_xor_ps_rrkz_128:
1471 ; X64:       # %bb.0:
1472 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1473 ; X64-NEXT:    vxorps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x57,0xc1]
1474 ; X64-NEXT:    retq # encoding: [0xc3]
1475   %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
1476   ret <4 x float> %res
1477 }
1478
; 128-bit XOR where %b is a full <4 x float> vector loaded from %ptr_b;
; passthru is zeroinitializer and the mask is the constant i8 -1.
; The assertions expect the load folded into an unmasked vxorps memory operand
; (annotated as EVEX-to-VEX compressed); on i686 the pointer is first loaded
; from the stack into %eax.
1479 define <4 x float> @test_mask_xor_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) {
1480 ; X86-LABEL: test_mask_xor_ps_rm_128:
1481 ; X86:       # %bb.0:
1482 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1483 ; X86-NEXT:    vxorps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0x00]
1484 ; X86-NEXT:    retl # encoding: [0xc3]
1485 ;
1486 ; X64-LABEL: test_mask_xor_ps_rm_128:
1487 ; X64:       # %bb.0:
1488 ; X64-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0x07]
1489 ; X64-NEXT:    retq # encoding: [0xc3]
1490   %b = load <4 x float>, <4 x float>* %ptr_b
1491   %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
1492   ret <4 x float> %res
1493 }
1494
; 128-bit XOR where %b is a full vector loaded from %ptr_b, with a
; caller-supplied %passThru and a variable i8 %mask.
; The assertions expect a merge-masked vxorps ({%k1}) with the load folded as
; its memory operand, writing %xmm1, then a vmovaps copy of %xmm1 into %xmm0.
1495 define <4 x float> @test_mask_xor_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
1496 ; X86-LABEL: test_mask_xor_ps_rmk_128:
1497 ; X86:       # %bb.0:
1498 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1499 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1500 ; X86-NEXT:    vxorps (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x57,0x08]
1501 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1502 ; X86-NEXT:    retl # encoding: [0xc3]
1503 ;
1504 ; X64-LABEL: test_mask_xor_ps_rmk_128:
1505 ; X64:       # %bb.0:
1506 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1507 ; X64-NEXT:    vxorps (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x57,0x0f]
1508 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1509 ; X64-NEXT:    retq # encoding: [0xc3]
1510   %b = load <4 x float>, <4 x float>* %ptr_b
1511   %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
1512   ret <4 x float> %res
1513 }
1514
; 128-bit XOR where %b is a full vector loaded from %ptr_b, with passthru
; zeroinitializer and a variable i8 %mask.
; The assertions expect one zero-masked vxorps ({%k1} {z}) with the load folded
; as its memory operand, writing %xmm0 directly.
1515 define <4 x float> @test_mask_xor_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) {
1516 ; X86-LABEL: test_mask_xor_ps_rmkz_128:
1517 ; X86:       # %bb.0:
1518 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1519 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1520 ; X86-NEXT:    vxorps (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x57,0x00]
1521 ; X86-NEXT:    retl # encoding: [0xc3]
1522 ;
1523 ; X64-LABEL: test_mask_xor_ps_rmkz_128:
1524 ; X64:       # %bb.0:
1525 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1526 ; X64-NEXT:    vxorps (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x57,0x07]
1527 ; X64-NEXT:    retq # encoding: [0xc3]
1528   %b = load <4 x float>, <4 x float>* %ptr_b
1529   %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
1530   ret <4 x float> %res
1531 }
1532
; 128-bit XOR where %b is a scalar float loaded from %ptr_b and splatted to all
; 4 lanes; passthru is zeroinitializer and the mask is the constant i8 -1.
; The assertions expect the splat folded into an unmasked vxorps as a {1to4}
; operand; the expected encoding is the 0x62-prefixed form and carries no
; EVEX-to-VEX compression note.
1533 define <4 x float> @test_mask_xor_ps_rmb_128(<4 x float> %a, float* %ptr_b) {
1534 ; X86-LABEL: test_mask_xor_ps_rmb_128:
1535 ; X86:       # %bb.0:
1536 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1537 ; X86-NEXT:    vxorps (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x57,0x00]
1538 ; X86-NEXT:    retl # encoding: [0xc3]
1539 ;
1540 ; X64-LABEL: test_mask_xor_ps_rmb_128:
1541 ; X64:       # %bb.0:
1542 ; X64-NEXT:    vxorps (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x57,0x07]
1543 ; X64-NEXT:    retq # encoding: [0xc3]
1544   %q = load float, float* %ptr_b
1545   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
1546   %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
1547   %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
1548   ret <4 x float> %res
1549 }
1550
; 128-bit XOR where %b is a scalar float loaded from %ptr_b and splatted to all
; 4 lanes, with a caller-supplied %passThru and a variable i8 %mask.
; The assertions expect a merge-masked vxorps ({%k1}) with a {1to4} operand
; writing %xmm1, then a vmovaps copy of %xmm1 into %xmm0.
1551 define <4 x float> @test_mask_xor_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) {
1552 ; X86-LABEL: test_mask_xor_ps_rmbk_128:
1553 ; X86:       # %bb.0:
1554 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1555 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1556 ; X86-NEXT:    vxorps (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x57,0x08]
1557 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1558 ; X86-NEXT:    retl # encoding: [0xc3]
1559 ;
1560 ; X64-LABEL: test_mask_xor_ps_rmbk_128:
1561 ; X64:       # %bb.0:
1562 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1563 ; X64-NEXT:    vxorps (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x57,0x0f]
1564 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1565 ; X64-NEXT:    retq # encoding: [0xc3]
1566   %q = load float, float* %ptr_b
1567   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
1568   %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
1569   %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
1570   ret <4 x float> %res
1571 }
1572
; 128-bit XOR where %b is a scalar float loaded from %ptr_b and splatted to all
; 4 lanes, with passthru zeroinitializer and a variable i8 %mask.
; The assertions expect one zero-masked vxorps ({%k1} {z}) with a {1to4}
; operand, writing %xmm0 directly.
1573 define <4 x float> @test_mask_xor_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) {
1574 ; X86-LABEL: test_mask_xor_ps_rmbkz_128:
1575 ; X86:       # %bb.0:
1576 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1577 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1578 ; X86-NEXT:    vxorps (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x57,0x00]
1579 ; X86-NEXT:    retl # encoding: [0xc3]
1580 ;
1581 ; X64-LABEL: test_mask_xor_ps_rmbkz_128:
1582 ; X64:       # %bb.0:
1583 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1584 ; X64-NEXT:    vxorps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x57,0x07]
1585 ; X64-NEXT:    retq # encoding: [0xc3]
1586   %q = load float, float* %ptr_b
1587   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
1588   %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
1589   %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
1590   ret <4 x float> %res
1591 }
1592
; Declaration of the 128-bit masked XOR intrinsic called by the
; test_mask_xor_ps_*_128 functions; call sites pass the two <4 x float>
; sources, a <4 x float> passthru vector, and an i8 mask, in that order.
1593 declare <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
1594
; 256-bit XOR of two register operands with passthru zeroinitializer and the
; constant mask i8 -1. Both RUN lines share one set of assertions, which expect
; a plain unmasked vxorps on ymm registers annotated as EVEX-to-VEX compressed.
1595 define <8 x float> @test_mask_xor_ps_rr_256(<8 x float> %a, <8 x float> %b) {
1596 ; CHECK-LABEL: test_mask_xor_ps_rr_256:
1597 ; CHECK:       # %bb.0:
1598 ; CHECK-NEXT:    vxorps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x57,0xc1]
1599 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1600   %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
1601   ret <8 x float> %res
1602 }
1603
; 256-bit XOR of two register operands with a caller-supplied %passThru vector
; and a variable i8 %mask.
; The assertions expect a merge-masked vxorps ({%k1}, no {z}) writing %ymm2
; (presumably the register holding %passThru), followed by a vmovaps copy of
; %ymm2 into %ymm0.
1604 define <8 x float> @test_mask_xor_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
1605 ; X86-LABEL: test_mask_xor_ps_rrk_256:
1606 ; X86:       # %bb.0:
1607 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
1608 ; X86-NEXT:    vxorps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x57,0xd1]
1609 ; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
1610 ; X86-NEXT:    retl # encoding: [0xc3]
1611 ;
1612 ; X64-LABEL: test_mask_xor_ps_rrk_256:
1613 ; X64:       # %bb.0:
1614 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1615 ; X64-NEXT:    vxorps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x57,0xd1]
1616 ; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
1617 ; X64-NEXT:    retq # encoding: [0xc3]
1618   %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
1619   ret <8 x float> %res
1620 }
1621
; 256-bit XOR of two register operands with passthru zeroinitializer and a
; variable i8 %mask. The assertions expect a single zero-masked vxorps
; ({%k1} {z}) that writes %ymm0 directly, with no trailing vmovaps.
1622 define <8 x float> @test_mask_xor_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
1623 ; X86-LABEL: test_mask_xor_ps_rrkz_256:
1624 ; X86:       # %bb.0:
1625 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
1626 ; X86-NEXT:    vxorps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x57,0xc1]
1627 ; X86-NEXT:    retl # encoding: [0xc3]
1628 ;
1629 ; X64-LABEL: test_mask_xor_ps_rrkz_256:
1630 ; X64:       # %bb.0:
1631 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1632 ; X64-NEXT:    vxorps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x57,0xc1]
1633 ; X64-NEXT:    retq # encoding: [0xc3]
1634   %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
1635   ret <8 x float> %res
1636 }
1637
; 256-bit XOR where %b is a full <8 x float> vector loaded from %ptr_b;
; passthru is zeroinitializer and the mask is the constant i8 -1.
; The assertions expect the load folded into an unmasked vxorps memory operand
; (annotated as EVEX-to-VEX compressed); on i686 the pointer is first loaded
; from the stack into %eax.
1638 define <8 x float> @test_mask_xor_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) {
1639 ; X86-LABEL: test_mask_xor_ps_rm_256:
1640 ; X86:       # %bb.0:
1641 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1642 ; X86-NEXT:    vxorps (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x57,0x00]
1643 ; X86-NEXT:    retl # encoding: [0xc3]
1644 ;
1645 ; X64-LABEL: test_mask_xor_ps_rm_256:
1646 ; X64:       # %bb.0:
1647 ; X64-NEXT:    vxorps (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x57,0x07]
1648 ; X64-NEXT:    retq # encoding: [0xc3]
1649   %b = load <8 x float>, <8 x float>* %ptr_b
1650   %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
1651   ret <8 x float> %res
1652 }
1653
; 256-bit XOR where %b is a full vector loaded from %ptr_b, with a
; caller-supplied %passThru and a variable i8 %mask.
; The assertions expect a merge-masked vxorps ({%k1}) with the load folded as
; its memory operand, writing %ymm1, then a vmovaps copy of %ymm1 into %ymm0.
1654 define <8 x float> @test_mask_xor_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) {
1655 ; X86-LABEL: test_mask_xor_ps_rmk_256:
1656 ; X86:       # %bb.0:
1657 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1658 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1659 ; X86-NEXT:    vxorps (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x57,0x08]
1660 ; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
1661 ; X86-NEXT:    retl # encoding: [0xc3]
1662 ;
1663 ; X64-LABEL: test_mask_xor_ps_rmk_256:
1664 ; X64:       # %bb.0:
1665 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1666 ; X64-NEXT:    vxorps (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x57,0x0f]
1667 ; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
1668 ; X64-NEXT:    retq # encoding: [0xc3]
1669   %b = load <8 x float>, <8 x float>* %ptr_b
1670   %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
1671   ret <8 x float> %res
1672 }
1673
; 256-bit XOR with a memory operand, a zeroinitializer pass-through and a
; variable i8 %mask. The checks expect the mask moved into %k1 and a single
; vxorps from memory in the {%k1} {z} form, written directly to %ymm0.
1674 define <8 x float> @test_mask_xor_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) {
1675 ; X86-LABEL: test_mask_xor_ps_rmkz_256:
1676 ; X86:       # %bb.0:
1677 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1678 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1679 ; X86-NEXT:    vxorps (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x57,0x00]
1680 ; X86-NEXT:    retl # encoding: [0xc3]
1681 ;
1682 ; X64-LABEL: test_mask_xor_ps_rmkz_256:
1683 ; X64:       # %bb.0:
1684 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1685 ; X64-NEXT:    vxorps (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x57,0x07]
1686 ; X64-NEXT:    retq # encoding: [0xc3]
1687   %b = load <8 x float>, <8 x float>* %ptr_b
1688   %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
1689   ret <8 x float> %res
1690 }
1691
; 256-bit XOR against a splat of a scalar loaded from %ptr_b (insertelement into
; lane 0 followed by a shufflevector with an all-zero index vector), called with
; a zeroinitializer pass-through and an all-ones mask (i8 -1). The checks expect
; the splat folded into vxorps as a {1to8} memory operand.
1692 define <8 x float> @test_mask_xor_ps_rmb_256(<8 x float> %a, float* %ptr_b) {
1693 ; X86-LABEL: test_mask_xor_ps_rmb_256:
1694 ; X86:       # %bb.0:
1695 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1696 ; X86-NEXT:    vxorps (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x57,0x00]
1697 ; X86-NEXT:    retl # encoding: [0xc3]
1698 ;
1699 ; X64-LABEL: test_mask_xor_ps_rmb_256:
1700 ; X64:       # %bb.0:
1701 ; X64-NEXT:    vxorps (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x57,0x07]
1702 ; X64-NEXT:    retq # encoding: [0xc3]
1703   %q = load float, float* %ptr_b
1704   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
1705   %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
1706   %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
1707   ret <8 x float> %res
1708 }
1709
; 256-bit XOR against a splat of a scalar loaded from %ptr_b, with a
; caller-supplied pass-through (%passThru) and a variable i8 %mask. The checks
; expect the mask in %k1, a vxorps with a {1to8} memory operand writing
; %ymm1 {%k1}, and then a vmovaps of %ymm1 into %ymm0.
1710 define <8 x float> @test_mask_xor_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) {
1711 ; X86-LABEL: test_mask_xor_ps_rmbk_256:
1712 ; X86:       # %bb.0:
1713 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1714 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1715 ; X86-NEXT:    vxorps (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x57,0x08]
1716 ; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
1717 ; X86-NEXT:    retl # encoding: [0xc3]
1718 ;
1719 ; X64-LABEL: test_mask_xor_ps_rmbk_256:
1720 ; X64:       # %bb.0:
1721 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1722 ; X64-NEXT:    vxorps (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x57,0x0f]
1723 ; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
1724 ; X64-NEXT:    retq # encoding: [0xc3]
1725   %q = load float, float* %ptr_b
1726   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
1727   %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
1728   %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
1729   ret <8 x float> %res
1730 }
1731
; 256-bit XOR against a splat of a scalar loaded from %ptr_b, with a
; zeroinitializer pass-through and a variable i8 %mask. The checks expect the
; mask in %k1 and a single vxorps with a {1to8} memory operand in the
; {%k1} {z} form, written directly to %ymm0.
1732 define <8 x float> @test_mask_xor_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) {
1733 ; X86-LABEL: test_mask_xor_ps_rmbkz_256:
1734 ; X86:       # %bb.0:
1735 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1736 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1737 ; X86-NEXT:    vxorps (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x57,0x00]
1738 ; X86-NEXT:    retl # encoding: [0xc3]
1739 ;
1740 ; X64-LABEL: test_mask_xor_ps_rmbkz_256:
1741 ; X64:       # %bb.0:
1742 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1743 ; X64-NEXT:    vxorps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x57,0x07]
1744 ; X64-NEXT:    retq # encoding: [0xc3]
1745   %q = load float, float* %ptr_b
1746   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
1747   %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
1748   %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
1749   ret <8 x float> %res
1750 }
1751
; Masked 256-bit float XOR intrinsic called by the test_mask_xor_ps_*_256
; functions. As used at those call sites the operands are, in order: the two
; <8 x float> sources, an <8 x float> pass-through value, and an i8 mask.
1752 declare <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
1753
; 512-bit register-register XOR with no effective masking: the intrinsic is
; called with a zeroinitializer pass-through and an all-ones mask (i16 -1).
; Both runs share one check block expecting a plain EVEX-encoded vxorps on zmm
; registers.
1754 define <16 x float> @test_mask_xor_ps_rr_512(<16 x float> %a, <16 x float> %b) {
1755 ; CHECK-LABEL: test_mask_xor_ps_rr_512:
1756 ; CHECK:       # %bb.0:
1757 ; CHECK-NEXT:    vxorps %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x57,0xc1]
1758 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1759   %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
1760   ret <16 x float> %res
1761 }
1762
; 512-bit register-register XOR with a caller-supplied pass-through (%passThru)
; and a variable i16 %mask. The checks expect the mask moved into %k1 with
; kmovw, a vxorps writing %zmm2 {%k1}, and then a vmovaps of %zmm2 into %zmm0.
1763 define <16 x float> @test_mask_xor_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) {
1764 ; X86-LABEL: test_mask_xor_ps_rrk_512:
1765 ; X86:       # %bb.0:
1766 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1767 ; X86-NEXT:    vxorps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x57,0xd1]
1768 ; X86-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1769 ; X86-NEXT:    retl # encoding: [0xc3]
1770 ;
1771 ; X64-LABEL: test_mask_xor_ps_rrk_512:
1772 ; X64:       # %bb.0:
1773 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1774 ; X64-NEXT:    vxorps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x57,0xd1]
1775 ; X64-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1776 ; X64-NEXT:    retq # encoding: [0xc3]
1777   %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
1778   ret <16 x float> %res
1779 }
1780
; 512-bit register-register XOR with a zeroinitializer pass-through and a
; variable i16 %mask. The checks expect the mask moved into %k1 and a single
; vxorps in the {%k1} {z} form, written directly to %zmm0.
1781 define <16 x float> @test_mask_xor_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) {
1782 ; X86-LABEL: test_mask_xor_ps_rrkz_512:
1783 ; X86:       # %bb.0:
1784 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1785 ; X86-NEXT:    vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x57,0xc1]
1786 ; X86-NEXT:    retl # encoding: [0xc3]
1787 ;
1788 ; X64-LABEL: test_mask_xor_ps_rrkz_512:
1789 ; X64:       # %bb.0:
1790 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1791 ; X64-NEXT:    vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x57,0xc1]
1792 ; X64-NEXT:    retq # encoding: [0xc3]
1793   %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
1794   ret <16 x float> %res
1795 }
1796
; 512-bit XOR with a memory operand and no effective masking: %b is loaded from
; %ptr_b and the intrinsic is called with a zeroinitializer pass-through and an
; all-ones mask (i16 -1). The checks expect the load folded into vxorps (the
; 32-bit run first fetches the pointer from the stack).
1797 define <16 x float> @test_mask_xor_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) {
1798 ; X86-LABEL: test_mask_xor_ps_rm_512:
1799 ; X86:       # %bb.0:
1800 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1801 ; X86-NEXT:    vxorps (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x57,0x00]
1802 ; X86-NEXT:    retl # encoding: [0xc3]
1803 ;
1804 ; X64-LABEL: test_mask_xor_ps_rm_512:
1805 ; X64:       # %bb.0:
1806 ; X64-NEXT:    vxorps (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x57,0x07]
1807 ; X64-NEXT:    retq # encoding: [0xc3]
1808   %b = load <16 x float>, <16 x float>* %ptr_b
1809   %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
1810   ret <16 x float> %res
1811 }
1812
; 512-bit XOR with a memory operand, a caller-supplied pass-through (%passThru)
; and a variable i16 %mask. The checks expect the mask moved into %k1, a
; vxorps (mem), %zmm0, %zmm1 {%k1}, and then a vmovaps of %zmm1 into %zmm0.
1813 define <16 x float> @test_mask_xor_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) {
1814 ; X86-LABEL: test_mask_xor_ps_rmk_512:
1815 ; X86:       # %bb.0:
1816 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1817 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1818 ; X86-NEXT:    vxorps (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x57,0x08]
1819 ; X86-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1820 ; X86-NEXT:    retl # encoding: [0xc3]
1821 ;
1822 ; X64-LABEL: test_mask_xor_ps_rmk_512:
1823 ; X64:       # %bb.0:
1824 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1825 ; X64-NEXT:    vxorps (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x57,0x0f]
1826 ; X64-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1827 ; X64-NEXT:    retq # encoding: [0xc3]
1828   %b = load <16 x float>, <16 x float>* %ptr_b
1829   %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
1830   ret <16 x float> %res
1831 }
1832
; 512-bit XOR with a memory operand, a zeroinitializer pass-through and a
; variable i16 %mask. The checks expect the mask moved into %k1 and a single
; vxorps from memory in the {%k1} {z} form, written directly to %zmm0.
1833 define <16 x float> @test_mask_xor_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) {
1834 ; X86-LABEL: test_mask_xor_ps_rmkz_512:
1835 ; X86:       # %bb.0:
1836 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1837 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1838 ; X86-NEXT:    vxorps (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x57,0x00]
1839 ; X86-NEXT:    retl # encoding: [0xc3]
1840 ;
1841 ; X64-LABEL: test_mask_xor_ps_rmkz_512:
1842 ; X64:       # %bb.0:
1843 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1844 ; X64-NEXT:    vxorps (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x57,0x07]
1845 ; X64-NEXT:    retq # encoding: [0xc3]
1846   %b = load <16 x float>, <16 x float>* %ptr_b
1847   %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
1848   ret <16 x float> %res
1849 }
1850
; 512-bit XOR against a splat of a scalar loaded from %ptr_b (insertelement into
; lane 0 followed by a shufflevector with an all-zero index vector), called with
; a zeroinitializer pass-through and an all-ones mask (i16 -1). The checks
; expect the splat folded into vxorps as a {1to16} memory operand.
1851 define <16 x float> @test_mask_xor_ps_rmb_512(<16 x float> %a, float* %ptr_b) {
1852 ; X86-LABEL: test_mask_xor_ps_rmb_512:
1853 ; X86:       # %bb.0:
1854 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1855 ; X86-NEXT:    vxorps (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x57,0x00]
1856 ; X86-NEXT:    retl # encoding: [0xc3]
1857 ;
1858 ; X64-LABEL: test_mask_xor_ps_rmb_512:
1859 ; X64:       # %bb.0:
1860 ; X64-NEXT:    vxorps (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x57,0x07]
1861 ; X64-NEXT:    retq # encoding: [0xc3]
1862   %q = load float, float* %ptr_b
1863   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
1864   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
1865   %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
1866   ret <16 x float> %res
1867 }
1868
; 512-bit XOR against a splat of a scalar loaded from %ptr_b, with a
; caller-supplied pass-through (%passThru) and a variable i16 %mask. The checks
; expect the mask in %k1, a vxorps with a {1to16} memory operand writing
; %zmm1 {%k1}, and then a vmovaps of %zmm1 into %zmm0.
1869 define <16 x float> @test_mask_xor_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) {
1870 ; X86-LABEL: test_mask_xor_ps_rmbk_512:
1871 ; X86:       # %bb.0:
1872 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1873 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1874 ; X86-NEXT:    vxorps (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x57,0x08]
1875 ; X86-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1876 ; X86-NEXT:    retl # encoding: [0xc3]
1877 ;
1878 ; X64-LABEL: test_mask_xor_ps_rmbk_512:
1879 ; X64:       # %bb.0:
1880 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1881 ; X64-NEXT:    vxorps (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x57,0x0f]
1882 ; X64-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1883 ; X64-NEXT:    retq # encoding: [0xc3]
1884   %q = load float, float* %ptr_b
1885   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
1886   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
1887   %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
1888   ret <16 x float> %res
1889 }
1890
; 512-bit XOR against a splat of a scalar loaded from %ptr_b, with a
; zeroinitializer pass-through and a variable i16 %mask. The checks expect the
; mask in %k1 and a single vxorps with a {1to16} memory operand in the
; {%k1} {z} form, written directly to %zmm0.
1891 define <16 x float> @test_mask_xor_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) {
1892 ; X86-LABEL: test_mask_xor_ps_rmbkz_512:
1893 ; X86:       # %bb.0:
1894 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1895 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1896 ; X86-NEXT:    vxorps (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x57,0x00]
1897 ; X86-NEXT:    retl # encoding: [0xc3]
1898 ;
1899 ; X64-LABEL: test_mask_xor_ps_rmbkz_512:
1900 ; X64:       # %bb.0:
1901 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1902 ; X64-NEXT:    vxorps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x57,0x07]
1903 ; X64-NEXT:    retq # encoding: [0xc3]
1904   %q = load float, float* %ptr_b
1905   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
1906   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
1907   %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
1908   ret <16 x float> %res
1909 }
1910
; Masked 512-bit float XOR intrinsic called by the test_mask_xor_ps_*_512
; functions. As used at those call sites the operands are, in order: the two
; <16 x float> sources, a <16 x float> pass-through value, and an i16 mask.
1911 declare <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
1912
; 512-bit register-register 64-bit multiply with no effective masking: the
; pmull.q.512 intrinsic is called with a zeroinitializer pass-through and an
; all-ones mask (i8 -1). Both runs share one check block expecting a plain
; vpmullq on zmm registers.
1913 define <8 x i64> @test_mask_mullo_epi64_rr_512(<8 x i64> %a, <8 x i64> %b) {
1914 ; CHECK-LABEL: test_mask_mullo_epi64_rr_512:
1915 ; CHECK:       # %bb.0:
1916 ; CHECK-NEXT:    vpmullq %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x40,0xc1]
1917 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1918   %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1919   ret <8 x i64> %res
1920 }
1921
; 512-bit register-register 64-bit multiply with a caller-supplied pass-through
; (%passThru) and a variable i8 %mask. The checks expect the mask moved into
; %k1, a vpmullq writing %zmm2 {%k1}, and then a vmovdqa64 of %zmm2 into %zmm0.
1922 define <8 x i64> @test_mask_mullo_epi64_rrk_512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1923 ; X86-LABEL: test_mask_mullo_epi64_rrk_512:
1924 ; X86:       # %bb.0:
1925 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
1926 ; X86-NEXT:    vpmullq %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x40,0xd1]
1927 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1928 ; X86-NEXT:    retl # encoding: [0xc3]
1929 ;
1930 ; X64-LABEL: test_mask_mullo_epi64_rrk_512:
1931 ; X64:       # %bb.0:
1932 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1933 ; X64-NEXT:    vpmullq %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x40,0xd1]
1934 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1935 ; X64-NEXT:    retq # encoding: [0xc3]
1936   %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1937   ret <8 x i64> %res
1938 }
1939
; 512-bit register-register 64-bit multiply with a zeroinitializer pass-through
; and a variable i8 %mask. The checks expect the mask moved into %k1 and a
; single vpmullq in the {%k1} {z} form, written directly to %zmm0.
1940 define <8 x i64> @test_mask_mullo_epi64_rrkz_512(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
1941 ; X86-LABEL: test_mask_mullo_epi64_rrkz_512:
1942 ; X86:       # %bb.0:
1943 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
1944 ; X86-NEXT:    vpmullq %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x40,0xc1]
1945 ; X86-NEXT:    retl # encoding: [0xc3]
1946 ;
1947 ; X64-LABEL: test_mask_mullo_epi64_rrkz_512:
1948 ; X64:       # %bb.0:
1949 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1950 ; X64-NEXT:    vpmullq %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x40,0xc1]
1951 ; X64-NEXT:    retq # encoding: [0xc3]
1952   %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1953   ret <8 x i64> %res
1954 }
1955
; 512-bit 64-bit multiply with a memory operand and no effective masking: %b is
; loaded from %ptr_b and the intrinsic is called with a zeroinitializer
; pass-through and an all-ones mask (i8 -1). The checks expect the load folded
; into vpmullq (the 32-bit run first fetches the pointer from the stack).
1956 define <8 x i64> @test_mask_mullo_epi64_rm_512(<8 x i64> %a, <8 x i64>* %ptr_b) {
1957 ; X86-LABEL: test_mask_mullo_epi64_rm_512:
1958 ; X86:       # %bb.0:
1959 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1960 ; X86-NEXT:    vpmullq (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x40,0x00]
1961 ; X86-NEXT:    retl # encoding: [0xc3]
1962 ;
1963 ; X64-LABEL: test_mask_mullo_epi64_rm_512:
1964 ; X64:       # %bb.0:
1965 ; X64-NEXT:    vpmullq (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x40,0x07]
1966 ; X64-NEXT:    retq # encoding: [0xc3]
1967   %b = load <8 x i64>, <8 x i64>* %ptr_b
1968   %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1969   ret <8 x i64> %res
1970 }
1971
; 512-bit 64-bit multiply with a memory operand, a caller-supplied pass-through
; (%passThru) and a variable i8 %mask. The checks expect the mask moved into
; %k1, a vpmullq (mem), %zmm0, %zmm1 {%k1}, and then a vmovdqa64 of %zmm1 into
; %zmm0.
1972 define <8 x i64> @test_mask_mullo_epi64_rmk_512(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
1973 ; X86-LABEL: test_mask_mullo_epi64_rmk_512:
1974 ; X86:       # %bb.0:
1975 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1976 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1977 ; X86-NEXT:    vpmullq (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x40,0x08]
1978 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1979 ; X86-NEXT:    retl # encoding: [0xc3]
1980 ;
1981 ; X64-LABEL: test_mask_mullo_epi64_rmk_512:
1982 ; X64:       # %bb.0:
1983 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1984 ; X64-NEXT:    vpmullq (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x40,0x0f]
1985 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1986 ; X64-NEXT:    retq # encoding: [0xc3]
1987   %b = load <8 x i64>, <8 x i64>* %ptr_b
1988   %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1989   ret <8 x i64> %res
1990 }
1991
; 512-bit 64-bit multiply with a memory operand, a zeroinitializer pass-through
; and a variable i8 %mask. The checks expect the mask moved into %k1 and a
; single vpmullq from memory in the {%k1} {z} form, written directly to %zmm0.
1992 define <8 x i64> @test_mask_mullo_epi64_rmkz_512(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
1993 ; X86-LABEL: test_mask_mullo_epi64_rmkz_512:
1994 ; X86:       # %bb.0:
1995 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1996 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1997 ; X86-NEXT:    vpmullq (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x40,0x00]
1998 ; X86-NEXT:    retl # encoding: [0xc3]
1999 ;
2000 ; X64-LABEL: test_mask_mullo_epi64_rmkz_512:
2001 ; X64:       # %bb.0:
2002 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2003 ; X64-NEXT:    vpmullq (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x40,0x07]
2004 ; X64-NEXT:    retq # encoding: [0xc3]
2005   %b = load <8 x i64>, <8 x i64>* %ptr_b
2006   %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2007   ret <8 x i64> %res
2008 }
2009
; 512-bit 64-bit multiply against a splat of an i64 loaded from %ptr_b
; (insertelement into lane 0 followed by a shufflevector with an all-zero index
; vector), called with a zeroinitializer pass-through and an all-ones mask
; (i8 -1). The checks expect the splat folded into vpmullq as a {1to8} memory
; operand.
2010 define <8 x i64> @test_mask_mullo_epi64_rmb_512(<8 x i64> %a, i64* %ptr_b) {
2011 ; X86-LABEL: test_mask_mullo_epi64_rmb_512:
2012 ; X86:       # %bb.0:
2013 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2014 ; X86-NEXT:    vpmullq (%eax){1to8}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x58,0x40,0x00]
2015 ; X86-NEXT:    retl # encoding: [0xc3]
2016 ;
2017 ; X64-LABEL: test_mask_mullo_epi64_rmb_512:
2018 ; X64:       # %bb.0:
2019 ; X64-NEXT:    vpmullq (%rdi){1to8}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x58,0x40,0x07]
2020 ; X64-NEXT:    retq # encoding: [0xc3]
2021   %q = load i64, i64* %ptr_b
2022   %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2023   %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2024   %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2025   ret <8 x i64> %res
2026 }
2027
; 512-bit 64-bit multiply against a splat of an i64 loaded from %ptr_b, with a
; caller-supplied pass-through (%passThru) and a variable i8 %mask. The checks
; expect the mask in %k1, a vpmullq with a {1to8} memory operand writing
; %zmm1 {%k1}, and then a vmovdqa64 of %zmm1 into %zmm0.
2028 define <8 x i64> @test_mask_mullo_epi64_rmbk_512(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2029 ; X86-LABEL: test_mask_mullo_epi64_rmbk_512:
2030 ; X86:       # %bb.0:
2031 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2032 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
2033 ; X86-NEXT:    vpmullq (%eax){1to8}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x59,0x40,0x08]
2034 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2035 ; X86-NEXT:    retl # encoding: [0xc3]
2036 ;
2037 ; X64-LABEL: test_mask_mullo_epi64_rmbk_512:
2038 ; X64:       # %bb.0:
2039 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2040 ; X64-NEXT:    vpmullq (%rdi){1to8}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x59,0x40,0x0f]
2041 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2042 ; X64-NEXT:    retq # encoding: [0xc3]
2043   %q = load i64, i64* %ptr_b
2044   %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2045   %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2046   %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2047   ret <8 x i64> %res
2048 }
2049
; 512-bit 64-bit multiply against a splat of an i64 loaded from %ptr_b, with a
; zeroinitializer pass-through and a variable i8 %mask. The checks expect the
; mask in %k1 and a single vpmullq with a {1to8} memory operand in the
; {%k1} {z} form, written directly to %zmm0.
2050 define <8 x i64> @test_mask_mullo_epi64_rmbkz_512(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
2051 ; X86-LABEL: test_mask_mullo_epi64_rmbkz_512:
2052 ; X86:       # %bb.0:
2053 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2054 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
2055 ; X86-NEXT:    vpmullq (%eax){1to8}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xd9,0x40,0x00]
2056 ; X86-NEXT:    retl # encoding: [0xc3]
2057 ;
2058 ; X64-LABEL: test_mask_mullo_epi64_rmbkz_512:
2059 ; X64:       # %bb.0:
2060 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2061 ; X64-NEXT:    vpmullq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xd9,0x40,0x07]
2062 ; X64-NEXT:    retq # encoding: [0xc3]
2063   %q = load i64, i64* %ptr_b
2064   %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2065   %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2066   %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2067   ret <8 x i64> %res
2068 }
; Masked 512-bit 64-bit multiply intrinsic called by the
; test_mask_mullo_epi64_*_512 functions. As used at those call sites the
; operands are, in order: the two <8 x i64> sources, an <8 x i64> pass-through
; value, and an i8 mask.
2069 declare <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2070
; 256-bit register-register 64-bit multiply with no effective masking: the
; pmull.q.256 intrinsic is called with a zeroinitializer pass-through and an
; all-ones mask (i8 -1). Both runs share one check block expecting a plain
; EVEX-encoded vpmullq on ymm registers.
2071 define <4 x i64> @test_mask_mullo_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) {
2072 ; CHECK-LABEL: test_mask_mullo_epi64_rr_256:
2073 ; CHECK:       # %bb.0:
2074 ; CHECK-NEXT:    vpmullq %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x40,0xc1]
2075 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2076   %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
2077   ret <4 x i64> %res
2078 }
2079
; 256-bit register-register 64-bit multiply with a caller-supplied pass-through
; (%passThru) and a variable i8 %mask. The checks expect the mask moved into
; %k1, a vpmullq writing %ymm2 {%k1}, and then a vmovdqa of %ymm2 into %ymm0.
2080 define <4 x i64> @test_mask_mullo_epi64_rrk_256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) {
2081 ; X86-LABEL: test_mask_mullo_epi64_rrk_256:
2082 ; X86:       # %bb.0:
2083 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2084 ; X86-NEXT:    vpmullq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x40,0xd1]
2085 ; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2086 ; X86-NEXT:    retl # encoding: [0xc3]
2087 ;
2088 ; X64-LABEL: test_mask_mullo_epi64_rrk_256:
2089 ; X64:       # %bb.0:
2090 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2091 ; X64-NEXT:    vpmullq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x40,0xd1]
2092 ; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2093 ; X64-NEXT:    retq # encoding: [0xc3]
2094   %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
2095   ret <4 x i64> %res
2096 }
2097
; 256-bit register-register 64-bit multiply with a zeroinitializer pass-through
; and a variable i8 %mask. The checks expect the mask moved into %k1 and a
; single vpmullq in the {%k1} {z} form, written directly to %ymm0.
2098 define <4 x i64> @test_mask_mullo_epi64_rrkz_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
2099 ; X86-LABEL: test_mask_mullo_epi64_rrkz_256:
2100 ; X86:       # %bb.0:
2101 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2102 ; X86-NEXT:    vpmullq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x40,0xc1]
2103 ; X86-NEXT:    retl # encoding: [0xc3]
2104 ;
2105 ; X64-LABEL: test_mask_mullo_epi64_rrkz_256:
2106 ; X64:       # %bb.0:
2107 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2108 ; X64-NEXT:    vpmullq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x40,0xc1]
2109 ; X64-NEXT:    retq # encoding: [0xc3]
2110   %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
2111   ret <4 x i64> %res
2112 }
2113
; 256-bit 64-bit multiply with a memory operand and no effective masking: %b is
; loaded from %ptr_b and the intrinsic is called with a zeroinitializer
; pass-through and an all-ones mask (i8 -1). The checks expect the load folded
; into vpmullq (the 32-bit run first fetches the pointer from the stack).
2114 define <4 x i64> @test_mask_mullo_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) {
2115 ; X86-LABEL: test_mask_mullo_epi64_rm_256:
2116 ; X86:       # %bb.0:
2117 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2118 ; X86-NEXT:    vpmullq (%eax), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x40,0x00]
2119 ; X86-NEXT:    retl # encoding: [0xc3]
2120 ;
2121 ; X64-LABEL: test_mask_mullo_epi64_rm_256:
2122 ; X64:       # %bb.0:
2123 ; X64-NEXT:    vpmullq (%rdi), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x40,0x07]
2124 ; X64-NEXT:    retq # encoding: [0xc3]
2125   %b = load <4 x i64>, <4 x i64>* %ptr_b
2126   %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
2127   ret <4 x i64> %res
2128 }
2129
; 256-bit 64-bit multiply with a memory operand, a caller-supplied pass-through
; (%passThru) and a variable i8 %mask. The checks expect the mask moved into
; %k1, a vpmullq (mem), %ymm0, %ymm1 {%k1}, and then a vmovdqa of %ymm1 into
; %ymm0.
2130 define <4 x i64> @test_mask_mullo_epi64_rmk_256(<4 x i64> %a, <4 x i64>* %ptr_b, <4 x i64> %passThru, i8 %mask) {
2131 ; X86-LABEL: test_mask_mullo_epi64_rmk_256:
2132 ; X86:       # %bb.0:
2133 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2134 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
2135 ; X86-NEXT:    vpmullq (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x40,0x08]
2136 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
2137 ; X86-NEXT:    retl # encoding: [0xc3]
2138 ;
2139 ; X64-LABEL: test_mask_mullo_epi64_rmk_256:
2140 ; X64:       # %bb.0:
2141 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2142 ; X64-NEXT:    vpmullq (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x40,0x0f]
2143 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
2144 ; X64-NEXT:    retq # encoding: [0xc3]
2145   %b = load <4 x i64>, <4 x i64>* %ptr_b
2146   %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
2147   ret <4 x i64> %res
2148 }
2149
; 256-bit 64-bit multiply with a memory operand, a zeroinitializer pass-through
; and a variable i8 %mask. The checks expect the mask moved into %k1 and a
; single vpmullq from memory in the {%k1} {z} form, written directly to %ymm0.
2150 define <4 x i64> @test_mask_mullo_epi64_rmkz_256(<4 x i64> %a, <4 x i64>* %ptr_b, i8 %mask) {
2151 ; X86-LABEL: test_mask_mullo_epi64_rmkz_256:
2152 ; X86:       # %bb.0:
2153 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2154 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
2155 ; X86-NEXT:    vpmullq (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x40,0x00]
2156 ; X86-NEXT:    retl # encoding: [0xc3]
2157 ;
2158 ; X64-LABEL: test_mask_mullo_epi64_rmkz_256:
2159 ; X64:       # %bb.0:
2160 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2161 ; X64-NEXT:    vpmullq (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x40,0x07]
2162 ; X64-NEXT:    retq # encoding: [0xc3]
2163   %b = load <4 x i64>, <4 x i64>* %ptr_b
2164   %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
2165   ret <4 x i64> %res
2166 }
2167
; 256-bit 64-bit multiply against a splat of an i64 loaded from %ptr_b
; (insertelement into lane 0 followed by a shufflevector with an all-zero index
; vector), called with a zeroinitializer pass-through and an all-ones mask
; (i8 -1). The checks expect the splat folded into vpmullq as a {1to4} memory
; operand.
2168 define <4 x i64> @test_mask_mullo_epi64_rmb_256(<4 x i64> %a, i64* %ptr_b) {
2169 ; X86-LABEL: test_mask_mullo_epi64_rmb_256:
2170 ; X86:       # %bb.0:
2171 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2172 ; X86-NEXT:    vpmullq (%eax){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x38,0x40,0x00]
2173 ; X86-NEXT:    retl # encoding: [0xc3]
2174 ;
2175 ; X64-LABEL: test_mask_mullo_epi64_rmb_256:
2176 ; X64:       # %bb.0:
2177 ; X64-NEXT:    vpmullq (%rdi){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x38,0x40,0x07]
2178 ; X64-NEXT:    retq # encoding: [0xc3]
2179   %q = load i64, i64* %ptr_b
2180   %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
2181   %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
2182   %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
2183   ret <4 x i64> %res
2184 }
2185
; 256-bit 64-bit multiply against a splat of an i64 loaded from %ptr_b, with a
; caller-supplied pass-through (%passThru) and a variable i8 %mask. The checks
; expect the mask in %k1, a vpmullq with a {1to4} memory operand writing
; %ymm1 {%k1}, and then a vmovdqa of %ymm1 into %ymm0.
2186 define <4 x i64> @test_mask_mullo_epi64_rmbk_256(<4 x i64> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) {
2187 ; X86-LABEL: test_mask_mullo_epi64_rmbk_256:
2188 ; X86:       # %bb.0:
2189 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2190 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
2191 ; X86-NEXT:    vpmullq (%eax){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x39,0x40,0x08]
2192 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
2193 ; X86-NEXT:    retl # encoding: [0xc3]
2194 ;
2195 ; X64-LABEL: test_mask_mullo_epi64_rmbk_256:
2196 ; X64:       # %bb.0:
2197 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2198 ; X64-NEXT:    vpmullq (%rdi){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x39,0x40,0x0f]
2199 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
2200 ; X64-NEXT:    retq # encoding: [0xc3]
2201   %q = load i64, i64* %ptr_b
2202   %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
2203   %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
2204   %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
2205   ret <4 x i64> %res
2206 }
2207
; 256-bit 64-bit multiply against a splat of an i64 loaded from %ptr_b, with a
; zeroinitializer pass-through and a variable i8 %mask. The checks expect the
; mask in %k1 and a single vpmullq with a {1to4} memory operand in the
; {%k1} {z} form, written directly to %ymm0.
2208 define <4 x i64> @test_mask_mullo_epi64_rmbkz_256(<4 x i64> %a, i64* %ptr_b, i8 %mask) {
2209 ; X86-LABEL: test_mask_mullo_epi64_rmbkz_256:
2210 ; X86:       # %bb.0:
2211 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2212 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
2213 ; X86-NEXT:    vpmullq (%eax){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xb9,0x40,0x00]
2214 ; X86-NEXT:    retl # encoding: [0xc3]
2215 ;
2216 ; X64-LABEL: test_mask_mullo_epi64_rmbkz_256:
2217 ; X64:       # %bb.0:
2218 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2219 ; X64-NEXT:    vpmullq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xb9,0x40,0x07]
2220 ; X64-NEXT:    retq # encoding: [0xc3]
2221   %q = load i64, i64* %ptr_b
2222   %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
2223   %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
2224   %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
2225   ret <4 x i64> %res
2226 }
2227
; Masked 256-bit 64-bit multiply intrinsic called by the
; test_mask_mullo_epi64_*_256 functions. As used at those call sites the
; operands are, in order: the two <4 x i64> sources, a <4 x i64> pass-through
; value, and an i8 mask.
2228 declare <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
2229
; Register-register 128-bit form with an all-ones mask (i8 -1) and a
; zeroinitializer pass-through: the expected output is a plain, undecorated
; vpmullq on xmm registers, with one shared set of assertions for both targets.
2230 define <2 x i64> @test_mask_mullo_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) {
2231 ; CHECK-LABEL: test_mask_mullo_epi64_rr_128:
2232 ; CHECK:       # %bb.0:
2233 ; CHECK-NEXT:    vpmullq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x40,0xc1]
2234 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2235   %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
2236   ret <2 x i64> %res
2237 }
2238
; Register-register 128-bit form with a variable mask and an explicit
; %passThru operand: the expected output is a merge-masked vpmullq ({%k1}
; without {z}) followed by a vmovdqa of the result into xmm0.
2239 define <2 x i64> @test_mask_mullo_epi64_rrk_128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) {
2240 ; X86-LABEL: test_mask_mullo_epi64_rrk_128:
2241 ; X86:       # %bb.0:
2242 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2243 ; X86-NEXT:    vpmullq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x40,0xd1]
2244 ; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2245 ; X86-NEXT:    retl # encoding: [0xc3]
2246 ;
2247 ; X64-LABEL: test_mask_mullo_epi64_rrk_128:
2248 ; X64:       # %bb.0:
2249 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2250 ; X64-NEXT:    vpmullq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x40,0xd1]
2251 ; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2252 ; X64-NEXT:    retq # encoding: [0xc3]
2253   %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
2254   ret <2 x i64> %res
2255 }
2256
; Register-register 128-bit form with a variable mask and a zeroinitializer
; pass-through: the expected output is a single zero-masked vpmullq
; ({%k1} {z}) that writes xmm0 directly, with no extra register move.
2257 define <2 x i64> @test_mask_mullo_epi64_rrkz_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
2258 ; X86-LABEL: test_mask_mullo_epi64_rrkz_128:
2259 ; X86:       # %bb.0:
2260 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2261 ; X86-NEXT:    vpmullq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x40,0xc1]
2262 ; X86-NEXT:    retl # encoding: [0xc3]
2263 ;
2264 ; X64-LABEL: test_mask_mullo_epi64_rrkz_128:
2265 ; X64:       # %bb.0:
2266 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2267 ; X64-NEXT:    vpmullq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x40,0xc1]
2268 ; X64-NEXT:    retq # encoding: [0xc3]
2269   %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
2270   ret <2 x i64> %res
2271 }
2272
; Register-memory 128-bit form with an all-ones mask (i8 -1): the second
; operand is a full <2 x i64> vector loaded from %ptr_b, and the expected
; output folds that load into an undecorated vpmullq with a memory operand.
2273 define <2 x i64> @test_mask_mullo_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) {
2274 ; X86-LABEL: test_mask_mullo_epi64_rm_128:
2275 ; X86:       # %bb.0:
2276 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2277 ; X86-NEXT:    vpmullq (%eax), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x40,0x00]
2278 ; X86-NEXT:    retl # encoding: [0xc3]
2279 ;
2280 ; X64-LABEL: test_mask_mullo_epi64_rm_128:
2281 ; X64:       # %bb.0:
2282 ; X64-NEXT:    vpmullq (%rdi), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x40,0x07]
2283 ; X64-NEXT:    retq # encoding: [0xc3]
2284   %b = load <2 x i64>, <2 x i64>* %ptr_b
2285   %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
2286   ret <2 x i64> %res
2287 }
2288
; Register-memory 128-bit form with a variable mask and an explicit %passThru
; operand: the vector loaded from %ptr_b is expected to be folded into a
; merge-masked vpmullq ({%k1} without {z}), followed by a vmovdqa of the
; result into xmm0.
2289 define <2 x i64> @test_mask_mullo_epi64_rmk_128(<2 x i64> %a, <2 x i64>* %ptr_b, <2 x i64> %passThru, i8 %mask) {
2290 ; X86-LABEL: test_mask_mullo_epi64_rmk_128:
2291 ; X86:       # %bb.0:
2292 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2293 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
2294 ; X86-NEXT:    vpmullq (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x40,0x08]
2295 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2296 ; X86-NEXT:    retl # encoding: [0xc3]
2297 ;
2298 ; X64-LABEL: test_mask_mullo_epi64_rmk_128:
2299 ; X64:       # %bb.0:
2300 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2301 ; X64-NEXT:    vpmullq (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x40,0x0f]
2302 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2303 ; X64-NEXT:    retq # encoding: [0xc3]
2304   %b = load <2 x i64>, <2 x i64>* %ptr_b
2305   %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
2306   ret <2 x i64> %res
2307 }
2308
; Register-memory 128-bit form with a variable mask and a zeroinitializer
; pass-through: the vector loaded from %ptr_b is expected to be folded into a
; single zero-masked vpmullq ({%k1} {z}) that writes xmm0 directly.
2309 define <2 x i64> @test_mask_mullo_epi64_rmkz_128(<2 x i64> %a, <2 x i64>* %ptr_b, i8 %mask) {
2310 ; X86-LABEL: test_mask_mullo_epi64_rmkz_128:
2311 ; X86:       # %bb.0:
2312 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2313 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
2314 ; X86-NEXT:    vpmullq (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x40,0x00]
2315 ; X86-NEXT:    retl # encoding: [0xc3]
2316 ;
2317 ; X64-LABEL: test_mask_mullo_epi64_rmkz_128:
2318 ; X64:       # %bb.0:
2319 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2320 ; X64-NEXT:    vpmullq (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x40,0x07]
2321 ; X64-NEXT:    retq # encoding: [0xc3]
2322   %b = load <2 x i64>, <2 x i64>* %ptr_b
2323   %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
2324   ret <2 x i64> %res
2325 }
2326
; Broadcast-memory 128-bit form with an all-ones mask (i8 -1): the i64 loaded
; from %ptr_b is splatted to both lanes via insertelement + shufflevector, and
; the expected output folds this into an unmasked vpmullq with a {1to2}
; broadcast memory operand.
2327 define <2 x i64> @test_mask_mullo_epi64_rmb_128(<2 x i64> %a, i64* %ptr_b) {
2328 ; X86-LABEL: test_mask_mullo_epi64_rmb_128:
2329 ; X86:       # %bb.0:
2330 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2331 ; X86-NEXT:    vpmullq (%eax){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x18,0x40,0x00]
2332 ; X86-NEXT:    retl # encoding: [0xc3]
2333 ;
2334 ; X64-LABEL: test_mask_mullo_epi64_rmb_128:
2335 ; X64:       # %bb.0:
2336 ; X64-NEXT:    vpmullq (%rdi){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x18,0x40,0x07]
2337 ; X64-NEXT:    retq # encoding: [0xc3]
2338   %q = load i64, i64* %ptr_b
2339   %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
2340   %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
2341   %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
2342   ret <2 x i64> %res
2343 }
2344
; Broadcast-memory 128-bit form with a variable mask and an explicit
; %passThru operand: the splat of the i64 loaded from %ptr_b is expected to be
; folded into a merge-masked vpmullq with a {1to2} memory operand, followed by
; a vmovdqa of the result into xmm0.
2345 define <2 x i64> @test_mask_mullo_epi64_rmbk_128(<2 x i64> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) {
2346 ; X86-LABEL: test_mask_mullo_epi64_rmbk_128:
2347 ; X86:       # %bb.0:
2348 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2349 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
2350 ; X86-NEXT:    vpmullq (%eax){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0x40,0x08]
2351 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2352 ; X86-NEXT:    retl # encoding: [0xc3]
2353 ;
2354 ; X64-LABEL: test_mask_mullo_epi64_rmbk_128:
2355 ; X64:       # %bb.0:
2356 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2357 ; X64-NEXT:    vpmullq (%rdi){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0x40,0x0f]
2358 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2359 ; X64-NEXT:    retq # encoding: [0xc3]
2360   %q = load i64, i64* %ptr_b
2361   %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
2362   %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
2363   %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
2364   ret <2 x i64> %res
2365 }
2366
; Broadcast-memory 128-bit form with a variable mask and a zeroinitializer
; pass-through: the splat of the i64 loaded from %ptr_b is expected to be
; folded into a single vpmullq carrying both the {1to2} broadcast and the
; {%k1} {z} zero-masking decorations.
2367 define <2 x i64> @test_mask_mullo_epi64_rmbkz_128(<2 x i64> %a, i64* %ptr_b, i8 %mask) {
2368 ; X86-LABEL: test_mask_mullo_epi64_rmbkz_128:
2369 ; X86:       # %bb.0:
2370 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2371 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
2372 ; X86-NEXT:    vpmullq (%eax){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0x40,0x00]
2373 ; X86-NEXT:    retl # encoding: [0xc3]
2374 ;
2375 ; X64-LABEL: test_mask_mullo_epi64_rmbkz_128:
2376 ; X64:       # %bb.0:
2377 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2378 ; X64-NEXT:    vpmullq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0x40,0x07]
2379 ; X64-NEXT:    retq # encoding: [0xc3]
2380   %q = load i64, i64* %ptr_b
2381   %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
2382   %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
2383   %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
2384   ret <2 x i64> %res
2385 }
2386
2387 declare <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
2388
2389 declare <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double>, i32, <2 x double>, i8)
2390
; Calls the legacy masked vextractf64x2.256 intrinsic three times on %x0 with
; immediate index 1: merge-masked into %x2 under %x3, zero-masked under %x3,
; and unmasked (mask i8 -1). The three results are summed with fadd so all of
; them stay live. Expected output: the unmasked call becomes vextractf128,
; while the two masked calls remain vextractf64x2 with {%k1} and {%k1} {z}.
2391 define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_256(<4 x double> %x0, <2 x double> %x2, i8 %x3) {
2392 ; X86-LABEL: test_int_x86_avx512_mask_vextractf64x2_256:
2393 ; X86:       # %bb.0:
2394 ; X86-NEXT:    vextractf128 $1, %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc2,0x01]
2395 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2396 ; X86-NEXT:    vextractf64x2 $1, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x19,0xc1,0x01]
2397 ; X86-NEXT:    vaddpd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xca]
2398 ; X86-NEXT:    vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x19,0xc0,0x01]
2399 ; X86-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
2400 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2401 ; X86-NEXT:    retl # encoding: [0xc3]
2402 ;
2403 ; X64-LABEL: test_int_x86_avx512_mask_vextractf64x2_256:
2404 ; X64:       # %bb.0:
2405 ; X64-NEXT:    vextractf128 $1, %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc2,0x01]
2406 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2407 ; X64-NEXT:    vextractf64x2 $1, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x19,0xc1,0x01]
2408 ; X64-NEXT:    vaddpd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xca]
2409 ; X64-NEXT:    vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x19,0xc0,0x01]
2410 ; X64-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
2411 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2412 ; X64-NEXT:    retq # encoding: [0xc3]
2413   %res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> %x2, i8 %x3)
2414   %res2 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 %x3)
2415   %res1 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 -1)
2416   %res3 = fadd <2 x double> %res, %res1
2417   %res4 = fadd <2 x double> %res3, %res2
2418   ret <2 x double> %res4
2419 }
2420
2421 declare <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double>, <2 x double>, i32, <4 x double>, i8)
2422
; Calls the legacy masked insertf64x2.256 intrinsic three times, inserting %x1
; into %x0 with immediate index 1: merge-masked into %x3 under %x4, unmasked
; (mask i8 -1), and zero-masked under %x4. The results are summed with fadd.
; Expected output: the unmasked call becomes vinsertf128, while the masked
; calls remain vinsertf64x2 with {%k1} and {%k1} {z}.
2423 define <4 x double>@test_int_x86_avx512_mask_insertf64x2_256(<4 x double> %x0, <2 x double> %x1, <4 x double> %x3, i8 %x4) {
2424 ; X86-LABEL: test_int_x86_avx512_mask_insertf64x2_256:
2425 ; X86:       # %bb.0:
2426 ; X86-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd9,0x01]
2427 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2428 ; X86-NEXT:    vinsertf64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x18,0xd1,0x01]
2429 ; X86-NEXT:    vaddpd %ymm3, %ymm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xd3]
2430 ; X86-NEXT:    vinsertf64x2 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x18,0xc1,0x01]
2431 ; X86-NEXT:    vaddpd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc2]
2432 ; X86-NEXT:    retl # encoding: [0xc3]
2433 ;
2434 ; X64-LABEL: test_int_x86_avx512_mask_insertf64x2_256:
2435 ; X64:       # %bb.0:
2436 ; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd9,0x01]
2437 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2438 ; X64-NEXT:    vinsertf64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x18,0xd1,0x01]
2439 ; X64-NEXT:    vaddpd %ymm3, %ymm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xd3]
2440 ; X64-NEXT:    vinsertf64x2 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x18,0xc1,0x01]
2441 ; X64-NEXT:    vaddpd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc2]
2442 ; X64-NEXT:    retq # encoding: [0xc3]
2443   %res = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> %x3, i8 %x4)
2444   %res1 = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> %x3, i8 -1)
2445   %res2 = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> zeroinitializer, i8 %x4)
2446   %res3 = fadd <4 x double> %res, %res1
2447   %res4 = fadd <4 x double> %res2, %res3
2448   ret <4 x double> %res4
2449 }
2450
2451 declare <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64>, <2 x i64>, i32, <4 x i64>, i8)
2452
; Integer counterpart of the insertf64x2 test: calls the legacy masked
; inserti64x2.256 intrinsic three times, inserting %x1 into %x0 with immediate
; index 1 (merge-masked into %x3, unmasked with mask i8 -1, and zero-masked),
; then sums the results with add. Expected output: the unmasked call becomes
; vinserti128, while the masked calls remain vinserti64x2 with {%k1} and
; {%k1} {z}.
2453 define <4 x i64>@test_int_x86_avx512_mask_inserti64x2_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x3, i8 %x4) {
2454 ; X86-LABEL: test_int_x86_avx512_mask_inserti64x2_256:
2455 ; X86:       # %bb.0:
2456 ; X86-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd9,0x01]
2457 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2458 ; X86-NEXT:    vinserti64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x38,0xd1,0x01]
2459 ; X86-NEXT:    vinserti64x2 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x38,0xc1,0x01]
2460 ; X86-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
2461 ; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
2462 ; X86-NEXT:    retl # encoding: [0xc3]
2463 ;
2464 ; X64-LABEL: test_int_x86_avx512_mask_inserti64x2_256:
2465 ; X64:       # %bb.0:
2466 ; X64-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd9,0x01]
2467 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2468 ; X64-NEXT:    vinserti64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x38,0xd1,0x01]
2469 ; X64-NEXT:    vinserti64x2 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x38,0xc1,0x01]
2470 ; X64-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
2471 ; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
2472 ; X64-NEXT:    retq # encoding: [0xc3]
2473   %res = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> %x3, i8 %x4)
2474   %res1 = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> %x3, i8 -1)
2475   %res2 = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> zeroinitializer, i8 %x4)
2476   %res3 = add <4 x i64> %res, %res1
2477   %res4 = add <4 x i64> %res3, %res2
2478   ret <4 x i64> %res4
2479 }
2480
2481 declare <4 x i32> @llvm.x86.avx512.cvtmask2d.128(i8)
2482
; Converts the i8 mask argument %x0 to a <4 x i32> vector through the legacy
; cvtmask2d.128 intrinsic. Expected output: the argument is moved into %k0
; (from the stack on the 32-bit target, from %edi on the 64-bit target) and
; then expanded with vpmovm2d into xmm0.
2483 define <4 x i32>@test_int_x86_avx512_cvtmask2d_128(i8 %x0) {
2484 ; X86-LABEL: test_int_x86_avx512_cvtmask2d_128:
2485 ; X86:       # %bb.0:
2486 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf9,0x90,0x44,0x24,0x04]
2487 ; X86-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
2488 ; X86-NEXT:    retl # encoding: [0xc3]
2489 ;
2490 ; X64-LABEL: test_int_x86_avx512_cvtmask2d_128:
2491 ; X64:       # %bb.0:
2492 ; X64-NEXT:    kmovw %edi, %k0 # encoding: [0xc5,0xf8,0x92,0xc7]
2493 ; X64-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
2494 ; X64-NEXT:    retq # encoding: [0xc3]
2495   %res = call <4 x i32> @llvm.x86.avx512.cvtmask2d.128(i8 %x0)
2496   ret <4 x i32> %res
2497 }
2498
2499 declare <8 x i32> @llvm.x86.avx512.cvtmask2d.256(i8)
2500
; Converts the i8 mask argument %x0 to an <8 x i32> vector through the legacy
; cvtmask2d.256 intrinsic. Expected output: the argument is moved into %k0 and
; then expanded with vpmovm2d into ymm0.
2501 define <8 x i32>@test_int_x86_avx512_cvtmask2d_256(i8 %x0) {
2502 ; X86-LABEL: test_int_x86_avx512_cvtmask2d_256:
2503 ; X86:       # %bb.0:
2504 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf9,0x90,0x44,0x24,0x04]
2505 ; X86-NEXT:    vpmovm2d %k0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x28,0x38,0xc0]
2506 ; X86-NEXT:    retl # encoding: [0xc3]
2507 ;
2508 ; X64-LABEL: test_int_x86_avx512_cvtmask2d_256:
2509 ; X64:       # %bb.0:
2510 ; X64-NEXT:    kmovw %edi, %k0 # encoding: [0xc5,0xf8,0x92,0xc7]
2511 ; X64-NEXT:    vpmovm2d %k0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x28,0x38,0xc0]
2512 ; X64-NEXT:    retq # encoding: [0xc3]
2513   %res = call <8 x i32> @llvm.x86.avx512.cvtmask2d.256(i8 %x0)
2514   ret <8 x i32> %res
2515 }
2516
2517 declare <2 x i64> @llvm.x86.avx512.cvtmask2q.128(i8)
2518
; Converts the i8 mask argument %x0 to a <2 x i64> vector through the legacy
; cvtmask2q.128 intrinsic. Expected output: the argument is moved into %k0 and
; then expanded with vpmovm2q into xmm0.
2519 define <2 x i64>@test_int_x86_avx512_cvtmask2q_128(i8 %x0) {
2520 ; X86-LABEL: test_int_x86_avx512_cvtmask2q_128:
2521 ; X86:       # %bb.0:
2522 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf9,0x90,0x44,0x24,0x04]
2523 ; X86-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
2524 ; X86-NEXT:    retl # encoding: [0xc3]
2525 ;
2526 ; X64-LABEL: test_int_x86_avx512_cvtmask2q_128:
2527 ; X64:       # %bb.0:
2528 ; X64-NEXT:    kmovw %edi, %k0 # encoding: [0xc5,0xf8,0x92,0xc7]
2529 ; X64-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
2530 ; X64-NEXT:    retq # encoding: [0xc3]
2531   %res = call <2 x i64> @llvm.x86.avx512.cvtmask2q.128(i8 %x0)
2532   ret <2 x i64> %res
2533 }
2534
2535 declare <4 x i64> @llvm.x86.avx512.cvtmask2q.256(i8)
2536
; Converts the i8 mask argument %x0 to a <4 x i64> vector through the legacy
; cvtmask2q.256 intrinsic. Expected output: the argument is moved into %k0 and
; then expanded with vpmovm2q into ymm0.
2537 define <4 x i64>@test_int_x86_avx512_cvtmask2q_256(i8 %x0) {
2538 ; X86-LABEL: test_int_x86_avx512_cvtmask2q_256:
2539 ; X86:       # %bb.0:
2540 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf9,0x90,0x44,0x24,0x04]
2541 ; X86-NEXT:    vpmovm2q %k0, %ymm0 # encoding: [0x62,0xf2,0xfe,0x28,0x38,0xc0]
2542 ; X86-NEXT:    retl # encoding: [0xc3]
2543 ;
2544 ; X64-LABEL: test_int_x86_avx512_cvtmask2q_256:
2545 ; X64:       # %bb.0:
2546 ; X64-NEXT:    kmovw %edi, %k0 # encoding: [0xc5,0xf8,0x92,0xc7]
2547 ; X64-NEXT:    vpmovm2q %k0, %ymm0 # encoding: [0x62,0xf2,0xfe,0x28,0x38,0xc0]
2548 ; X64-NEXT:    retq # encoding: [0xc3]
2549   %res = call <4 x i64> @llvm.x86.avx512.cvtmask2q.256(i8 %x0)
2550   ret <4 x i64> %res
2551 }
2552
2553 declare <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double>, <4 x double>, i8)
2554
; Calls the legacy masked broadcastf64x2.256 intrinsic three times on the
; register operand %x0: unmasked (mask i8 -1), merge-masked into %x2 under
; %mask, and zero-masked under %mask, then sums the results with fadd.
; Expected output: with the source in a register, no broadcast instruction is
; emitted; the unmasked call becomes vinsertf128 $1 of xmm0 into ymm0 and the
; masked calls become vinsertf64x2 $1 with {%k1} and {%k1} {z}.
2555 define <4 x double>@test_int_x86_avx512_mask_broadcastf64x2_256(<2 x double> %x0, <4 x double> %x2, i8 %mask) {
2556 ; X86-LABEL: test_int_x86_avx512_mask_broadcastf64x2_256:
2557 ; X86:       # %bb.0:
2558 ; X86-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
2559 ; X86-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd0,0x01]
2560 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2561 ; X86-NEXT:    vinsertf64x2 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x18,0xc8,0x01]
2562 ; X86-NEXT:    vaddpd %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc9]
2563 ; X86-NEXT:    vinsertf64x2 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x18,0xc0,0x01]
2564 ; X86-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
2565 ; X86-NEXT:    retl # encoding: [0xc3]
2566 ;
2567 ; X64-LABEL: test_int_x86_avx512_mask_broadcastf64x2_256:
2568 ; X64:       # %bb.0:
2569 ; X64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
2570 ; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd0,0x01]
2571 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2572 ; X64-NEXT:    vinsertf64x2 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x18,0xc8,0x01]
2573 ; X64-NEXT:    vaddpd %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc9]
2574 ; X64-NEXT:    vinsertf64x2 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x18,0xc0,0x01]
2575 ; X64-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
2576 ; X64-NEXT:    retq # encoding: [0xc3]
2577
2578   %res1 = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> %x2, i8 -1)
2579   %res2 = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> %x2, i8 %mask)
2580   %res3 = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> zeroinitializer, i8 %mask)
2581   %res4 = fadd <4 x double> %res1, %res2
2582   %res5 = fadd <4 x double> %res3, %res4
2583   ret <4 x double> %res5
2584 }
2585
; Memory-source variant of the broadcastf64x2.256 test: the <2 x double>
; operand is loaded from %x0ptr and broadcast merge-masked into %x2 under
; %mask. Expected output: the load is folded into a single masked
; vbroadcastf64x2 from memory that writes ymm0 under {%k1}.
2586 define <4 x double>@test_int_x86_avx512_mask_broadcastf64x2_256_load(<2 x double>* %x0ptr, <4 x double> %x2, i8 %mask) {
2587 ; X86-LABEL: test_int_x86_avx512_mask_broadcastf64x2_256_load:
2588 ; X86:       # %bb.0:
2589 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2590 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
2591 ; X86-NEXT:    vbroadcastf64x2 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x1a,0x00]
2592 ; X86-NEXT:    # ymm0 {%k1} = mem[0,1,0,1]
2593 ; X86-NEXT:    retl # encoding: [0xc3]
2594 ;
2595 ; X64-LABEL: test_int_x86_avx512_mask_broadcastf64x2_256_load:
2596 ; X64:       # %bb.0:
2597 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2598 ; X64-NEXT:    vbroadcastf64x2 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x1a,0x07]
2599 ; X64-NEXT:    # ymm0 {%k1} = mem[0,1,0,1]
2600 ; X64-NEXT:    retq # encoding: [0xc3]
2601
2602   %x0 = load <2 x double>, <2 x double>* %x0ptr
2603   %res = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> %x2, i8 %mask)
2604   ret <4 x double> %res
2605 }
2606
2607 declare <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64>, <4 x i64>, i8)
2608
; Integer counterpart of the broadcastf64x2.256 register test: calls the
; legacy masked broadcasti64x2.256 intrinsic three times on %x0 (unmasked with
; mask i8 -1, merge-masked into %x2, and zero-masked), then sums the results
; with add. Expected output: the unmasked call becomes vinserti128 $1 of xmm0
; into ymm0 and the masked calls become vinserti64x2 $1 with {%k1} and
; {%k1} {z}.
2609 define <4 x i64>@test_int_x86_avx512_mask_broadcasti64x2_256(<2 x i64> %x0, <4 x i64> %x2, i8 %mask) {
2610 ; X86-LABEL: test_int_x86_avx512_mask_broadcasti64x2_256:
2611 ; X86:       # %bb.0:
2612 ; X86-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
2613 ; X86-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd0,0x01]
2614 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2615 ; X86-NEXT:    vinserti64x2 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x38,0xc8,0x01]
2616 ; X86-NEXT:    vinserti64x2 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x38,0xc0,0x01]
2617 ; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
2618 ; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
2619 ; X86-NEXT:    retl # encoding: [0xc3]
2620 ;
2621 ; X64-LABEL: test_int_x86_avx512_mask_broadcasti64x2_256:
2622 ; X64:       # %bb.0:
2623 ; X64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
2624 ; X64-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd0,0x01]
2625 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2626 ; X64-NEXT:    vinserti64x2 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x38,0xc8,0x01]
2627 ; X64-NEXT:    vinserti64x2 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x38,0xc0,0x01]
2628 ; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
2629 ; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
2630 ; X64-NEXT:    retq # encoding: [0xc3]
2631
2632   %res1 = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> %x2, i8 -1)
2633   %res2 = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> %x2, i8 %mask)
2634   %res3 = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> zeroinitializer, i8 %mask)
2635   %res4 = add <4 x i64> %res1, %res2
2636   %res5 = add <4 x i64> %res3, %res4
2637   ret <4 x i64> %res5
2638 }
2639
; Memory-source variant of the broadcasti64x2.256 test: the <2 x i64> operand
; is loaded from %x0ptr and broadcast merge-masked into %x2 under %mask.
; Expected output: the load is folded into a single masked vbroadcasti64x2
; from memory that writes ymm0 under {%k1}.
2640 define <4 x i64>@test_int_x86_avx512_mask_broadcasti64x2_256_load(<2 x i64>* %x0ptr, <4 x i64> %x2, i8 %mask) {
2641 ; X86-LABEL: test_int_x86_avx512_mask_broadcasti64x2_256_load:
2642 ; X86:       # %bb.0:
2643 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2644 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
2645 ; X86-NEXT:    vbroadcasti64x2 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x5a,0x00]
2646 ; X86-NEXT:    # ymm0 {%k1} = mem[0,1,0,1]
2647 ; X86-NEXT:    retl # encoding: [0xc3]
2648 ;
2649 ; X64-LABEL: test_int_x86_avx512_mask_broadcasti64x2_256_load:
2650 ; X64:       # %bb.0:
2651 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2652 ; X64-NEXT:    vbroadcasti64x2 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x5a,0x07]
2653 ; X64-NEXT:    # ymm0 {%k1} = mem[0,1,0,1]
2654 ; X64-NEXT:    retq # encoding: [0xc3]
2655
2656   %x0 = load <2 x i64>, <2 x i64>* %x0ptr
2657   %res = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> %x2, i8 %mask)
2658   ret <4 x i64> %res
2659 }
2660
2661 declare <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float>, <8 x float>, i8)
2662
; Calls the legacy masked broadcastf32x2.256 intrinsic three times on the
; register operand %x0: merge-masked into %x2 under %x3, zero-masked under
; %x3, and unmasked (mask i8 -1), then sums the results with fadd. Expected
; output: the unmasked call becomes vinsertf128 $1 of xmm0 into ymm0 and the
; masked calls become vinsertf32x4 $1 with {%k1} and {%k1} {z}.
2663 define <8 x float>@test_int_x86_avx512_mask_broadcastf32x2_256(<4 x float> %x0, <8 x float> %x2, i8 %x3) {
2664 ; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x2_256:
2665 ; X86:       # %bb.0:
2666 ; X86-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
2667 ; X86-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd0,0x01]
2668 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2669 ; X86-NEXT:    vinsertf32x4 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xc8,0x01]
2670 ; X86-NEXT:    vinsertf32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc0,0x01]
2671 ; X86-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
2672 ; X86-NEXT:    vaddps %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc2]
2673 ; X86-NEXT:    retl # encoding: [0xc3]
2674 ;
2675 ; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x2_256:
2676 ; X64:       # %bb.0:
2677 ; X64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
2678 ; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd0,0x01]
2679 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2680 ; X64-NEXT:    vinsertf32x4 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xc8,0x01]
2681 ; X64-NEXT:    vinsertf32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc0,0x01]
2682 ; X64-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
2683 ; X64-NEXT:    vaddps %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc2]
2684 ; X64-NEXT:    retq # encoding: [0xc3]
2685   %res = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float>  %x0, <8 x float> %x2, i8 %x3)
2686   %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float> %x0, <8 x float> zeroinitializer, i8 %x3)
2687   %res2 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float> %x0, <8 x float> %x2, i8 -1)
2688   %res3 = fadd <8 x float> %res, %res1
2689   %res4 = fadd <8 x float> %res3, %res2
2690   ret <8 x float> %res4
2691 }
2692
2693 declare <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32>, <8 x i32>, i8)
2694
; Calls the legacy masked broadcasti32x2.256 intrinsic three times and sums
; the results with add. The merge-masked call takes its source %y from memory:
; an i64 loaded from %y_ptr is inserted into lane 0 of an undef <2 x i64> and
; bitcast to <4 x i32>. The zero-masked and unmasked (mask i8 -1) calls use
; the register operand %x0.
; Expected output differs per target for the memory-sourced call: the 64-bit
; target folds the load into a masked vbroadcasti32x2 from memory, whereas the
; 32-bit target emits vpbroadcastq from memory followed by a masked vmovdqa32.
; On both targets the %x0 calls become vinserti128 $1 and a zero-masked
; vinserti32x4 $1.
2695 define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x2_256(<4 x i32> %x0, <8 x i32> %x2, i8 %x3, i64 * %y_ptr) {
2696 ; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x2_256:
2697 ; X86:       # %bb.0:
2698 ; X86-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
2699 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
2700 ; X86-NEXT:    vpbroadcastq (%eax), %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0x10]
2701 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2702 ; X86-NEXT:    vmovdqa32 %ymm2, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0xca]
2703 ; X86-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd0,0x01]
2704 ; X86-NEXT:    vinserti32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc0,0x01]
2705 ; X86-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
2706 ; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
2707 ; X86-NEXT:    retl # encoding: [0xc3]
2708 ;
2709 ; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x2_256:
2710 ; X64:       # %bb.0:
2711 ; X64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
2712 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2713 ; X64-NEXT:    vbroadcasti32x2 (%rsi), %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x59,0x0e]
2714 ; X64-NEXT:    # ymm1 {%k1} = mem[0,1,0,1,0,1,0,1]
2715 ; X64-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd0,0x01]
2716 ; X64-NEXT:    vinserti32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc0,0x01]
2717 ; X64-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
2718 ; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
2719 ; X64-NEXT:    retq # encoding: [0xc3]
2720   %y_64  = load i64, i64 * %y_ptr
2721   %y_v2i64 = insertelement <2 x i64> undef, i64 %y_64, i32 0
2722   %y = bitcast <2 x i64> %y_v2i64 to <4 x i32>
2723   %res = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32>  %y, <8 x i32> %x2, i8 %x3)
2724   %res1 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %x3)
2725   %res2 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32> %x0, <8 x i32> %x2, i8 -1)
2726   %res3 = add <8 x i32> %res, %res1
2727   %res4 = add <8 x i32> %res3, %res2
2728   ret <8 x i32> %res4
2729 }
2730
2731 declare <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32>, <4 x i32>, i8)
2732
; Calls the legacy masked broadcasti32x2.128 intrinsic three times on %x0:
; merge-masked into %x2 under %x3, zero-masked under %x3, and unmasked (mask
; i8 -1), then sums the results with add. Expected output: no broadcast or
; shuffle instruction is emitted at this width; the masked calls become
; vmovdqa32 of xmm0 with {%k1} and {%k1} {z}, and the unmasked result is xmm0
; itself, used directly as a vpaddd operand.
2733 define <4 x i32>@test_int_x86_avx512_mask_broadcasti32x2_128(<4 x i32> %x0, <4 x i32> %x2, i8 %x3) {
2734 ; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x2_128:
2735 ; X86:       # %bb.0:
2736 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2737 ; X86-NEXT:    vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
2738 ; X86-NEXT:    vmovdqa32 %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6f,0xd0]
2739 ; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
2740 ; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
2741 ; X86-NEXT:    retl # encoding: [0xc3]
2742 ;
2743 ; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x2_128:
2744 ; X64:       # %bb.0:
2745 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2746 ; X64-NEXT:    vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
2747 ; X64-NEXT:    vmovdqa32 %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6f,0xd0]
2748 ; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
2749 ; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
2750 ; X64-NEXT:    retq # encoding: [0xc3]
2751   %res = call <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32>  %x0, <4 x i32> %x2, i8 %x3)
2752   %res1 = call <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x3)
2753   %res2 = call <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32> %x0, <4 x i32> %x2, i8 -1)
2754   %res3 = add <4 x i32> %res, %res1
2755   %res4 = add <4 x i32> %res3, %res2
2756   ret <4 x i32> %res4
2757 }
2758
2759 declare i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32>)
2760
     ; Passes the <4 x i32> argument to @llvm.x86.avx512.cvtd2mask.128 and returns
     ; the i8 result. Both RUN configurations share one set of assertions: a
     ; vpmovd2m from %xmm0 into %k0, a kmovw of %k0 into %eax, and the return
     ; (the kill annotation shows only %al is used as the result).
2761 define i8@test_int_x86_avx512_cvtd2mask_128(<4 x i32> %x0) {
2762 ; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_128:
2763 ; CHECK:       # %bb.0:
2764 ; CHECK-NEXT:    vpmovd2m %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x39,0xc0]
2765 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
2766 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
2767 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2768     %res = call i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32> %x0)
2769     ret i8 %res
2770 }
2771
2772 declare i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32>)
2773
     ; Passes the <8 x i32> argument to @llvm.x86.avx512.cvtd2mask.256 and returns
     ; the i8 result. Both RUN configurations share one set of assertions: a
     ; vpmovd2m from %ymm0 into %k0, a kmovw of %k0 into %eax, then a vzeroupper
     ; ahead of the return.
2774 define i8@test_int_x86_avx512_cvtd2mask_256(<8 x i32> %x0) {
2775 ; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_256:
2776 ; CHECK:       # %bb.0:
2777 ; CHECK-NEXT:    vpmovd2m %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x39,0xc0]
2778 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
2779 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
2780 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2781 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2782     %res = call i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32> %x0)
2783     ret i8 %res
2784 }
2785
2786 declare i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64>)
2787
     ; Passes the <2 x i64> argument to @llvm.x86.avx512.cvtq2mask.128 and returns
     ; the i8 result. Both RUN configurations share one set of assertions: a
     ; vpmovq2m from %xmm0 into %k0, a kmovw of %k0 into %eax, and the return.
2788 define i8@test_int_x86_avx512_cvtq2mask_128(<2 x i64> %x0) {
2789 ; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_128:
2790 ; CHECK:       # %bb.0:
2791 ; CHECK-NEXT:    vpmovq2m %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x39,0xc0]
2792 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
2793 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
2794 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2795     %res = call i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64> %x0)
2796     ret i8 %res
2797 }
2798
2799 declare i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64>)
2800
     ; Passes the <4 x i64> argument to @llvm.x86.avx512.cvtq2mask.256 and returns
     ; the i8 result. Both RUN configurations share one set of assertions: a
     ; vpmovq2m from %ymm0 into %k0, a kmovw of %k0 into %eax, then a vzeroupper
     ; ahead of the return.
2801 define i8@test_int_x86_avx512_cvtq2mask_256(<4 x i64> %x0) {
2802 ; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_256:
2803 ; CHECK:       # %bb.0:
2804 ; CHECK-NEXT:    vpmovq2m %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x39,0xc0]
2805 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
2806 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
2807 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2808 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2809     %res = call i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64> %x0)
2810     ret i8 %res
2811 }
2812
2813 declare <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64>, <2 x double>, i8)
2814
     ; Calls @llvm.x86.avx512.mask.cvtqq2pd.128 twice on %x0 with %x1 as the
     ; second operand: once with mask %x2 and once with an all-ones (-1) mask,
     ; then returns the fadd of both results.
     ; The expected assembly has one unmasked vcvtqq2pd into %xmm2, one
     ; vcvtqq2pd into %xmm1 under {%k1}, and a vaddpd. The two targets differ
     ; only in how the mask reaches %k1 (kmovb from the stack on i686, kmovw
     ; from %edi on x86_64) and in the return mnemonic.
2815 define <2 x double>@test_int_x86_avx512_mask_cvt_qq2pd_128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) {
2816 ; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_128:
2817 ; X86:       # %bb.0:
2818 ; X86-NEXT:    vcvtqq2pd %xmm0, %xmm2 # encoding: [0x62,0xf1,0xfe,0x08,0xe6,0xd0]
2819 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2820 ; X86-NEXT:    vcvtqq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0xe6,0xc8]
2821 ; X86-NEXT:    vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
2822 ; X86-NEXT:    retl # encoding: [0xc3]
2823 ;
2824 ; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_128:
2825 ; X64:       # %bb.0:
2826 ; X64-NEXT:    vcvtqq2pd %xmm0, %xmm2 # encoding: [0x62,0xf1,0xfe,0x08,0xe6,0xd0]
2827 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2828 ; X64-NEXT:    vcvtqq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0xe6,0xc8]
2829 ; X64-NEXT:    vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
2830 ; X64-NEXT:    retq # encoding: [0xc3]
2831   %res = call <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) ; mask taken from %x2
2832   %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 -1) ; all-ones mask
2833   %res2 = fadd <2 x double> %res, %res1 ; the sum feeds the return, so both calls are used
2834   ret <2 x double> %res2
2835 }
2836
2837 declare <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64>, <4 x double>, i8)
2838
     ; Calls @llvm.x86.avx512.mask.cvtqq2pd.256 twice on %x0 with %x1 as the
     ; second operand: once with mask %x2 and once with an all-ones (-1) mask,
     ; then returns the fadd of both results.
     ; The expected assembly has one unmasked vcvtqq2pd into %ymm2, one
     ; vcvtqq2pd into %ymm1 under {%k1}, and a vaddpd. The two targets differ
     ; only in how the mask reaches %k1 (kmovb from the stack on i686, kmovw
     ; from %edi on x86_64) and in the return mnemonic.
2839 define <4 x double>@test_int_x86_avx512_mask_cvt_qq2pd_256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) {
2840 ; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_256:
2841 ; X86:       # %bb.0:
2842 ; X86-NEXT:    vcvtqq2pd %ymm0, %ymm2 # encoding: [0x62,0xf1,0xfe,0x28,0xe6,0xd0]
2843 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2844 ; X86-NEXT:    vcvtqq2pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0xe6,0xc8]
2845 ; X86-NEXT:    vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
2846 ; X86-NEXT:    retl # encoding: [0xc3]
2847 ;
2848 ; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_256:
2849 ; X64:       # %bb.0:
2850 ; X64-NEXT:    vcvtqq2pd %ymm0, %ymm2 # encoding: [0x62,0xf1,0xfe,0x28,0xe6,0xd0]
2851 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2852 ; X64-NEXT:    vcvtqq2pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0xe6,0xc8]
2853 ; X64-NEXT:    vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
2854 ; X64-NEXT:    retq # encoding: [0xc3]
2855   %res = call <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) ; mask taken from %x2
2856   %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 -1) ; all-ones mask
2857   %res2 = fadd <4 x double> %res, %res1 ; the sum feeds the return, so both calls are used
2858   ret <4 x double> %res2
2859 }
2860
2861 declare <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64>, <2 x double>, i8)
2862
     ; Calls @llvm.x86.avx512.mask.cvtuqq2pd.128 twice on %x0 with %x1 as the
     ; second operand: once with mask %x2 and once with an all-ones (-1) mask,
     ; then returns the fadd of both results.
     ; The expected assembly has one unmasked vcvtuqq2pd into %xmm2, one
     ; vcvtuqq2pd into %xmm1 under {%k1}, and a vaddpd. The two targets differ
     ; only in how the mask reaches %k1 (kmovb from the stack on i686, kmovw
     ; from %edi on x86_64) and in the return mnemonic.
2863 define <2 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) {
2864 ; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_128:
2865 ; X86:       # %bb.0:
2866 ; X86-NEXT:    vcvtuqq2pd %xmm0, %xmm2 # encoding: [0x62,0xf1,0xfe,0x08,0x7a,0xd0]
2867 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2868 ; X86-NEXT:    vcvtuqq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x7a,0xc8]
2869 ; X86-NEXT:    vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
2870 ; X86-NEXT:    retl # encoding: [0xc3]
2871 ;
2872 ; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_128:
2873 ; X64:       # %bb.0:
2874 ; X64-NEXT:    vcvtuqq2pd %xmm0, %xmm2 # encoding: [0x62,0xf1,0xfe,0x08,0x7a,0xd0]
2875 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2876 ; X64-NEXT:    vcvtuqq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x7a,0xc8]
2877 ; X64-NEXT:    vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
2878 ; X64-NEXT:    retq # encoding: [0xc3]
2879   %res = call <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) ; mask taken from %x2
2880   %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 -1) ; all-ones mask
2881   %res2 = fadd <2 x double> %res, %res1 ; the sum feeds the return, so both calls are used
2882   ret <2 x double> %res2
2883 }
2884
2885 declare <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64>, <4 x double>, i8)
2886
     ; Calls @llvm.x86.avx512.mask.cvtuqq2pd.256 twice on %x0 with %x1 as the
     ; second operand: once with mask %x2 and once with an all-ones (-1) mask,
     ; then returns the fadd of both results.
     ; The expected assembly has one unmasked vcvtuqq2pd into %ymm2, one
     ; vcvtuqq2pd into %ymm1 under {%k1}, and a vaddpd. The two targets differ
     ; only in how the mask reaches %k1 (kmovb from the stack on i686, kmovw
     ; from %edi on x86_64) and in the return mnemonic.
2887 define <4 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) {
2888 ; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_256:
2889 ; X86:       # %bb.0:
2890 ; X86-NEXT:    vcvtuqq2pd %ymm0, %ymm2 # encoding: [0x62,0xf1,0xfe,0x28,0x7a,0xd0]
2891 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2892 ; X86-NEXT:    vcvtuqq2pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x7a,0xc8]
2893 ; X86-NEXT:    vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
2894 ; X86-NEXT:    retl # encoding: [0xc3]
2895 ;
2896 ; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_256:
2897 ; X64:       # %bb.0:
2898 ; X64-NEXT:    vcvtuqq2pd %ymm0, %ymm2 # encoding: [0x62,0xf1,0xfe,0x28,0x7a,0xd0]
2899 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2900 ; X64-NEXT:    vcvtuqq2pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x7a,0xc8]
2901 ; X64-NEXT:    vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
2902 ; X64-NEXT:    retq # encoding: [0xc3]
2903   %res = call <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) ; mask taken from %x2
2904   %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 -1) ; all-ones mask
2905   %res2 = fadd <4 x double> %res, %res1 ; the sum feeds the return, so both calls are used
2906   ret <4 x double> %res2
2907 }
2908
2909 declare i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float>, i32, i8)
2910
     ; Chains two calls to @llvm.x86.avx512.mask.fpclass.ps.128 on %x0: the first
     ; uses immediate 2 with an all-ones (-1) mask, and its i8 result is passed as
     ; the mask of the second call, which uses immediate 4.
     ; In the expected assembly (shared by both RUN configurations) the unmasked
     ; vfpclassps that writes %k1 carries immediate 4, and the vfpclassps executed
     ; under {%k1} carries immediate 2; the result is then moved to %eax.
2911 define i8 @test_int_x86_avx512_mask_fpclass_ps_128(<4 x float> %x0) {
2912 ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_128:
2913 ; CHECK:       # %bb.0:
2914 ; CHECK-NEXT:    vfpclassps $4, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x66,0xc8,0x04]
2915 ; CHECK-NEXT:    vfpclassps $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x66,0xc0,0x02]
2916 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
2917 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
2918 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2919   %res = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 2, i8 -1) ; immediate 2, all-ones mask
2920   %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 4, i8 %res) ; immediate 4, masked by the first result
2921   ret i8 %res1
2922 }
2923
2924 declare i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float>, i32, i8)
2925
     ; Chains two calls to @llvm.x86.avx512.mask.fpclass.ps.256 on %x0: the first
     ; uses immediate 2 with an all-ones (-1) mask, and its i8 result is passed as
     ; the mask of the second call, which uses immediate 4.
     ; In the expected assembly (shared by both RUN configurations) the unmasked
     ; vfpclassps that writes %k1 carries immediate 4, and the vfpclassps executed
     ; under {%k1} carries immediate 2; the result is moved to %eax and a
     ; vzeroupper precedes the return.
2926 define i8 @test_int_x86_avx512_mask_fpclass_ps_256(<8 x float> %x0) {
2927 ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_256:
2928 ; CHECK:       # %bb.0:
2929 ; CHECK-NEXT:    vfpclassps $4, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x66,0xc8,0x04]
2930 ; CHECK-NEXT:    vfpclassps $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x66,0xc0,0x02]
2931 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
2932 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
2933 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2934 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2935   %res = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 2, i8 -1) ; immediate 2, all-ones mask
2936   %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 4, i8 %res) ; immediate 4, masked by the first result
2937   ret i8 %res1
2938 }
2939
2940 declare i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double>, i32, i8)
2941
     ; Chains two calls to @llvm.x86.avx512.mask.fpclass.pd.128 on %x0: the first
     ; uses immediate 4 with an all-ones (-1) mask, and its i8 result is passed as
     ; the mask of the second call, which uses immediate 2. Note the immediates
     ; are in the opposite order from the other fpclass tests in this file.
     ; In the expected assembly (shared by both RUN configurations) the unmasked
     ; vfpclasspd that writes %k1 carries immediate 2, and the vfpclasspd executed
     ; under {%k1} carries immediate 4; the result is then moved to %eax.
2942 define i8 @test_int_x86_avx512_mask_fpclass_pd_128(<2 x double> %x0) {
2943 ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_128:
2944 ; CHECK:       # %bb.0:
2945 ; CHECK-NEXT:    vfpclasspd $2, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x66,0xc8,0x02]
2946 ; CHECK-NEXT:    vfpclasspd $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x66,0xc0,0x04]
2947 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
2948 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
2949 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2950   %res =  call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 4, i8 -1) ; immediate 4, all-ones mask
2951   %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 2, i8 %res) ; immediate 2, masked by the first result
2952   ret i8 %res1
2953 }
2954
2955 declare i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double>, i32, i8)
2956
     ; Chains two calls to @llvm.x86.avx512.mask.fpclass.pd.256 on %x0: the first
     ; uses immediate 2 with an all-ones (-1) mask, and its i8 result is passed as
     ; the mask of the second call, which uses immediate 4.
     ; In the expected assembly (shared by both RUN configurations) the unmasked
     ; vfpclasspd that writes %k1 carries immediate 4, and the vfpclasspd executed
     ; under {%k1} carries immediate 2; the result is moved to %eax and a
     ; vzeroupper precedes the return.
2957 define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0) {
2958 ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_256:
2959 ; CHECK:       # %bb.0:
2960 ; CHECK-NEXT:    vfpclasspd $4, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x66,0xc8,0x04]
2961 ; CHECK-NEXT:    vfpclasspd $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x66,0xc0,0x02]
2962 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
2963 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
2964 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2965 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2966   %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 2, i8 -1) ; immediate 2, all-ones mask
2967   %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 4, i8 %res) ; immediate 4, masked by the first result
2968   ret i8 %res1
2969 }
2970
2971 declare <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64>, <4 x float>, i8)
2972
     ; Calls @llvm.x86.avx512.mask.cvtqq2ps.256 twice on %x0 with %x1 as the
     ; second operand: once with mask %x2 and once with an all-ones (-1) mask,
     ; then returns the fadd of both results.
     ; The expected assembly reads the 256-bit %ymm0 and writes 128-bit xmm
     ; registers: one unmasked vcvtqq2ps into %xmm2, one vcvtqq2ps into %xmm1
     ; under {%k1}, a vaddps, and a vzeroupper before the return. The two targets
     ; differ only in how the mask reaches %k1 (kmovb from the stack on i686,
     ; kmovw from %edi on x86_64) and in the return mnemonic.
2973 define <4 x float> @test_int_x86_avx512_mask_cvt_qq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) {
2974 ; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_256:
2975 ; X86:       # %bb.0:
2976 ; X86-NEXT:    vcvtqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xfc,0x28,0x5b,0xd0]
2977 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2978 ; X86-NEXT:    vcvtqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x5b,0xc8]
2979 ; X86-NEXT:    vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
2980 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2981 ; X86-NEXT:    retl # encoding: [0xc3]
2982 ;
2983 ; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_256:
2984 ; X64:       # %bb.0:
2985 ; X64-NEXT:    vcvtqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xfc,0x28,0x5b,0xd0]
2986 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2987 ; X64-NEXT:    vcvtqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x5b,0xc8]
2988 ; X64-NEXT:    vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
2989 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2990 ; X64-NEXT:    retq # encoding: [0xc3]
2991   %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) ; mask taken from %x2
2992   %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 -1) ; all-ones mask
2993   %res2 = fadd <4 x float> %res, %res1 ; the sum feeds the return, so both calls are used
2994   ret <4 x float> %res2
2995 }
2996
2997 declare <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64>, <4 x float>, i8)
2998
     ; Calls @llvm.x86.avx512.mask.cvtuqq2ps.256 twice on %x0 with %x1 as the
     ; second operand: once with mask %x2 and once with an all-ones (-1) mask,
     ; then returns the fadd of both results.
     ; The expected assembly reads the 256-bit %ymm0 and writes 128-bit xmm
     ; registers: one unmasked vcvtuqq2ps into %xmm2, one vcvtuqq2ps into %xmm1
     ; under {%k1}, a vaddps, and a vzeroupper before the return. The two targets
     ; differ only in how the mask reaches %k1 (kmovb from the stack on i686,
     ; kmovw from %edi on x86_64) and in the return mnemonic.
2999 define <4 x float> @test_int_x86_avx512_mask_cvt_uqq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) {
3000 ; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_256:
3001 ; X86:       # %bb.0:
3002 ; X86-NEXT:    vcvtuqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xff,0x28,0x7a,0xd0]
3003 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
3004 ; X86-NEXT:    vcvtuqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7a,0xc8]
3005 ; X86-NEXT:    vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
3006 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3007 ; X86-NEXT:    retl # encoding: [0xc3]
3008 ;
3009 ; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_256:
3010 ; X64:       # %bb.0:
3011 ; X64-NEXT:    vcvtuqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xff,0x28,0x7a,0xd0]
3012 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3013 ; X64-NEXT:    vcvtuqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7a,0xc8]
3014 ; X64-NEXT:    vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
3015 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3016 ; X64-NEXT:    retq # encoding: [0xc3]
3017   %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) ; mask taken from %x2
3018   %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 -1) ; all-ones mask
3019   %res2 = fadd <4 x float> %res, %res1 ; the sum feeds the return, so both calls are used
3020   ret <4 x float> %res2
3021 }