; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
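; Each test below calls a legacy masked intrinsic with an all-ones mask, a
; merge mask and (where applicable) a zeroing mask, then combines the results
; so that all three lowerings stay live in the checked assembly.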
declare <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pbroadcast_d_gpr_128(i32 %x0, <4 x i32> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpbroadcastd %eax, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xc8]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpbroadcastd %eax, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7c,0xc0]
; X86-NEXT: vpbroadcastd %eax, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7c,0xd0]
; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_128:
; X64: # %bb.0:
; X64-NEXT: vpbroadcastd %edi, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xcf]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpbroadcastd %edi, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7c,0xc7]
; X64-NEXT: vpbroadcastd %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7c,0xd7]
; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> %x1, i8 -1)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> %x1, i8 %mask)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> zeroinitializer, i8 %mask)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res2, %res3
  ret <4 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_pbroadcast_q_gpr_128(i64 %x0, <2 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_128:
; X86: # %bb.0:
; X86-NEXT: vmovq {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x4c,0x24,0x04]
; X86-NEXT: # xmm1 = mem[0],zero
; X86-NEXT: vpbroadcastq %xmm1, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0xd1]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpbroadcastq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x59,0xc1]
; X86-NEXT: vpbroadcastq %xmm1, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x59,0xc9]
; X86-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc1]
; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_128:
; X64: # %bb.0:
; X64-NEXT: vpbroadcastq %rdi, %xmm1 # encoding: [0x62,0xf2,0xfd,0x08,0x7c,0xcf]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpbroadcastq %rdi, %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7c,0xc7]
; X64-NEXT: vpbroadcastq %rdi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x7c,0xd7]
; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> %x1,i8 -1)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> %x1,i8 %mask)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> zeroinitializer,i8 %mask)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res2, %res3
  ret <2 x i64> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pbroadcast_d_gpr_256(i32 %x0, <8 x i32> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpbroadcastd %eax, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7c,0xc8]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpbroadcastd %eax, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7c,0xc0]
; X86-NEXT: vpbroadcastd %eax, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7c,0xd0]
; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_256:
; X64: # %bb.0:
; X64-NEXT: vpbroadcastd %edi, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7c,0xcf]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpbroadcastd %edi, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7c,0xc7]
; X64-NEXT: vpbroadcastd %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7c,0xd7]
; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> %x1, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> %x1, i8 %mask)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> zeroinitializer, i8 %mask)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res2, %res3
  ret <8 x i32> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pbroadcast_q_gpr_256(i64 %x0, <4 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_256:
; X86: # %bb.0:
; X86-NEXT: vmovq {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x4c,0x24,0x04]
; X86-NEXT: # xmm1 = mem[0],zero
; X86-NEXT: vpbroadcastq %xmm1, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xd1]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpbroadcastq %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x59,0xc1]
; X86-NEXT: vpbroadcastq %xmm1, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x59,0xc9]
; X86-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc1]
; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_256:
; X64: # %bb.0:
; X64-NEXT: vpbroadcastq %rdi, %ymm1 # encoding: [0x62,0xf2,0xfd,0x28,0x7c,0xcf]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7c,0xc7]
; X64-NEXT: vpbroadcastq %rdi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x7c,0xd7]
; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> %x1,i8 -1)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> %x1,i8 %mask)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> zeroinitializer,i8 %mask)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res2, %res3
  ret <4 x i64> %res4
}

declare <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_pbroadcastd_256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask, i32 * %y_ptr) {
; X86-LABEL: test_int_x86_avx512_pbroadcastd_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x04]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpbroadcastd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x58,0xc8]
; X86-NEXT: vpbroadcastd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x58,0xc0]
; X86-NEXT: vpaddd (%eax){1to8}, %ymm1, %ymm1 # encoding: [0x62,0xf1,0x75,0x38,0xfe,0x08]
; X86-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pbroadcastd_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpbroadcastd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x58,0xc8]
; X64-NEXT: vpbroadcastd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x58,0xc0]
; X64-NEXT: vpaddd (%rsi){1to8}, %ymm1, %ymm1 # encoding: [0x62,0xf1,0x75,0x38,0xfe,0x0e]
; X64-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %y_32 = load i32, i32 * %y_ptr
  %y = insertelement <4 x i32> undef, i32 %y_32, i32 0
  %res = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %y, <8 x i32> %x1, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask)
  %res2 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %mask)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res2, %res3
  ret <8 x i32> %res4
}

declare <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_pbroadcastd_128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_pbroadcastd_128:
; X86: # %bb.0:
; X86-NEXT: vpbroadcastd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpbroadcastd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x58,0xc8]
; X86-NEXT: vpbroadcastd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x58,0xc0]
; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pbroadcastd_128:
; X64: # %bb.0:
; X64-NEXT: vpbroadcastd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpbroadcastd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x58,0xc8]
; X64-NEXT: vpbroadcastd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x58,0xc0]
; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  %res1 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask)
  %res2 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %mask)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res2, %res3
  ret <4 x i32> %res4
}

declare <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_pbroadcastq_256(<2 x i64> %x0, <4 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_pbroadcastq_256:
; X86: # %bb.0:
; X86-NEXT: vpbroadcastq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpbroadcastq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x59,0xc8]
; X86-NEXT: vpbroadcastq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x59,0xc0]
; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pbroadcastq_256:
; X64: # %bb.0:
; X64-NEXT: vpbroadcastq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpbroadcastq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x59,0xc8]
; X64-NEXT: vpbroadcastq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x59,0xc0]
; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1,i8 -1)
  %res1 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1,i8 %mask)
  %res2 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> zeroinitializer,i8 %mask)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res2, %res3
  ret <4 x i64> %res4
}

declare <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_pbroadcastq_128(<2 x i64> %x0, <2 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_pbroadcastq_128:
; X86: # %bb.0:
; X86-NEXT: vpbroadcastq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpbroadcastq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x59,0xc8]
; X86-NEXT: vpbroadcastq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x59,0xc0]
; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pbroadcastq_128:
; X64: # %bb.0:
; X64-NEXT: vpbroadcastq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpbroadcastq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x59,0xc8]
; X64-NEXT: vpbroadcastq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x59,0xc0]
; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1,i8 -1)
  %res1 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1,i8 %mask)
  %res2 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> zeroinitializer,i8 %mask)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res2, %res3
  ret <2 x i64> %res4
}

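; The tests below switch from the integer broadcast intrinsics to the legacy
; masked floating-point broadcast, duplicate-lane and permute intrinsics.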
declare <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double>, <4 x double>, i8) nounwind readonly

define <4 x double> @test_x86_vbroadcast_sd_pd_256(<2 x double> %a0, <4 x double> %a1, i8 %mask ) {
; X86-LABEL: test_x86_vbroadcast_sd_pd_256:
; X86: # %bb.0:
; X86-NEXT: vbroadcastsd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x19,0xc8]
; X86-NEXT: vaddpd %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc9]
; X86-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x19,0xc0]
; X86-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_vbroadcast_sd_pd_256:
; X64: # %bb.0:
; X64-NEXT: vbroadcastsd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x19,0xc8]
; X64-NEXT: vaddpd %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc9]
; X64-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x19,0xc0]
; X64-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 -1)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> %a1, i8 %mask)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res2, %res3
  ret <4 x double> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float>, <8 x float>, i8) nounwind readonly

define <8 x float> @test_x86_vbroadcast_ss_ps_256(<4 x float> %a0, <8 x float> %a1, i8 %mask ) {
; X86-LABEL: test_x86_vbroadcast_ss_ps_256:
; X86: # %bb.0:
; X86-NEXT: vbroadcastss %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vbroadcastss %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x18,0xc8]
; X86-NEXT: vaddps %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc9]
; X86-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x18,0xc0]
; X86-NEXT: vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_vbroadcast_ss_ps_256:
; X64: # %bb.0:
; X64-NEXT: vbroadcastss %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vbroadcastss %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x18,0xc8]
; X64-NEXT: vaddps %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc9]
; X64-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x18,0xc0]
; X64-NEXT: vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 -1)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> %a1, i8 %mask)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float>, <4 x float>, i8) nounwind readonly

define <4 x float> @test_x86_vbroadcast_ss_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask ) {
; X86-LABEL: test_x86_vbroadcast_ss_ps_128:
; X86: # %bb.0:
; X86-NEXT: vbroadcastss %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vbroadcastss %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x18,0xc8]
; X86-NEXT: vaddps %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc9]
; X86-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x18,0xc0]
; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_vbroadcast_ss_ps_128:
; X64: # %bb.0:
; X64-NEXT: vbroadcastss %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vbroadcastss %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x18,0xc8]
; X64-NEXT: vaddps %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc9]
; X64-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x18,0xc0]
; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 -1)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_movsldup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movsldup_128:
; X86: # %bb.0:
; X86-NEXT: vmovsldup %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x12,0xd0]
; X86-NEXT: # xmm2 = xmm0[0,0,2,2]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovsldup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x12,0xc8]
; X86-NEXT: # xmm1 {%k1} = xmm0[0,0,2,2]
; X86-NEXT: vaddps %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xca]
; X86-NEXT: vmovsldup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x12,0xc0]
; X86-NEXT: # xmm0 {%k1} {z} = xmm0[0,0,2,2]
; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movsldup_128:
; X64: # %bb.0:
; X64-NEXT: vmovsldup %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x12,0xd0]
; X64-NEXT: # xmm2 = xmm0[0,0,2,2]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovsldup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x12,0xc8]
; X64-NEXT: # xmm1 {%k1} = xmm0[0,0,2,2]
; X64-NEXT: vaddps %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xca]
; X64-NEXT: vmovsldup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x12,0xc0]
; X64-NEXT: # xmm0 {%k1} {z} = xmm0[0,0,2,2]
; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_movsldup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movsldup_256:
; X86: # %bb.0:
; X86-NEXT: vmovsldup %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x12,0xd0]
; X86-NEXT: # ymm2 = ymm0[0,0,2,2,4,4,6,6]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovsldup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x12,0xc8]
; X86-NEXT: # ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
; X86-NEXT: vaddps %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xca]
; X86-NEXT: vmovsldup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x12,0xc0]
; X86-NEXT: # ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
; X86-NEXT: vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movsldup_256:
; X64: # %bb.0:
; X64-NEXT: vmovsldup %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x12,0xd0]
; X64-NEXT: # ymm2 = ymm0[0,0,2,2,4,4,6,6]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovsldup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x12,0xc8]
; X64-NEXT: # ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
; X64-NEXT: vaddps %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xca]
; X64-NEXT: vmovsldup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x12,0xc0]
; X64-NEXT: # ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
; X64-NEXT: vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_movshdup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movshdup_128:
; X86: # %bb.0:
; X86-NEXT: vmovshdup %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x16,0xd0]
; X86-NEXT: # xmm2 = xmm0[1,1,3,3]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovshdup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x16,0xc8]
; X86-NEXT: # xmm1 {%k1} = xmm0[1,1,3,3]
; X86-NEXT: vaddps %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xca]
; X86-NEXT: vmovshdup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x16,0xc0]
; X86-NEXT: # xmm0 {%k1} {z} = xmm0[1,1,3,3]
; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movshdup_128:
; X64: # %bb.0:
; X64-NEXT: vmovshdup %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x16,0xd0]
; X64-NEXT: # xmm2 = xmm0[1,1,3,3]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovshdup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x16,0xc8]
; X64-NEXT: # xmm1 {%k1} = xmm0[1,1,3,3]
; X64-NEXT: vaddps %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xca]
; X64-NEXT: vmovshdup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x16,0xc0]
; X64-NEXT: # xmm0 {%k1} {z} = xmm0[1,1,3,3]
; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_movshdup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movshdup_256:
; X86: # %bb.0:
; X86-NEXT: vmovshdup %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x16,0xd0]
; X86-NEXT: # ymm2 = ymm0[1,1,3,3,5,5,7,7]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovshdup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x16,0xc8]
; X86-NEXT: # ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
; X86-NEXT: vaddps %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xca]
; X86-NEXT: vmovshdup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x16,0xc0]
; X86-NEXT: # ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
; X86-NEXT: vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movshdup_256:
; X64: # %bb.0:
; X64-NEXT: vmovshdup %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x16,0xd0]
; X64-NEXT: # ymm2 = ymm0[1,1,3,3,5,5,7,7]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovshdup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x16,0xc8]
; X64-NEXT: # ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
; X64-NEXT: vaddps %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xca]
; X64-NEXT: vmovshdup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x16,0xc0]
; X64-NEXT: # ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
; X64-NEXT: vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}
declare <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_movddup_128(<2 x double> %x0, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movddup_128:
; X86: # %bb.0:
; X86-NEXT: vmovddup %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xd0]
; X86-NEXT: # xmm2 = xmm0[0,0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovddup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x12,0xc8]
; X86-NEXT: # xmm1 {%k1} = xmm0[0,0]
; X86-NEXT: vaddpd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xca]
; X86-NEXT: vmovddup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x12,0xc0]
; X86-NEXT: # xmm0 {%k1} {z} = xmm0[0,0]
; X86-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movddup_128:
; X64: # %bb.0:
; X64-NEXT: vmovddup %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xd0]
; X64-NEXT: # xmm2 = xmm0[0,0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovddup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x12,0xc8]
; X64-NEXT: # xmm1 {%k1} = xmm0[0,0]
; X64-NEXT: vaddpd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xca]
; X64-NEXT: vmovddup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x12,0xc0]
; X64-NEXT: # xmm0 {%k1} {z} = xmm0[0,0]
; X64-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 %x2)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 -1)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> zeroinitializer, i8 %x2)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res2, %res3
  ret <2 x double> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_movddup_256(<4 x double> %x0, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movddup_256:
; X86: # %bb.0:
; X86-NEXT: vmovddup %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xff,0x12,0xd0]
; X86-NEXT: # ymm2 = ymm0[0,0,2,2]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovddup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x12,0xc8]
; X86-NEXT: # ymm1 {%k1} = ymm0[0,0,2,2]
; X86-NEXT: vaddpd %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xca]
; X86-NEXT: vmovddup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x12,0xc0]
; X86-NEXT: # ymm0 {%k1} {z} = ymm0[0,0,2,2]
; X86-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movddup_256:
; X64: # %bb.0:
; X64-NEXT: vmovddup %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xff,0x12,0xd0]
; X64-NEXT: # ymm2 = ymm0[0,0,2,2]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovddup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x12,0xc8]
; X64-NEXT: # ymm1 {%k1} = ymm0[0,0,2,2]
; X64-NEXT: vaddpd %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xca]
; X64-NEXT: vmovddup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x12,0xc0]
; X64-NEXT: # ymm0 {%k1} {z} = ymm0[0,0,2,2]
; X64-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 %x2)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 -1)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> zeroinitializer, i8 %x2)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res2, %res3
  ret <4 x double> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double>, i32, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_vpermil_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermil_pd_256:
; X86: # %bb.0:
; X86-NEXT: vpermilpd $6, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x05,0xd0,0x06]
; X86-NEXT: # ymm2 = ymm0[0,1,3,2]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermilpd $6, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x05,0xc8,0x06]
; X86-NEXT: # ymm1 {%k1} = ymm0[0,1,3,2]
; X86-NEXT: vpermilpd $6, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x05,0xc0,0x06]
; X86-NEXT: # ymm0 {%k1} {z} = ymm0[0,1,3,2]
; X86-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X86-NEXT: vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermil_pd_256:
; X64: # %bb.0:
; X64-NEXT: vpermilpd $6, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x05,0xd0,0x06]
; X64-NEXT: # ymm2 = ymm0[0,1,3,2]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermilpd $6, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x05,0xc8,0x06]
; X64-NEXT: # ymm1 {%k1} = ymm0[0,1,3,2]
; X64-NEXT: vpermilpd $6, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x05,0xc0,0x06]
; X64-NEXT: # ymm0 {%k1} {z} = ymm0[0,1,3,2]
; X64-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X64-NEXT: vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> zeroinitializer, i8 %x3)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 -1)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res2, %res3
  ret <4 x double> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double>, i32, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_vpermil_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermil_pd_128:
; X86: # %bb.0:
; X86-NEXT: vpermilpd $1, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xd0,0x01]
; X86-NEXT: # xmm2 = xmm0[1,0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermilpd $1, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x05,0xc8,0x01]
; X86-NEXT: # xmm1 {%k1} = xmm0[1,0]
; X86-NEXT: vpermilpd $1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x05,0xc0,0x01]
; X86-NEXT: # xmm0 {%k1} {z} = xmm0[1,0]
; X86-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
; X86-NEXT: vaddpd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermil_pd_128:
; X64: # %bb.0:
; X64-NEXT: vpermilpd $1, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xd0,0x01]
; X64-NEXT: # xmm2 = xmm0[1,0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermilpd $1, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x05,0xc8,0x01]
; X64-NEXT: # xmm1 {%k1} = xmm0[1,0]
; X64-NEXT: vpermilpd $1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x05,0xc0,0x01]
; X64-NEXT: # xmm0 {%k1} {z} = xmm0[1,0]
; X64-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
; X64-NEXT: vaddpd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> zeroinitializer, i8 %x3)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 -1)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res3, %res2
  ret <2 x double> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float>, i32, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_vpermil_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermil_ps_256:
; X86: # %bb.0:
; X86-NEXT: vpermilps $22, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x04,0xd0,0x16]
; X86-NEXT: # ymm2 = ymm0[2,1,1,0,6,5,5,4]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermilps $22, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x04,0xc8,0x16]
; X86-NEXT: # ymm1 {%k1} = ymm0[2,1,1,0,6,5,5,4]
; X86-NEXT: vpermilps $22, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x04,0xc0,0x16]
; X86-NEXT: # ymm0 {%k1} {z} = ymm0[2,1,1,0,6,5,5,4]
; X86-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X86-NEXT: vaddps %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermil_ps_256:
; X64: # %bb.0:
; X64-NEXT: vpermilps $22, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x04,0xd0,0x16]
; X64-NEXT: # ymm2 = ymm0[2,1,1,0,6,5,5,4]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermilps $22, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x04,0xc8,0x16]
; X64-NEXT: # ymm1 {%k1} = ymm0[2,1,1,0,6,5,5,4]
; X64-NEXT: vpermilps $22, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x04,0xc0,0x16]
; X64-NEXT: # ymm0 {%k1} {z} = ymm0[2,1,1,0,6,5,5,4]
; X64-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X64-NEXT: vaddps %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> zeroinitializer, i8 %x3)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 -1)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res3, %res2
  ret <8 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float>, i32, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_vpermil_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermil_ps_128:
; X86: # %bb.0:
; X86-NEXT: vpermilps $22, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xd0,0x16]
; X86-NEXT: # xmm2 = xmm0[2,1,1,0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermilps $22, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x04,0xc8,0x16]
; X86-NEXT: # xmm1 {%k1} = xmm0[2,1,1,0]
; X86-NEXT: vpermilps $22, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x04,0xc0,0x16]
; X86-NEXT: # xmm0 {%k1} {z} = xmm0[2,1,1,0]
; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X86-NEXT: vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermil_ps_128:
; X64: # %bb.0:
; X64-NEXT: vpermilps $22, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xd0,0x16]
; X64-NEXT: # xmm2 = xmm0[2,1,1,0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermilps $22, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x04,0xc8,0x16]
; X64-NEXT: # xmm1 {%k1} = xmm0[2,1,1,0]
; X64-NEXT: vpermilps $22, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x04,0xc0,0x16]
; X64-NEXT: # xmm0 {%k1} {z} = xmm0[2,1,1,0]
; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X64-NEXT: vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 %x3)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> zeroinitializer, i8 %x3)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 -1)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double>, i32, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_perm_df_256(<4 x double> %x0, i32 %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_perm_df_256:
; X86: # %bb.0:
; X86-NEXT: vpermpd $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0x01,0xd0,0x03]
; X86-NEXT: # ymm2 = ymm0[3,0,0,0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermpd $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x01,0xc8,0x03]
; X86-NEXT: # ymm1 {%k1} = ymm0[3,0,0,0]
; X86-NEXT: vpermpd $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x01,0xc0,0x03]
; X86-NEXT: # ymm0 {%k1} {z} = ymm0[3,0,0,0]
; X86-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X86-NEXT: vaddpd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_perm_df_256:
; X64: # %bb.0:
; X64-NEXT: vpermpd $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0x01,0xd0,0x03]
; X64-NEXT: # ymm2 = ymm0[3,0,0,0]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpermpd $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x01,0xc8,0x03]
; X64-NEXT: # ymm1 {%k1} = ymm0[3,0,0,0]
; X64-NEXT: vpermpd $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x01,0xc0,0x03]
; X64-NEXT: # ymm0 {%k1} {z} = ymm0[3,0,0,0]
; X64-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X64-NEXT: vaddpd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> zeroinitializer, i8 %x3)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> %x2, i8 -1)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res3, %res2
  ret <4 x double> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64>, i32, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_perm_di_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_perm_di_256:
; X86: # %bb.0:
; X86-NEXT: vpermq $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0x00,0xd0,0x03]
; X86-NEXT: # ymm2 = ymm0[3,0,0,0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x00,0xc8,0x03]
; X86-NEXT: # ymm1 {%k1} = ymm0[3,0,0,0]
; X86-NEXT: vpermq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x00,0xc0,0x03]
; X86-NEXT: # ymm0 {%k1} {z} = ymm0[3,0,0,0]
; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_perm_di_256:
; X64: # %bb.0:
; X64-NEXT: vpermq $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0x00,0xd0,0x03]
; X64-NEXT: # ymm2 = ymm0[3,0,0,0]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpermq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x00,0xc8,0x03]
; X64-NEXT: # ymm1 {%k1} = ymm0[3,0,0,0]
; X64-NEXT: vpermq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x00,0xc0,0x03]
; X64-NEXT: # ymm0 {%k1} {z} = ymm0[3,0,0,0]
; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

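; The store tests below write the vector twice: once with the supplied mask
; (masked vmovap*/vmovup*) and once with an all-ones mask, which lowers to the
; plain EVEX-to-VEX-compressed store.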
declare void @llvm.x86.avx512.mask.store.pd.128(i8*, <2 x double>, i8)

define void@test_int_x86_avx512_mask_store_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_pd_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT: vmovapd %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x29,0x01]
; X86-NEXT: vmovapd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_pd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovapd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x29,0x07]
; X64-NEXT: vmovapd %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x06]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.pd.256(i8*, <4 x double>, i8)

define void@test_int_x86_avx512_mask_store_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_pd_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT: vmovapd %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x29,0x01]
; X86-NEXT: vmovapd %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x29,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_pd_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovapd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x29,0x07]
; X64-NEXT: vmovapd %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x29,0x06]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.pd.128(i8*, <2 x double>, i8)

define void@test_int_x86_avx512_mask_storeu_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_pd_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT: vmovupd %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x11,0x01]
; X86-NEXT: vmovupd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_pd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovupd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x11,0x07]
; X64-NEXT: vmovupd %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x06]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.pd.256(i8*, <4 x double>, i8)

define void@test_int_x86_avx512_mask_storeu_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_pd_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT: vmovupd %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x11,0x01]
; X86-NEXT: vmovupd %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x11,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_pd_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovupd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x11,0x07]
; X64-NEXT: vmovupd %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x11,0x06]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1)
  ret void
}

900 declare void @llvm.x86.avx512.mask.store.ps.128(i8*, <4 x float>, i8)
902 define void@test_int_x86_avx512_mask_store_ps_128(i8* %ptr1, i8* %ptr2, <4 x float> %x1, i8 %x2) {
903 ; X86-LABEL: test_int_x86_avx512_mask_store_ps_128:
905 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
906 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
907 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
908 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
909 ; X86-NEXT: vmovaps %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x29,0x01]
910 ; X86-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
911 ; X86-NEXT: retl # encoding: [0xc3]
913 ; X64-LABEL: test_int_x86_avx512_mask_store_ps_128:
915 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
916 ; X64-NEXT: vmovaps %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x29,0x07]
917 ; X64-NEXT: vmovaps %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x06]
918 ; X64-NEXT: retq # encoding: [0xc3]
919 call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2)
920 call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1)
ret void
}
924 declare void @llvm.x86.avx512.mask.store.ps.256(i8*, <8 x float>, i8)
926 define void@test_int_x86_avx512_mask_store_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) {
927 ; X86-LABEL: test_int_x86_avx512_mask_store_ps_256:
929 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
930 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
931 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
932 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
933 ; X86-NEXT: vmovaps %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x29,0x01]
934 ; X86-NEXT: vmovaps %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x00]
935 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
936 ; X86-NEXT: retl # encoding: [0xc3]
938 ; X64-LABEL: test_int_x86_avx512_mask_store_ps_256:
940 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
941 ; X64-NEXT: vmovaps %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x29,0x07]
942 ; X64-NEXT: vmovaps %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x06]
943 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
944 ; X64-NEXT: retq # encoding: [0xc3]
945 call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2)
946 call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1)
ret void
}
950 declare void @llvm.x86.avx512.mask.storeu.ps.128(i8*, <4 x float>, i8)
952 define void@test_int_x86_avx512_mask_storeu_ps_128(i8* %ptr1, i8* %ptr2, <4 x float> %x1, i8 %x2) {
953 ; X86-LABEL: test_int_x86_avx512_mask_storeu_ps_128:
955 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
956 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
957 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
958 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
959 ; X86-NEXT: vmovups %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x11,0x01]
960 ; X86-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
961 ; X86-NEXT: retl # encoding: [0xc3]
963 ; X64-LABEL: test_int_x86_avx512_mask_storeu_ps_128:
965 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
966 ; X64-NEXT: vmovups %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x11,0x07]
967 ; X64-NEXT: vmovups %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x06]
968 ; X64-NEXT: retq # encoding: [0xc3]
969 call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2)
970 call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1)
ret void
}
974 declare void @llvm.x86.avx512.mask.storeu.ps.256(i8*, <8 x float>, i8)
976 define void@test_int_x86_avx512_mask_storeu_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) {
977 ; X86-LABEL: test_int_x86_avx512_mask_storeu_ps_256:
979 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
980 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
981 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
982 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
983 ; X86-NEXT: vmovups %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x11,0x01]
984 ; X86-NEXT: vmovups %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x00]
985 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
986 ; X86-NEXT: retl # encoding: [0xc3]
988 ; X64-LABEL: test_int_x86_avx512_mask_storeu_ps_256:
990 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
991 ; X64-NEXT: vmovups %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x11,0x07]
992 ; X64-NEXT: vmovups %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x06]
993 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
994 ; X64-NEXT: retq # encoding: [0xc3]
995 call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2)
996 call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1)
ret void
}
1000 declare void @llvm.x86.avx512.mask.storeu.q.128(i8*, <2 x i64>, i8)
1002 define void@test_int_x86_avx512_mask_storeu_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) {
1003 ; X86-LABEL: test_int_x86_avx512_mask_storeu_q_128:
1005 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
1006 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
1007 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
1008 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1009 ; X86-NEXT: vmovdqu64 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x7f,0x01]
1010 ; X86-NEXT: vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
1011 ; X86-NEXT: retl # encoding: [0xc3]
1013 ; X64-LABEL: test_int_x86_avx512_mask_storeu_q_128:
1015 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1016 ; X64-NEXT: vmovdqu64 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x7f,0x07]
1017 ; X64-NEXT: vmovdqu %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06]
1018 ; X64-NEXT: retq # encoding: [0xc3]
1019 call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2)
1020 call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1)
ret void
}
1024 declare void @llvm.x86.avx512.mask.storeu.q.256(i8*, <4 x i64>, i8)
1026 define void@test_int_x86_avx512_mask_storeu_q_256(i8* %ptr1, i8* %ptr2, <4 x i64> %x1, i8 %x2) {
1027 ; X86-LABEL: test_int_x86_avx512_mask_storeu_q_256:
1029 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
1030 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
1031 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
1032 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1033 ; X86-NEXT: vmovdqu64 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x7f,0x01]
1034 ; X86-NEXT: vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00]
1035 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1036 ; X86-NEXT: retl # encoding: [0xc3]
1038 ; X64-LABEL: test_int_x86_avx512_mask_storeu_q_256:
1040 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1041 ; X64-NEXT: vmovdqu64 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x7f,0x07]
1042 ; X64-NEXT: vmovdqu %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06]
1043 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1044 ; X64-NEXT: retq # encoding: [0xc3]
1045 call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2)
1046 call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1)
ret void
}
1050 declare void @llvm.x86.avx512.mask.storeu.d.128(i8*, <4 x i32>, i8)
1052 define void@test_int_x86_avx512_mask_storeu_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) {
1053 ; X86-LABEL: test_int_x86_avx512_mask_storeu_d_128:
1055 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
1056 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
1057 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
1058 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1059 ; X86-NEXT: vmovdqu32 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x7f,0x01]
1060 ; X86-NEXT: vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
1061 ; X86-NEXT: retl # encoding: [0xc3]
1063 ; X64-LABEL: test_int_x86_avx512_mask_storeu_d_128:
1065 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1066 ; X64-NEXT: vmovdqu32 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x7f,0x07]
1067 ; X64-NEXT: vmovdqu %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06]
1068 ; X64-NEXT: retq # encoding: [0xc3]
1069 call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2)
1070 call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1)
ret void
}
1074 declare void @llvm.x86.avx512.mask.storeu.d.256(i8*, <8 x i32>, i8)
1076 define void@test_int_x86_avx512_mask_storeu_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) {
1077 ; X86-LABEL: test_int_x86_avx512_mask_storeu_d_256:
1079 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
1080 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
1081 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
1082 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1083 ; X86-NEXT: vmovdqu32 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x7f,0x01]
1084 ; X86-NEXT: vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00]
1085 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1086 ; X86-NEXT: retl # encoding: [0xc3]
1088 ; X64-LABEL: test_int_x86_avx512_mask_storeu_d_256:
1090 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1091 ; X64-NEXT: vmovdqu32 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x7f,0x07]
1092 ; X64-NEXT: vmovdqu %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06]
1093 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1094 ; X64-NEXT: retq # encoding: [0xc3]
1095 call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2)
1096 call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1)
ret void
}
1100 declare void @llvm.x86.avx512.mask.store.q.128(i8*, <2 x i64>, i8)
1102 define void@test_int_x86_avx512_mask_store_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) {
1103 ; X86-LABEL: test_int_x86_avx512_mask_store_q_128:
1105 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
1106 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
1107 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
1108 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1109 ; X86-NEXT: vmovdqa64 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7f,0x01]
1110 ; X86-NEXT: vmovdqa %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x00]
1111 ; X86-NEXT: retl # encoding: [0xc3]
1113 ; X64-LABEL: test_int_x86_avx512_mask_store_q_128:
1115 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1116 ; X64-NEXT: vmovdqa64 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7f,0x07]
1117 ; X64-NEXT: vmovdqa %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x06]
1118 ; X64-NEXT: retq # encoding: [0xc3]
1119 call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2)
1120 call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1)
ret void
}
1124 declare void @llvm.x86.avx512.mask.store.q.256(i8*, <4 x i64>, i8)
1126 define void@test_int_x86_avx512_mask_store_q_256(i8* %ptr1, i8* %ptr2, <4 x i64> %x1, i8 %x2) {
1127 ; X86-LABEL: test_int_x86_avx512_mask_store_q_256:
1129 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
1130 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
1131 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
1132 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1133 ; X86-NEXT: vmovdqa64 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7f,0x01]
1134 ; X86-NEXT: vmovdqa %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x7f,0x00]
1135 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1136 ; X86-NEXT: retl # encoding: [0xc3]
1138 ; X64-LABEL: test_int_x86_avx512_mask_store_q_256:
1140 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1141 ; X64-NEXT: vmovdqa64 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7f,0x07]
1142 ; X64-NEXT: vmovdqa %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x7f,0x06]
1143 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1144 ; X64-NEXT: retq # encoding: [0xc3]
1145 call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2)
1146 call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1)
ret void
}
1150 declare void @llvm.x86.avx512.mask.store.d.128(i8*, <4 x i32>, i8)
1152 define void@test_int_x86_avx512_mask_store_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) {
1153 ; X86-LABEL: test_int_x86_avx512_mask_store_d_128:
1155 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
1156 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
1157 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
1158 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1159 ; X86-NEXT: vmovdqa32 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7f,0x01]
1160 ; X86-NEXT: vmovdqa %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x00]
1161 ; X86-NEXT: retl # encoding: [0xc3]
1163 ; X64-LABEL: test_int_x86_avx512_mask_store_d_128:
1165 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1166 ; X64-NEXT: vmovdqa32 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7f,0x07]
1167 ; X64-NEXT: vmovdqa %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x06]
1168 ; X64-NEXT: retq # encoding: [0xc3]
1169 call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2)
1170 call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1)
ret void
}
1174 declare void @llvm.x86.avx512.mask.store.d.256(i8*, <8 x i32>, i8)
1176 define void@test_int_x86_avx512_mask_store_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) {
1177 ; X86-LABEL: test_int_x86_avx512_mask_store_d_256:
1179 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
1180 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
1181 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
1182 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1183 ; X86-NEXT: vmovdqa32 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7f,0x01]
1184 ; X86-NEXT: vmovdqa %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x7f,0x00]
1185 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1186 ; X86-NEXT: retl # encoding: [0xc3]
1188 ; X64-LABEL: test_int_x86_avx512_mask_store_d_256:
1190 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1191 ; X64-NEXT: vmovdqa32 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7f,0x07]
1192 ; X64-NEXT: vmovdqa %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x7f,0x06]
1193 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1194 ; X64-NEXT: retq # encoding: [0xc3]
1195 call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2)
1196 call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1)
ret void
}
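; The masked load tests that follow share one pattern: an unmasked load seeds the
; result, a merge-masked load ({%k1}) blends into that value, a zero-masked load
; ({%k1} {z}) produces a second value, and the two masked results are added so the
; plain, merging, and zeroing forms are all exercised by a single function.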
1200 define <8 x float> @test_mask_load_aligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) {
1201 ; X86-LABEL: test_mask_load_aligned_ps_256:
1203 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1204 ; X86-NEXT: vmovaps (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x00]
1205 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1206 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
1207 ; X86-NEXT: vmovaps (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x28,0x00]
1208 ; X86-NEXT: vmovaps (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x08]
1209 ; X86-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
1210 ; X86-NEXT: retl # encoding: [0xc3]
1212 ; X64-LABEL: test_mask_load_aligned_ps_256:
1214 ; X64-NEXT: vmovaps (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
1215 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1216 ; X64-NEXT: vmovaps (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x28,0x07]
1217 ; X64-NEXT: vmovaps (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x0f]
1218 ; X64-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
1219 ; X64-NEXT: retq # encoding: [0xc3]
1220 %res = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 -1)
1221 %res1 = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> %res, i8 %mask)
1222 %res2 = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 %mask)
1223 %res4 = fadd <8 x float> %res2, %res1
1224 ret <8 x float> %res4
}
1227 declare <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8*, <8 x float>, i8)
1229 define <8 x float> @test_mask_load_unaligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) {
1230 ; X86-LABEL: test_mask_load_unaligned_ps_256:
1232 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1233 ; X86-NEXT: vmovups (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x00]
1234 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1235 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
1236 ; X86-NEXT: vmovups (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x10,0x00]
1237 ; X86-NEXT: vmovups (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x08]
1238 ; X86-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
1239 ; X86-NEXT: retl # encoding: [0xc3]
1241 ; X64-LABEL: test_mask_load_unaligned_ps_256:
1243 ; X64-NEXT: vmovups (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
1244 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1245 ; X64-NEXT: vmovups (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x10,0x07]
1246 ; X64-NEXT: vmovups (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x0f]
1247 ; X64-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
1248 ; X64-NEXT: retq # encoding: [0xc3]
1249 %res = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 -1)
1250 %res1 = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> %res, i8 %mask)
1251 %res2 = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 %mask)
1252 %res4 = fadd <8 x float> %res2, %res1
1253 ret <8 x float> %res4
}
1256 declare <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8*, <8 x float>, i8)
1258 define <4 x double> @test_mask_load_aligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) {
1259 ; X86-LABEL: test_mask_load_aligned_pd_256:
1261 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1262 ; X86-NEXT: vmovapd (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0x00]
1263 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1264 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
1265 ; X86-NEXT: vmovapd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x28,0x00]
1266 ; X86-NEXT: vmovapd (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x08]
1267 ; X86-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
1268 ; X86-NEXT: retl # encoding: [0xc3]
1270 ; X64-LABEL: test_mask_load_aligned_pd_256:
1272 ; X64-NEXT: vmovapd (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0x07]
1273 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1274 ; X64-NEXT: vmovapd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x28,0x07]
1275 ; X64-NEXT: vmovapd (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x0f]
1276 ; X64-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
1277 ; X64-NEXT: retq # encoding: [0xc3]
1278 %res = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 -1)
1279 %res1 = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> %res, i8 %mask)
1280 %res2 = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 %mask)
1281 %res4 = fadd <4 x double> %res2, %res1
1282 ret <4 x double> %res4
}
1285 declare <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8*, <4 x double>, i8)
1287 define <4 x double> @test_mask_load_unaligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) {
1288 ; X86-LABEL: test_mask_load_unaligned_pd_256:
1290 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1291 ; X86-NEXT: vmovupd (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x10,0x00]
1292 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1293 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
1294 ; X86-NEXT: vmovupd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x10,0x00]
1295 ; X86-NEXT: vmovupd (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x08]
1296 ; X86-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
1297 ; X86-NEXT: retl # encoding: [0xc3]
1299 ; X64-LABEL: test_mask_load_unaligned_pd_256:
1301 ; X64-NEXT: vmovupd (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x10,0x07]
1302 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1303 ; X64-NEXT: vmovupd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x10,0x07]
1304 ; X64-NEXT: vmovupd (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x0f]
1305 ; X64-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
1306 ; X64-NEXT: retq # encoding: [0xc3]
1307 %res = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 -1)
1308 %res1 = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> %res, i8 %mask)
1309 %res2 = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 %mask)
1310 %res4 = fadd <4 x double> %res2, %res1
1311 ret <4 x double> %res4
}
1314 declare <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8*, <4 x double>, i8)
1316 define <4 x float> @test_mask_load_aligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) {
1317 ; X86-LABEL: test_mask_load_aligned_ps_128:
1319 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1320 ; X86-NEXT: vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00]
1321 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1322 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
1323 ; X86-NEXT: vmovaps (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x28,0x00]
1324 ; X86-NEXT: vmovaps (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x28,0x08]
1325 ; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
1326 ; X86-NEXT: retl # encoding: [0xc3]
1328 ; X64-LABEL: test_mask_load_aligned_ps_128:
1330 ; X64-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
1331 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1332 ; X64-NEXT: vmovaps (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x28,0x07]
1333 ; X64-NEXT: vmovaps (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x28,0x0f]
1334 ; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
1335 ; X64-NEXT: retq # encoding: [0xc3]
1336 %res = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 -1)
1337 %res1 = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> %res, i8 %mask)
1338 %res2 = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 %mask)
1339 %res4 = fadd <4 x float> %res2, %res1
1340 ret <4 x float> %res4
}
1343 declare <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8*, <4 x float>, i8)
1345 define <4 x float> @test_mask_load_unaligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) {
1346 ; X86-LABEL: test_mask_load_unaligned_ps_128:
1348 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1349 ; X86-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
1350 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1351 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
1352 ; X86-NEXT: vmovups (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x10,0x00]
1353 ; X86-NEXT: vmovups (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x10,0x08]
1354 ; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
1355 ; X86-NEXT: retl # encoding: [0xc3]
1357 ; X64-LABEL: test_mask_load_unaligned_ps_128:
1359 ; X64-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
1360 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1361 ; X64-NEXT: vmovups (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x10,0x07]
1362 ; X64-NEXT: vmovups (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x10,0x0f]
1363 ; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
1364 ; X64-NEXT: retq # encoding: [0xc3]
1365 %res = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 -1)
1366 %res1 = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> %res, i8 %mask)
1367 %res2 = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 %mask)
1368 %res4 = fadd <4 x float> %res2, %res1
1369 ret <4 x float> %res4
}
1372 declare <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8*, <4 x float>, i8)
1374 define <2 x double> @test_mask_load_aligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) {
1375 ; X86-LABEL: test_mask_load_aligned_pd_128:
1377 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1378 ; X86-NEXT: vmovapd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x00]
1379 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1380 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
1381 ; X86-NEXT: vmovapd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x28,0x00]
1382 ; X86-NEXT: vmovapd (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x28,0x08]
1383 ; X86-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
1384 ; X86-NEXT: retl # encoding: [0xc3]
1386 ; X64-LABEL: test_mask_load_aligned_pd_128:
1388 ; X64-NEXT: vmovapd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x07]
1389 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1390 ; X64-NEXT: vmovapd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x28,0x07]
1391 ; X64-NEXT: vmovapd (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x28,0x0f]
1392 ; X64-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
1393 ; X64-NEXT: retq # encoding: [0xc3]
1394 %res = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 -1)
1395 %res1 = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> %res, i8 %mask)
1396 %res2 = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 %mask)
1397 %res4 = fadd <2 x double> %res2, %res1
1398 ret <2 x double> %res4
}
1401 declare <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8*, <2 x double>, i8)
1403 define <2 x double> @test_mask_load_unaligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) {
1404 ; X86-LABEL: test_mask_load_unaligned_pd_128:
1406 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1407 ; X86-NEXT: vmovupd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x10,0x00]
1408 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1409 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
1410 ; X86-NEXT: vmovupd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x10,0x00]
1411 ; X86-NEXT: vmovupd (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x10,0x08]
1412 ; X86-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
1413 ; X86-NEXT: retl # encoding: [0xc3]
1415 ; X64-LABEL: test_mask_load_unaligned_pd_128:
1417 ; X64-NEXT: vmovupd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x10,0x07]
1418 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1419 ; X64-NEXT: vmovupd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x10,0x07]
1420 ; X64-NEXT: vmovupd (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x10,0x0f]
1421 ; X64-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
1422 ; X64-NEXT: retq # encoding: [0xc3]
1423 %res = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 -1)
1424 %res1 = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> %res, i8 %mask)
1425 %res2 = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 %mask)
1426 %res4 = fadd <2 x double> %res2, %res1
1427 ret <2 x double> %res4
}
1430 declare <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8*, <2 x double>, i8)
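; The integer unaligned-load tests below (vmovdqu32/vmovdqu64) take two pointers,
; so the merge-masked load reads from %ptr2 while the unmasked and zero-masked
; loads read from %ptr.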
1432 declare <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8*, <4 x i32>, i8)
1434 define <4 x i32> @test_mask_load_unaligned_d_128(i8* %ptr, i8* %ptr2, <4 x i32> %data, i8 %mask) {
1435 ; X86-LABEL: test_mask_load_unaligned_d_128:
1437 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
1438 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
1439 ; X86-NEXT: vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01]
1440 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
1441 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1442 ; X86-NEXT: vmovdqu32 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x6f,0x00]
1443 ; X86-NEXT: vmovdqu32 (%ecx), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x09]
1444 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
1445 ; X86-NEXT: retl # encoding: [0xc3]
1447 ; X64-LABEL: test_mask_load_unaligned_d_128:
1449 ; X64-NEXT: vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07]
1450 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1451 ; X64-NEXT: vmovdqu32 (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x6f,0x06]
1452 ; X64-NEXT: vmovdqu32 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x0f]
1453 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
1454 ; X64-NEXT: retq # encoding: [0xc3]
1455 %res = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 -1)
1456 %res1 = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr2, <4 x i32> %res, i8 %mask)
1457 %res2 = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 %mask)
1458 %res4 = add <4 x i32> %res2, %res1
ret <4 x i32> %res4
}
1462 declare <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8*, <8 x i32>, i8)
1464 define <8 x i32> @test_mask_load_unaligned_d_256(i8* %ptr, i8* %ptr2, <8 x i32> %data, i8 %mask) {
1465 ; X86-LABEL: test_mask_load_unaligned_d_256:
1467 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
1468 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
1469 ; X86-NEXT: vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01]
1470 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
1471 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1472 ; X86-NEXT: vmovdqu32 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x6f,0x00]
1473 ; X86-NEXT: vmovdqu32 (%ecx), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x09]
1474 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
1475 ; X86-NEXT: retl # encoding: [0xc3]
1477 ; X64-LABEL: test_mask_load_unaligned_d_256:
1479 ; X64-NEXT: vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07]
1480 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1481 ; X64-NEXT: vmovdqu32 (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x6f,0x06]
1482 ; X64-NEXT: vmovdqu32 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x0f]
1483 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
1484 ; X64-NEXT: retq # encoding: [0xc3]
1485 %res = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 -1)
1486 %res1 = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr2, <8 x i32> %res, i8 %mask)
1487 %res2 = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 %mask)
1488 %res4 = add <8 x i32> %res2, %res1
ret <8 x i32> %res4
}
1492 declare <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8*, <2 x i64>, i8)
1494 define <2 x i64> @test_mask_load_unaligned_q_128(i8* %ptr, i8* %ptr2, <2 x i64> %data, i8 %mask) {
1495 ; X86-LABEL: test_mask_load_unaligned_q_128:
1497 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
1498 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
1499 ; X86-NEXT: vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01]
1500 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
1501 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1502 ; X86-NEXT: vmovdqu64 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x6f,0x00]
1503 ; X86-NEXT: vmovdqu64 (%ecx), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x09]
1504 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
1505 ; X86-NEXT: retl # encoding: [0xc3]
1507 ; X64-LABEL: test_mask_load_unaligned_q_128:
1509 ; X64-NEXT: vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07]
1510 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1511 ; X64-NEXT: vmovdqu64 (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x6f,0x06]
1512 ; X64-NEXT: vmovdqu64 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x0f]
1513 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
1514 ; X64-NEXT: retq # encoding: [0xc3]
1515 %res = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 -1)
1516 %res1 = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr2, <2 x i64> %res, i8 %mask)
1517 %res2 = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 %mask)
1518 %res4 = add <2 x i64> %res2, %res1
ret <2 x i64> %res4
}
1522 declare <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8*, <4 x i64>, i8)
1524 define <4 x i64> @test_mask_load_unaligned_q_256(i8* %ptr, i8* %ptr2, <4 x i64> %data, i8 %mask) {
1525 ; X86-LABEL: test_mask_load_unaligned_q_256:
1527 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
1528 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
1529 ; X86-NEXT: vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01]
1530 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
1531 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1532 ; X86-NEXT: vmovdqu64 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x6f,0x00]
1533 ; X86-NEXT: vmovdqu64 (%ecx), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x09]
1534 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
1535 ; X86-NEXT: retl # encoding: [0xc3]
1537 ; X64-LABEL: test_mask_load_unaligned_q_256:
1539 ; X64-NEXT: vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07]
1540 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1541 ; X64-NEXT: vmovdqu64 (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x6f,0x06]
1542 ; X64-NEXT: vmovdqu64 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x0f]
1543 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
1544 ; X64-NEXT: retq # encoding: [0xc3]
1545 %res = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 -1)
1546 %res1 = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr2, <4 x i64> %res, i8 %mask)
1547 %res2 = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 %mask)
1548 %res4 = add <4 x i64> %res2, %res1
ret <4 x i64> %res4
}
1552 declare <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8*, <4 x i32>, i8)
1554 define <4 x i32> @test_mask_load_aligned_d_128(<4 x i32> %data, i8* %ptr, i8 %mask) {
1555 ; X86-LABEL: test_mask_load_aligned_d_128:
1557 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1558 ; X86-NEXT: vmovdqa (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x00]
1559 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1560 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
1561 ; X86-NEXT: vmovdqa32 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0x00]
1562 ; X86-NEXT: vmovdqa32 (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x08]
1563 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
1564 ; X86-NEXT: retl # encoding: [0xc3]
1566 ; X64-LABEL: test_mask_load_aligned_d_128:
1568 ; X64-NEXT: vmovdqa (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x07]
1569 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1570 ; X64-NEXT: vmovdqa32 (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0x07]
1571 ; X64-NEXT: vmovdqa32 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x0f]
1572 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
1573 ; X64-NEXT: retq # encoding: [0xc3]
1574 %res = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 -1)
1575 %res1 = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> %res, i8 %mask)
1576 %res2 = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 %mask)
1577 %res4 = add <4 x i32> %res2, %res1
ret <4 x i32> %res4
}
1581 declare <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8*, <8 x i32>, i8)
1583 define <8 x i32> @test_mask_load_aligned_d_256(<8 x i32> %data, i8* %ptr, i8 %mask) {
1584 ; X86-LABEL: test_mask_load_aligned_d_256:
1586 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1587 ; X86-NEXT: vmovdqa (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x00]
1588 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1589 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
1590 ; X86-NEXT: vmovdqa32 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0x00]
1591 ; X86-NEXT: vmovdqa32 (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x08]
1592 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
1593 ; X86-NEXT: retl # encoding: [0xc3]
1595 ; X64-LABEL: test_mask_load_aligned_d_256:
1597 ; X64-NEXT: vmovdqa (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x07]
1598 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1599 ; X64-NEXT: vmovdqa32 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0x07]
1600 ; X64-NEXT: vmovdqa32 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x0f]
1601 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
1602 ; X64-NEXT: retq # encoding: [0xc3]
1603 %res = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 -1)
1604 %res1 = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> %res, i8 %mask)
1605 %res2 = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 %mask)
1606 %res4 = add <8 x i32> %res2, %res1
ret <8 x i32> %res4
}
1610 declare <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8*, <2 x i64>, i8)
1612 define <2 x i64> @test_mask_load_aligned_q_128(<2 x i64> %data, i8* %ptr, i8 %mask) {
1613 ; X86-LABEL: test_mask_load_aligned_q_128:
1615 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1616 ; X86-NEXT: vmovdqa (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x00]
1617 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1618 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
1619 ; X86-NEXT: vmovdqa64 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0x00]
1620 ; X86-NEXT: vmovdqa64 (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x08]
1621 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
1622 ; X86-NEXT: retl # encoding: [0xc3]
1624 ; X64-LABEL: test_mask_load_aligned_q_128:
1626 ; X64-NEXT: vmovdqa (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x07]
1627 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1628 ; X64-NEXT: vmovdqa64 (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0x07]
1629 ; X64-NEXT: vmovdqa64 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x0f]
1630 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
1631 ; X64-NEXT: retq # encoding: [0xc3]
1632 %res = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 -1)
1633 %res1 = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> %res, i8 %mask)
1634 %res2 = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 %mask)
1635 %res4 = add <2 x i64> %res2, %res1
ret <2 x i64> %res4
}
1639 declare <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8*, <4 x i64>, i8)
1641 define <4 x i64> @test_mask_load_aligned_q_256(<4 x i64> %data, i8* %ptr, i8 %mask) {
1642 ; X86-LABEL: test_mask_load_aligned_q_256:
1644 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1645 ; X86-NEXT: vmovdqa (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x00]
1646 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1647 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
1648 ; X86-NEXT: vmovdqa64 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0x00]
1649 ; X86-NEXT: vmovdqa64 (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x08]
1650 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
1651 ; X86-NEXT: retl # encoding: [0xc3]
1653 ; X64-LABEL: test_mask_load_aligned_q_256:
1655 ; X64-NEXT: vmovdqa (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x07]
1656 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1657 ; X64-NEXT: vmovdqa64 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0x07]
1658 ; X64-NEXT: vmovdqa64 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x0f]
1659 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
1660 ; X64-NEXT: retq # encoding: [0xc3]
1661 %res = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 -1)
1662 %res1 = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> %res, i8 %mask)
1663 %res2 = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 %mask)
1664 %res4 = add <4 x i64> %res2, %res1
ret <4 x i64> %res4
}
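; The pshufd tests apply the same shuffle immediate (3) in unmasked, merge-masked,
; and zero-masked forms; the unmasked copy is expected to be compressed from EVEX
; to VEX encoding, while the masked forms stay EVEX-encoded under {%k1}.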
1668 declare <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32>, i32, <4 x i32>, i8)
1670 define <4 x i32>@test_int_x86_avx512_mask_pshuf_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
1671 ; X86-LABEL: test_int_x86_avx512_mask_pshuf_d_128:
1673 ; X86-NEXT: vpshufd $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x70,0xd0,0x03]
1674 ; X86-NEXT: # xmm2 = xmm0[3,0,0,0]
1675 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1676 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1677 ; X86-NEXT: vpshufd $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x70,0xc8,0x03]
1678 ; X86-NEXT: # xmm1 {%k1} = xmm0[3,0,0,0]
1679 ; X86-NEXT: vpshufd $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x70,0xc0,0x03]
1680 ; X86-NEXT: # xmm0 {%k1} {z} = xmm0[3,0,0,0]
1681 ; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
1682 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
1683 ; X86-NEXT: retl # encoding: [0xc3]
1685 ; X64-LABEL: test_int_x86_avx512_mask_pshuf_d_128:
1687 ; X64-NEXT: vpshufd $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x70,0xd0,0x03]
1688 ; X64-NEXT: # xmm2 = xmm0[3,0,0,0]
1689 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1690 ; X64-NEXT: vpshufd $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x70,0xc8,0x03]
1691 ; X64-NEXT: # xmm1 {%k1} = xmm0[3,0,0,0]
1692 ; X64-NEXT: vpshufd $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x70,0xc0,0x03]
1693 ; X64-NEXT: # xmm0 {%k1} {z} = xmm0[3,0,0,0]
1694 ; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
1695 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
1696 ; X64-NEXT: retq # encoding: [0xc3]
1697 %res = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
1698 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3)
1699 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1)
1700 %res3 = add <4 x i32> %res, %res1
1701 %res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}
1705 declare <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32>, i32, <8 x i32>, i8)
1707 define <8 x i32>@test_int_x86_avx512_mask_pshuf_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
1708 ; X86-LABEL: test_int_x86_avx512_mask_pshuf_d_256:
1710 ; X86-NEXT: vpshufd $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x70,0xd0,0x03]
1711 ; X86-NEXT: # ymm2 = ymm0[3,0,0,0,7,4,4,4]
1712 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1713 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1714 ; X86-NEXT: vpshufd $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x70,0xc8,0x03]
1715 ; X86-NEXT: # ymm1 {%k1} = ymm0[3,0,0,0,7,4,4,4]
1716 ; X86-NEXT: vpshufd $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x70,0xc0,0x03]
1717 ; X86-NEXT: # ymm0 {%k1} {z} = ymm0[3,0,0,0,7,4,4,4]
1718 ; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
1719 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
1720 ; X86-NEXT: retl # encoding: [0xc3]
1722 ; X64-LABEL: test_int_x86_avx512_mask_pshuf_d_256:
1724 ; X64-NEXT: vpshufd $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x70,0xd0,0x03]
1725 ; X64-NEXT: # ymm2 = ymm0[3,0,0,0,7,4,4,4]
1726 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1727 ; X64-NEXT: vpshufd $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x70,0xc8,0x03]
1728 ; X64-NEXT: # ymm1 {%k1} = ymm0[3,0,0,0,7,4,4,4]
1729 ; X64-NEXT: vpshufd $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x70,0xc0,0x03]
1730 ; X64-NEXT: # ymm0 {%k1} {z} = ymm0[3,0,0,0,7,4,4,4]
1731 ; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
1732 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
1733 ; X64-NEXT: retq # encoding: [0xc3]
1734 %res = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
1735 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3)
1736 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1)
1737 %res3 = add <8 x i32> %res, %res1
1738 %res4 = add <8 x i32> %res3, %res2
ret <8 x i32> %res4
}
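; The compare tests below return the vpcmpeq*/vpcmpgt* result as an i8 mask. With an
; all-ones mask the compare runs unmasked; with a variable mask the result is either
; ANDed with the mask in a GPR (the 256-bit dword cases) or the compare itself runs
; under {%k1}.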
1742 define i8 @test_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b) {
1743 ; CHECK-LABEL: test_pcmpeq_d_256:
1745 ; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
1746 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1747 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
1748 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1749 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1750 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
ret i8 %res
}
1754 define i8 @test_mask_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
1755 ; X86-LABEL: test_mask_pcmpeq_d_256:
1757 ; X86-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
1758 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1759 ; X86-NEXT: andb {{[0-9]+}}(%esp), %al # encoding: [0x22,0x44,0x24,0x04]
1760 ; X86-NEXT: # kill: def $al killed $al killed $eax
1761 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1762 ; X86-NEXT: retl # encoding: [0xc3]
1764 ; X64-LABEL: test_mask_pcmpeq_d_256:
1766 ; X64-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
1767 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1768 ; X64-NEXT: andb %dil, %al # encoding: [0x40,0x20,0xf8]
1769 ; X64-NEXT: # kill: def $al killed $al killed $eax
1770 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1771 ; X64-NEXT: retq # encoding: [0xc3]
1772 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
ret i8 %res
}
1776 declare i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32>, <8 x i32>, i8)
1778 define i8 @test_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b) {
1779 ; CHECK-LABEL: test_pcmpeq_q_256:
1781 ; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1]
1782 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1783 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
1784 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1785 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1786 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
ret i8 %res
}
1790 define i8 @test_mask_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
1791 ; X86-LABEL: test_mask_pcmpeq_q_256:
1793 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1794 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1795 ; X86-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
1796 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1797 ; X86-NEXT: # kill: def $al killed $al killed $eax
1798 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1799 ; X86-NEXT: retl # encoding: [0xc3]
1801 ; X64-LABEL: test_mask_pcmpeq_q_256:
1803 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1804 ; X64-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
1805 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1806 ; X64-NEXT: # kill: def $al killed $al killed $eax
1807 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1808 ; X64-NEXT: retq # encoding: [0xc3]
1809 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
ret i8 %res
}
1813 declare i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64>, <4 x i64>, i8)
1815 define i8 @test_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b) {
1816 ; CHECK-LABEL: test_pcmpgt_d_256:
1818 ; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1]
1819 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1820 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
1821 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1822 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1823 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
ret i8 %res
}
1827 define i8 @test_mask_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
1828 ; X86-LABEL: test_mask_pcmpgt_d_256:
1830 ; X86-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1]
1831 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1832 ; X86-NEXT: andb {{[0-9]+}}(%esp), %al # encoding: [0x22,0x44,0x24,0x04]
1833 ; X86-NEXT: # kill: def $al killed $al killed $eax
1834 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1835 ; X86-NEXT: retl # encoding: [0xc3]
1837 ; X64-LABEL: test_mask_pcmpgt_d_256:
1839 ; X64-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1]
1840 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1841 ; X64-NEXT: andb %dil, %al # encoding: [0x40,0x20,0xf8]
1842 ; X64-NEXT: # kill: def $al killed $al killed $eax
1843 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1844 ; X64-NEXT: retq # encoding: [0xc3]
1845 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
ret i8 %res
}
1849 declare i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32>, <8 x i32>, i8)
1851 define i8 @test_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b) {
1852 ; CHECK-LABEL: test_pcmpgt_q_256:
1854 ; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x37,0xc1]
1855 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1856 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
1857 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1858 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1859 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
ret i8 %res
}
1863 define i8 @test_mask_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
1864 ; X86-LABEL: test_mask_pcmpgt_q_256:
1866 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1867 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1868 ; X86-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x37,0xc1]
1869 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1870 ; X86-NEXT: # kill: def $al killed $al killed $eax
1871 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1872 ; X86-NEXT: retl # encoding: [0xc3]
1874 ; X64-LABEL: test_mask_pcmpgt_q_256:
1876 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1877 ; X64-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x37,0xc1]
1878 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1879 ; X64-NEXT: # kill: def $al killed $al killed $eax
1880 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1881 ; X64-NEXT: retq # encoding: [0xc3]
1882 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
ret i8 %res
}
1886 declare i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64>, <4 x i64>, i8)
1888 define i8 @test_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b) {
1889 ; CHECK-LABEL: test_pcmpeq_d_128:
1891 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
1892 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1893 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
1894 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1895 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
ret i8 %res
}
1899 define i8 @test_mask_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
1900 ; X86-LABEL: test_mask_pcmpeq_d_128:
1902 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1903 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1904 ; X86-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
1905 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1906 ; X86-NEXT: # kill: def $al killed $al killed $eax
1907 ; X86-NEXT: retl # encoding: [0xc3]
1909 ; X64-LABEL: test_mask_pcmpeq_d_128:
1911 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1912 ; X64-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
1913 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1914 ; X64-NEXT: # kill: def $al killed $al killed $eax
1915 ; X64-NEXT: retq # encoding: [0xc3]
1916 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
ret i8 %res
}
1920 declare i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32>, <4 x i32>, i8)
1922 define i8 @test_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b) {
1923 ; CHECK-LABEL: test_pcmpeq_q_128:
1925 ; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1]
1926 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1927 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
1928 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1929 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
  ret i8 %res
}
1933 define i8 @test_mask_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
1934 ; X86-LABEL: test_mask_pcmpeq_q_128:
1936 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1937 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1938 ; X86-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
1939 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1940 ; X86-NEXT: # kill: def $al killed $al killed $eax
1941 ; X86-NEXT: retl # encoding: [0xc3]
1943 ; X64-LABEL: test_mask_pcmpeq_q_128:
1945 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1946 ; X64-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
1947 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1948 ; X64-NEXT: # kill: def $al killed $al killed $eax
1949 ; X64-NEXT: retq # encoding: [0xc3]
1950 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
  ret i8 %res
}
1954 declare i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64>, <2 x i64>, i8)
1956 define i8 @test_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b) {
1957 ; CHECK-LABEL: test_pcmpgt_d_128:
1959 ; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1]
1960 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1961 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
1962 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1963 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
  ret i8 %res
}
1967 define i8 @test_mask_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
1968 ; X86-LABEL: test_mask_pcmpgt_d_128:
1970 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1971 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1972 ; X86-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x66,0xc1]
1973 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1974 ; X86-NEXT: # kill: def $al killed $al killed $eax
1975 ; X86-NEXT: retl # encoding: [0xc3]
1977 ; X64-LABEL: test_mask_pcmpgt_d_128:
1979 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1980 ; X64-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x66,0xc1]
1981 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1982 ; X64-NEXT: # kill: def $al killed $al killed $eax
1983 ; X64-NEXT: retq # encoding: [0xc3]
1984 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
  ret i8 %res
}
1988 declare i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32>, <4 x i32>, i8)
1990 define i8 @test_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b) {
1991 ; CHECK-LABEL: test_pcmpgt_q_128:
1993 ; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x37,0xc1]
1994 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1995 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
1996 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1997 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
  ret i8 %res
}
2001 define i8 @test_mask_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
2002 ; X86-LABEL: test_mask_pcmpgt_q_128:
2004 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2005 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2006 ; X86-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x37,0xc1]
2007 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
2008 ; X86-NEXT: # kill: def $al killed $al killed $eax
2009 ; X86-NEXT: retl # encoding: [0xc3]
2011 ; X64-LABEL: test_mask_pcmpgt_q_128:
2013 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2014 ; X64-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x37,0xc1]
2015 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
2016 ; X64-NEXT: # kill: def $al killed $al killed $eax
2017 ; X64-NEXT: retq # encoding: [0xc3]
2018 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
  ret i8 %res
}
2022 declare i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64>, <2 x i64>, i8)
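; The tests below exercise the legacy masked floating-point unpack intrinsics
; (llvm.x86.avx512.mask.unpckh.* / llvm.x86.avx512.mask.unpckl.*). Each test
; calls the intrinsic twice, once with the real mask %x3 (merge-masked into
; %x2 under {%k1}) and once with an all-ones mask, then adds the two results
; so a single return value covers both the masked and the unmasked encodings.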
2024 declare <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
2026 define <2 x double>@test_int_x86_avx512_mask_unpckh_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
2027 ; X86-LABEL: test_int_x86_avx512_mask_unpckh_pd_128:
2029 ; X86-NEXT: vunpckhpd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xd9]
2030 ; X86-NEXT: # xmm3 = xmm0[1],xmm1[1]
2031 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2032 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2033 ; X86-NEXT: vunpckhpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x15,0xd1]
2034 ; X86-NEXT: # xmm2 {%k1} = xmm0[1],xmm1[1]
2035 ; X86-NEXT: vaddpd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc3]
2036 ; X86-NEXT: retl # encoding: [0xc3]
2038 ; X64-LABEL: test_int_x86_avx512_mask_unpckh_pd_128:
2040 ; X64-NEXT: vunpckhpd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xd9]
2041 ; X64-NEXT: # xmm3 = xmm0[1],xmm1[1]
2042 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2043 ; X64-NEXT: vunpckhpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x15,0xd1]
2044 ; X64-NEXT: # xmm2 {%k1} = xmm0[1],xmm1[1]
2045 ; X64-NEXT: vaddpd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc3]
2046 ; X64-NEXT: retq # encoding: [0xc3]
2047 %res = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
2048 %res1 = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
2049 %res2 = fadd <2 x double> %res, %res1
2050 ret <2 x double> %res2
}
2053 declare <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
2055 define <4 x double>@test_int_x86_avx512_mask_unpckh_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
2056 ; X86-LABEL: test_int_x86_avx512_mask_unpckh_pd_256:
2058 ; X86-NEXT: vunpckhpd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x15,0xd9]
2059 ; X86-NEXT: # ymm3 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
2060 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2061 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2062 ; X86-NEXT: vunpckhpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x15,0xd1]
2063 ; X86-NEXT: # ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
2064 ; X86-NEXT: vaddpd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
2065 ; X86-NEXT: retl # encoding: [0xc3]
2067 ; X64-LABEL: test_int_x86_avx512_mask_unpckh_pd_256:
2069 ; X64-NEXT: vunpckhpd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x15,0xd9]
2070 ; X64-NEXT: # ymm3 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
2071 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2072 ; X64-NEXT: vunpckhpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x15,0xd1]
2073 ; X64-NEXT: # ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
2074 ; X64-NEXT: vaddpd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
2075 ; X64-NEXT: retq # encoding: [0xc3]
2076 %res = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
2077 %res1 = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
2078 %res2 = fadd <4 x double> %res, %res1
2079 ret <4 x double> %res2
}
2082 declare <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
2084 define <4 x float>@test_int_x86_avx512_mask_unpckh_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
2085 ; X86-LABEL: test_int_x86_avx512_mask_unpckh_ps_128:
2087 ; X86-NEXT: vunpckhps %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xd9]
2088 ; X86-NEXT: # xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2089 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2090 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2091 ; X86-NEXT: vunpckhps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x15,0xd1]
2092 ; X86-NEXT: # xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2093 ; X86-NEXT: vaddps %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
2094 ; X86-NEXT: retl # encoding: [0xc3]
2096 ; X64-LABEL: test_int_x86_avx512_mask_unpckh_ps_128:
2098 ; X64-NEXT: vunpckhps %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xd9]
2099 ; X64-NEXT: # xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2100 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2101 ; X64-NEXT: vunpckhps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x15,0xd1]
2102 ; X64-NEXT: # xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2103 ; X64-NEXT: vaddps %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
2104 ; X64-NEXT: retq # encoding: [0xc3]
2105 %res = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
2106 %res1 = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
2107 %res2 = fadd <4 x float> %res, %res1
2108 ret <4 x float> %res2
}
2111 declare <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
2113 define <8 x float>@test_int_x86_avx512_mask_unpckh_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
2114 ; X86-LABEL: test_int_x86_avx512_mask_unpckh_ps_256:
2116 ; X86-NEXT: vunpckhps %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x15,0xd9]
2117 ; X86-NEXT: # ymm3 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
2118 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2119 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2120 ; X86-NEXT: vunpckhps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x15,0xd1]
2121 ; X86-NEXT: # ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
2122 ; X86-NEXT: vaddps %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
2123 ; X86-NEXT: retl # encoding: [0xc3]
2125 ; X64-LABEL: test_int_x86_avx512_mask_unpckh_ps_256:
2127 ; X64-NEXT: vunpckhps %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x15,0xd9]
2128 ; X64-NEXT: # ymm3 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
2129 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2130 ; X64-NEXT: vunpckhps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x15,0xd1]
2131 ; X64-NEXT: # ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
2132 ; X64-NEXT: vaddps %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
2133 ; X64-NEXT: retq # encoding: [0xc3]
2134 %res = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
2135 %res1 = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
2136 %res2 = fadd <8 x float> %res, %res1
2137 ret <8 x float> %res2
}
2140 declare <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
2142 define <2 x double>@test_int_x86_avx512_mask_unpckl_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
2143 ; X86-LABEL: test_int_x86_avx512_mask_unpckl_pd_128:
2145 ; X86-NEXT: vunpcklpd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x14,0xd9]
2146 ; X86-NEXT: # xmm3 = xmm0[0],xmm1[0]
2147 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2148 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2149 ; X86-NEXT: vunpcklpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x14,0xd1]
2150 ; X86-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0]
2151 ; X86-NEXT: vaddpd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc3]
2152 ; X86-NEXT: retl # encoding: [0xc3]
2154 ; X64-LABEL: test_int_x86_avx512_mask_unpckl_pd_128:
2156 ; X64-NEXT: vunpcklpd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x14,0xd9]
2157 ; X64-NEXT: # xmm3 = xmm0[0],xmm1[0]
2158 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2159 ; X64-NEXT: vunpcklpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x14,0xd1]
2160 ; X64-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0]
2161 ; X64-NEXT: vaddpd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc3]
2162 ; X64-NEXT: retq # encoding: [0xc3]
2163 %res = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
2164 %res1 = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
2165 %res2 = fadd <2 x double> %res, %res1
2166 ret <2 x double> %res2
}
2169 declare <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
2171 define <4 x double>@test_int_x86_avx512_mask_unpckl_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
2172 ; X86-LABEL: test_int_x86_avx512_mask_unpckl_pd_256:
2174 ; X86-NEXT: vunpcklpd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x14,0xd9]
2175 ; X86-NEXT: # ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
2176 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2177 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2178 ; X86-NEXT: vunpcklpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x14,0xd1]
2179 ; X86-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
2180 ; X86-NEXT: vaddpd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
2181 ; X86-NEXT: retl # encoding: [0xc3]
2183 ; X64-LABEL: test_int_x86_avx512_mask_unpckl_pd_256:
2185 ; X64-NEXT: vunpcklpd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x14,0xd9]
2186 ; X64-NEXT: # ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
2187 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2188 ; X64-NEXT: vunpcklpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x14,0xd1]
2189 ; X64-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
2190 ; X64-NEXT: vaddpd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
2191 ; X64-NEXT: retq # encoding: [0xc3]
2192 %res = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
2193 %res1 = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
2194 %res2 = fadd <4 x double> %res, %res1
2195 ret <4 x double> %res2
}
2198 declare <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
2200 define <4 x float>@test_int_x86_avx512_mask_unpckl_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
2201 ; X86-LABEL: test_int_x86_avx512_mask_unpckl_ps_128:
2203 ; X86-NEXT: vunpcklps %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xd9]
2204 ; X86-NEXT: # xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2205 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2206 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2207 ; X86-NEXT: vunpcklps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x14,0xd1]
2208 ; X86-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2209 ; X86-NEXT: vaddps %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
2210 ; X86-NEXT: retl # encoding: [0xc3]
2212 ; X64-LABEL: test_int_x86_avx512_mask_unpckl_ps_128:
2214 ; X64-NEXT: vunpcklps %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xd9]
2215 ; X64-NEXT: # xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2216 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2217 ; X64-NEXT: vunpcklps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x14,0xd1]
2218 ; X64-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2219 ; X64-NEXT: vaddps %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
2220 ; X64-NEXT: retq # encoding: [0xc3]
2221 %res = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
2222 %res1 = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
2223 %res2 = fadd <4 x float> %res, %res1
2224 ret <4 x float> %res2
}
2227 declare <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
2229 define <8 x float>@test_int_x86_avx512_mask_unpckl_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
2230 ; X86-LABEL: test_int_x86_avx512_mask_unpckl_ps_256:
2232 ; X86-NEXT: vunpcklps %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x14,0xd9]
2233 ; X86-NEXT: # ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
2234 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2235 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2236 ; X86-NEXT: vunpcklps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x14,0xd1]
2237 ; X86-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
2238 ; X86-NEXT: vaddps %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
2239 ; X86-NEXT: retl # encoding: [0xc3]
2241 ; X64-LABEL: test_int_x86_avx512_mask_unpckl_ps_256:
2243 ; X64-NEXT: vunpcklps %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x14,0xd9]
2244 ; X64-NEXT: # ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
2245 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2246 ; X64-NEXT: vunpcklps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x14,0xd1]
2247 ; X64-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
2248 ; X64-NEXT: vaddps %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
2249 ; X64-NEXT: retq # encoding: [0xc3]
2250 %res = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
2251 %res1 = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
2252 %res2 = fadd <8 x float> %res, %res1
2253 ret <8 x float> %res2
}
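; The same masked/unmasked pattern repeated for the integer unpack intrinsics
; (vpunpckhdq/vpunpckldq and vpunpckhqdq/vpunpcklqdq), combining the two
; results with vpaddd/vpaddq instead of vaddps/vaddpd.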
2256 declare <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
2258 define <4 x i32>@test_int_x86_avx512_mask_punpckhd_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
2259 ; X86-LABEL: test_int_x86_avx512_mask_punpckhd_q_128:
2261 ; X86-NEXT: vpunpckhdq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6a,0xd9]
2262 ; X86-NEXT: # xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2263 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2264 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2265 ; X86-NEXT: vpunpckhdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6a,0xd1]
2266 ; X86-NEXT: # xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2267 ; X86-NEXT: vpaddd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc3]
2268 ; X86-NEXT: retl # encoding: [0xc3]
2270 ; X64-LABEL: test_int_x86_avx512_mask_punpckhd_q_128:
2272 ; X64-NEXT: vpunpckhdq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6a,0xd9]
2273 ; X64-NEXT: # xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2274 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2275 ; X64-NEXT: vpunpckhdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6a,0xd1]
2276 ; X64-NEXT: # xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2277 ; X64-NEXT: vpaddd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc3]
2278 ; X64-NEXT: retq # encoding: [0xc3]
2279 %res = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
2280 %res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
2281 %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}
2285 declare <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
2287 define <4 x i32>@test_int_x86_avx512_mask_punpckld_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
2288 ; X86-LABEL: test_int_x86_avx512_mask_punpckld_q_128:
2290 ; X86-NEXT: vpunpckldq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x62,0xd9]
2291 ; X86-NEXT: # xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2292 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2293 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2294 ; X86-NEXT: vpunpckldq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x62,0xd1]
2295 ; X86-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2296 ; X86-NEXT: vpaddd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc3]
2297 ; X86-NEXT: retl # encoding: [0xc3]
2299 ; X64-LABEL: test_int_x86_avx512_mask_punpckld_q_128:
2301 ; X64-NEXT: vpunpckldq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x62,0xd9]
2302 ; X64-NEXT: # xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2303 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2304 ; X64-NEXT: vpunpckldq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x62,0xd1]
2305 ; X64-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2306 ; X64-NEXT: vpaddd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc3]
2307 ; X64-NEXT: retq # encoding: [0xc3]
2308 %res = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
2309 %res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
2310 %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}
2314 declare <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
2316 define <8 x i32>@test_int_x86_avx512_mask_punpckhd_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
2317 ; X86-LABEL: test_int_x86_avx512_mask_punpckhd_q_256:
2319 ; X86-NEXT: vpunpckhdq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6a,0xd9]
2320 ; X86-NEXT: # ymm3 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
2321 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2322 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2323 ; X86-NEXT: vpunpckhdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6a,0xd1]
2324 ; X86-NEXT: # ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
2325 ; X86-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
2326 ; X86-NEXT: retl # encoding: [0xc3]
2328 ; X64-LABEL: test_int_x86_avx512_mask_punpckhd_q_256:
2330 ; X64-NEXT: vpunpckhdq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6a,0xd9]
2331 ; X64-NEXT: # ymm3 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
2332 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2333 ; X64-NEXT: vpunpckhdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6a,0xd1]
2334 ; X64-NEXT: # ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
2335 ; X64-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
2336 ; X64-NEXT: retq # encoding: [0xc3]
2337 %res = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
2338 %res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
2339 %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}
2343 declare <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
2345 define <8 x i32>@test_int_x86_avx512_mask_punpckld_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
2346 ; X86-LABEL: test_int_x86_avx512_mask_punpckld_q_256:
2348 ; X86-NEXT: vpunpckldq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x62,0xd9]
2349 ; X86-NEXT: # ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
2350 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2351 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2352 ; X86-NEXT: vpunpckldq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x62,0xd1]
2353 ; X86-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
2354 ; X86-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
2355 ; X86-NEXT: retl # encoding: [0xc3]
2357 ; X64-LABEL: test_int_x86_avx512_mask_punpckld_q_256:
2359 ; X64-NEXT: vpunpckldq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x62,0xd9]
2360 ; X64-NEXT: # ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
2361 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2362 ; X64-NEXT: vpunpckldq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x62,0xd1]
2363 ; X64-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
2364 ; X64-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
2365 ; X64-NEXT: retq # encoding: [0xc3]
2366 %res = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
2367 %res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
2368 %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}
2372 declare <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
2374 define <2 x i64>@test_int_x86_avx512_mask_punpckhqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
2375 ; X86-LABEL: test_int_x86_avx512_mask_punpckhqd_q_128:
2377 ; X86-NEXT: vpunpckhqdq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6d,0xd9]
2378 ; X86-NEXT: # xmm3 = xmm0[1],xmm1[1]
2379 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2380 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2381 ; X86-NEXT: vpunpckhqdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6d,0xd1]
2382 ; X86-NEXT: # xmm2 {%k1} = xmm0[1],xmm1[1]
2383 ; X86-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
2384 ; X86-NEXT: retl # encoding: [0xc3]
2386 ; X64-LABEL: test_int_x86_avx512_mask_punpckhqd_q_128:
2388 ; X64-NEXT: vpunpckhqdq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6d,0xd9]
2389 ; X64-NEXT: # xmm3 = xmm0[1],xmm1[1]
2390 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2391 ; X64-NEXT: vpunpckhqdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6d,0xd1]
2392 ; X64-NEXT: # xmm2 {%k1} = xmm0[1],xmm1[1]
2393 ; X64-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
2394 ; X64-NEXT: retq # encoding: [0xc3]
2395 %res = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
2396 %res1 = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
2397 %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}
2401 declare <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
2403 define <2 x i64>@test_int_x86_avx512_mask_punpcklqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
2404 ; X86-LABEL: test_int_x86_avx512_mask_punpcklqd_q_128:
2406 ; X86-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6c,0xd9]
2407 ; X86-NEXT: # xmm3 = xmm0[0],xmm1[0]
2408 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2409 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2410 ; X86-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6c,0xd1]
2411 ; X86-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0]
2412 ; X86-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
2413 ; X86-NEXT: retl # encoding: [0xc3]
2415 ; X64-LABEL: test_int_x86_avx512_mask_punpcklqd_q_128:
2417 ; X64-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6c,0xd9]
2418 ; X64-NEXT: # xmm3 = xmm0[0],xmm1[0]
2419 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2420 ; X64-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6c,0xd1]
2421 ; X64-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0]
2422 ; X64-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
2423 ; X64-NEXT: retq # encoding: [0xc3]
2424 %res = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
2425 %res1 = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
2426 %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}
2430 declare <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
2432 define <4 x i64>@test_int_x86_avx512_mask_punpcklqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
2433 ; X86-LABEL: test_int_x86_avx512_mask_punpcklqd_q_256:
2435 ; X86-NEXT: vpunpcklqdq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6c,0xd9]
2436 ; X86-NEXT: # ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
2437 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2438 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2439 ; X86-NEXT: vpunpcklqdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6c,0xd1]
2440 ; X86-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
2441 ; X86-NEXT: vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
2442 ; X86-NEXT: retl # encoding: [0xc3]
2444 ; X64-LABEL: test_int_x86_avx512_mask_punpcklqd_q_256:
2446 ; X64-NEXT: vpunpcklqdq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6c,0xd9]
2447 ; X64-NEXT: # ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
2448 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2449 ; X64-NEXT: vpunpcklqdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6c,0xd1]
2450 ; X64-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
2451 ; X64-NEXT: vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
2452 ; X64-NEXT: retq # encoding: [0xc3]
2453 %res = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
2454 %res1 = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
2455 %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}
2459 declare <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
2461 define <4 x i64>@test_int_x86_avx512_mask_punpckhqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
2462 ; X86-LABEL: test_int_x86_avx512_mask_punpckhqd_q_256:
2464 ; X86-NEXT: vpunpckhqdq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6d,0xd9]
2465 ; X86-NEXT: # ymm3 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
2466 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2467 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2468 ; X86-NEXT: vpunpckhqdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6d,0xd1]
2469 ; X86-NEXT: # ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
2470 ; X86-NEXT: vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
2471 ; X86-NEXT: retl # encoding: [0xc3]
2473 ; X64-LABEL: test_int_x86_avx512_mask_punpckhqd_q_256:
2475 ; X64-NEXT: vpunpckhqdq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6d,0xd9]
2476 ; X64-NEXT: # ymm3 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
2477 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2478 ; X64-NEXT: vpunpckhqdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6d,0xd1]
2479 ; X64-NEXT: # ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
2480 ; X64-NEXT: vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
2481 ; X64-NEXT: retq # encoding: [0xc3]
2482 %res = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
2483 %res1 = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
2484 %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}
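; Masked logical-AND tests. The suffix encodes the operand form under test:
; rr = register/register, rm = register/memory, rmb = register with a scalar
; broadcast from memory ({1to4}/{1to8}); a trailing k merge-masks into
; %passThru and kz zero-masks ({z}). As a rough hand-written sketch (not part
; of this autogenerated file), the merge-masked 128-bit AND corresponds to IR
; along these lines, with the low four bits of the i8 mask selecting lanes:
;   %and  = and <4 x i32> %a, %b
;   %bits = bitcast i8 %mask to <8 x i1>
;   %m    = shufflevector <8 x i1> %bits, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
;   %res  = select <4 x i1> %m, <4 x i32> %and, <4 x i32> %passThru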
2488 define <4 x i32> @test_mask_and_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
2489 ; CHECK-LABEL: test_mask_and_epi32_rr_128:
2491 ; CHECK-NEXT: vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
2492 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2493 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}
2497 define <4 x i32> @test_mask_and_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
2498 ; X86-LABEL: test_mask_and_epi32_rrk_128:
2500 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2501 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2502 ; X86-NEXT: vpandd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdb,0xd1]
2503 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2504 ; X86-NEXT: retl # encoding: [0xc3]
2506 ; X64-LABEL: test_mask_and_epi32_rrk_128:
2508 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2509 ; X64-NEXT: vpandd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdb,0xd1]
2510 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2511 ; X64-NEXT: retq # encoding: [0xc3]
2512 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}
2516 define <4 x i32> @test_mask_and_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
2517 ; X86-LABEL: test_mask_and_epi32_rrkz_128:
2519 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2520 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2521 ; X86-NEXT: vpandd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdb,0xc1]
2522 ; X86-NEXT: retl # encoding: [0xc3]
2524 ; X64-LABEL: test_mask_and_epi32_rrkz_128:
2526 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2527 ; X64-NEXT: vpandd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdb,0xc1]
2528 ; X64-NEXT: retq # encoding: [0xc3]
2529 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}
2533 define <4 x i32> @test_mask_and_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
2534 ; X86-LABEL: test_mask_and_epi32_rm_128:
2536 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2537 ; X86-NEXT: vandps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0x00]
2538 ; X86-NEXT: retl # encoding: [0xc3]
2540 ; X64-LABEL: test_mask_and_epi32_rm_128:
2542 ; X64-NEXT: vandps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0x07]
2543 ; X64-NEXT: retq # encoding: [0xc3]
2544 %b = load <4 x i32>, <4 x i32>* %ptr_b
2545 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}
2549 define <4 x i32> @test_mask_and_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
2550 ; X86-LABEL: test_mask_and_epi32_rmk_128:
2552 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2553 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2554 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
2555 ; X86-NEXT: vpandd (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdb,0x08]
2556 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2557 ; X86-NEXT: retl # encoding: [0xc3]
2559 ; X64-LABEL: test_mask_and_epi32_rmk_128:
2561 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2562 ; X64-NEXT: vpandd (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdb,0x0f]
2563 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2564 ; X64-NEXT: retq # encoding: [0xc3]
2565 %b = load <4 x i32>, <4 x i32>* %ptr_b
2566 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}
2570 define <4 x i32> @test_mask_and_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
2571 ; X86-LABEL: test_mask_and_epi32_rmkz_128:
2573 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2574 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2575 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
2576 ; X86-NEXT: vpandd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdb,0x00]
2577 ; X86-NEXT: retl # encoding: [0xc3]
2579 ; X64-LABEL: test_mask_and_epi32_rmkz_128:
2581 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2582 ; X64-NEXT: vpandd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdb,0x07]
2583 ; X64-NEXT: retq # encoding: [0xc3]
2584 %b = load <4 x i32>, <4 x i32>* %ptr_b
2585 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}
2589 define <4 x i32> @test_mask_and_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
2590 ; X86-LABEL: test_mask_and_epi32_rmb_128:
2592 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2593 ; X86-NEXT: vpandd (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xdb,0x00]
2594 ; X86-NEXT: retl # encoding: [0xc3]
2596 ; X64-LABEL: test_mask_and_epi32_rmb_128:
2598 ; X64-NEXT: vpandd (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xdb,0x07]
2599 ; X64-NEXT: retq # encoding: [0xc3]
2600 %q = load i32, i32* %ptr_b
2601 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
2602 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
2603 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}
2607 define <4 x i32> @test_mask_and_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
2608 ; X86-LABEL: test_mask_and_epi32_rmbk_128:
2610 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2611 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2612 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
2613 ; X86-NEXT: vpandd (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xdb,0x08]
2614 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2615 ; X86-NEXT: retl # encoding: [0xc3]
2617 ; X64-LABEL: test_mask_and_epi32_rmbk_128:
2619 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2620 ; X64-NEXT: vpandd (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xdb,0x0f]
2621 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2622 ; X64-NEXT: retq # encoding: [0xc3]
2623 %q = load i32, i32* %ptr_b
2624 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
2625 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
2626 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}
2630 define <4 x i32> @test_mask_and_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
2631 ; X86-LABEL: test_mask_and_epi32_rmbkz_128:
2633 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2634 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2635 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
2636 ; X86-NEXT: vpandd (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xdb,0x00]
2637 ; X86-NEXT: retl # encoding: [0xc3]
2639 ; X64-LABEL: test_mask_and_epi32_rmbkz_128:
2641 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2642 ; X64-NEXT: vpandd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xdb,0x07]
2643 ; X64-NEXT: retq # encoding: [0xc3]
2644 %q = load i32, i32* %ptr_b
2645 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
2646 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
2647 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}
2651 declare <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
2653 define <8 x i32> @test_mask_and_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
2654 ; CHECK-LABEL: test_mask_and_epi32_rr_256:
2656 ; CHECK-NEXT: vandps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x54,0xc1]
2657 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2658 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}
2662 define <8 x i32> @test_mask_and_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
2663 ; X86-LABEL: test_mask_and_epi32_rrk_256:
2665 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2666 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2667 ; X86-NEXT: vpandd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdb,0xd1]
2668 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2669 ; X86-NEXT: retl # encoding: [0xc3]
2671 ; X64-LABEL: test_mask_and_epi32_rrk_256:
2673 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2674 ; X64-NEXT: vpandd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdb,0xd1]
2675 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2676 ; X64-NEXT: retq # encoding: [0xc3]
2677 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}
2681 define <8 x i32> @test_mask_and_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
2682 ; X86-LABEL: test_mask_and_epi32_rrkz_256:
2684 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2685 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2686 ; X86-NEXT: vpandd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0xc1]
2687 ; X86-NEXT: retl # encoding: [0xc3]
2689 ; X64-LABEL: test_mask_and_epi32_rrkz_256:
2691 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2692 ; X64-NEXT: vpandd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0xc1]
2693 ; X64-NEXT: retq # encoding: [0xc3]
2694 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}
2698 define <8 x i32> @test_mask_and_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
2699 ; X86-LABEL: test_mask_and_epi32_rm_256:
2701 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2702 ; X86-NEXT: vandps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x54,0x00]
2703 ; X86-NEXT: retl # encoding: [0xc3]
2705 ; X64-LABEL: test_mask_and_epi32_rm_256:
2707 ; X64-NEXT: vandps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x54,0x07]
2708 ; X64-NEXT: retq # encoding: [0xc3]
2709 %b = load <8 x i32>, <8 x i32>* %ptr_b
2710 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}
2714 define <8 x i32> @test_mask_and_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
2715 ; X86-LABEL: test_mask_and_epi32_rmk_256:
2717 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2718 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2719 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
2720 ; X86-NEXT: vpandd (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdb,0x08]
2721 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
2722 ; X86-NEXT: retl # encoding: [0xc3]
2724 ; X64-LABEL: test_mask_and_epi32_rmk_256:
2726 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2727 ; X64-NEXT: vpandd (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdb,0x0f]
2728 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
2729 ; X64-NEXT: retq # encoding: [0xc3]
2730 %b = load <8 x i32>, <8 x i32>* %ptr_b
2731 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}
2735 define <8 x i32> @test_mask_and_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
2736 ; X86-LABEL: test_mask_and_epi32_rmkz_256:
2738 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2739 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2740 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
2741 ; X86-NEXT: vpandd (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0x00]
2742 ; X86-NEXT: retl # encoding: [0xc3]
2744 ; X64-LABEL: test_mask_and_epi32_rmkz_256:
2746 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2747 ; X64-NEXT: vpandd (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0x07]
2748 ; X64-NEXT: retq # encoding: [0xc3]
2749 %b = load <8 x i32>, <8 x i32>* %ptr_b
2750 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}
2754 define <8 x i32> @test_mask_and_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
2755 ; X86-LABEL: test_mask_and_epi32_rmb_256:
2757 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2758 ; X86-NEXT: vpandd (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xdb,0x00]
2759 ; X86-NEXT: retl # encoding: [0xc3]
2761 ; X64-LABEL: test_mask_and_epi32_rmb_256:
2763 ; X64-NEXT: vpandd (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xdb,0x07]
2764 ; X64-NEXT: retq # encoding: [0xc3]
2765 %q = load i32, i32* %ptr_b
2766 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
2767 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
2768 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}
2772 define <8 x i32> @test_mask_and_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
2773 ; X86-LABEL: test_mask_and_epi32_rmbk_256:
2775 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2776 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2777 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
2778 ; X86-NEXT: vpandd (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xdb,0x08]
2779 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
2780 ; X86-NEXT: retl # encoding: [0xc3]
2782 ; X64-LABEL: test_mask_and_epi32_rmbk_256:
2784 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2785 ; X64-NEXT: vpandd (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xdb,0x0f]
2786 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
2787 ; X64-NEXT: retq # encoding: [0xc3]
2788 %q = load i32, i32* %ptr_b
2789 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
2790 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
2791 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}
2795 define <8 x i32> @test_mask_and_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
2796 ; X86-LABEL: test_mask_and_epi32_rmbkz_256:
2798 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2799 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2800 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
2801 ; X86-NEXT: vpandd (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xdb,0x00]
2802 ; X86-NEXT: retl # encoding: [0xc3]
2804 ; X64-LABEL: test_mask_and_epi32_rmbkz_256:
2806 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2807 ; X64-NEXT: vpandd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xdb,0x07]
2808 ; X64-NEXT: retq # encoding: [0xc3]
2809 %q = load i32, i32* %ptr_b
2810 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
2811 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
2812 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}
2816 declare <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
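; The same rr/rm/rmb (and k/kz) operand-form matrix repeated for the masked
; logical-OR intrinsic; with an all-ones mask the compiler is free to emit the
; plain VEX vorps form instead of the EVEX vpord encoding.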
2818 define <4 x i32> @test_mask_or_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
2819 ; CHECK-LABEL: test_mask_or_epi32_rr_128:
2821 ; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1]
2822 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2823 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}
2827 define <4 x i32> @test_mask_or_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
2828 ; X86-LABEL: test_mask_or_epi32_rrk_128:
2830 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2831 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2832 ; X86-NEXT: vpord %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xeb,0xd1]
2833 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2834 ; X86-NEXT: retl # encoding: [0xc3]
2836 ; X64-LABEL: test_mask_or_epi32_rrk_128:
2838 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2839 ; X64-NEXT: vpord %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xeb,0xd1]
2840 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2841 ; X64-NEXT: retq # encoding: [0xc3]
2842 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}
2846 define <4 x i32> @test_mask_or_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
2847 ; X86-LABEL: test_mask_or_epi32_rrkz_128:
2849 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2850 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2851 ; X86-NEXT: vpord %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xeb,0xc1]
2852 ; X86-NEXT: retl # encoding: [0xc3]
2854 ; X64-LABEL: test_mask_or_epi32_rrkz_128:
2856 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2857 ; X64-NEXT: vpord %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xeb,0xc1]
2858 ; X64-NEXT: retq # encoding: [0xc3]
2859 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}
2863 define <4 x i32> @test_mask_or_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
2864 ; X86-LABEL: test_mask_or_epi32_rm_128:
2866 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2867 ; X86-NEXT: vorps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0x00]
2868 ; X86-NEXT: retl # encoding: [0xc3]
2870 ; X64-LABEL: test_mask_or_epi32_rm_128:
2872 ; X64-NEXT: vorps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0x07]
2873 ; X64-NEXT: retq # encoding: [0xc3]
2874 %b = load <4 x i32>, <4 x i32>* %ptr_b
2875 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}
2879 define <4 x i32> @test_mask_or_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
2880 ; X86-LABEL: test_mask_or_epi32_rmk_128:
2882 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2883 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2884 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
2885 ; X86-NEXT: vpord (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xeb,0x08]
2886 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2887 ; X86-NEXT: retl # encoding: [0xc3]
2889 ; X64-LABEL: test_mask_or_epi32_rmk_128:
2891 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2892 ; X64-NEXT: vpord (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xeb,0x0f]
2893 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2894 ; X64-NEXT: retq # encoding: [0xc3]
2895 %b = load <4 x i32>, <4 x i32>* %ptr_b
2896 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}
2900 define <4 x i32> @test_mask_or_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
2901 ; X86-LABEL: test_mask_or_epi32_rmkz_128:
2903 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2904 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2905 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
2906 ; X86-NEXT: vpord (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xeb,0x00]
2907 ; X86-NEXT: retl # encoding: [0xc3]
2909 ; X64-LABEL: test_mask_or_epi32_rmkz_128:
2911 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2912 ; X64-NEXT: vpord (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xeb,0x07]
2913 ; X64-NEXT: retq # encoding: [0xc3]
2914 %b = load <4 x i32>, <4 x i32>* %ptr_b
2915 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}
2919 define <4 x i32> @test_mask_or_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
2920 ; X86-LABEL: test_mask_or_epi32_rmb_128:
2922 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2923 ; X86-NEXT: vpord (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xeb,0x00]
2924 ; X86-NEXT: retl # encoding: [0xc3]
2926 ; X64-LABEL: test_mask_or_epi32_rmb_128:
2928 ; X64-NEXT: vpord (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xeb,0x07]
2929 ; X64-NEXT: retq # encoding: [0xc3]
2930 %q = load i32, i32* %ptr_b
2931 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
2932 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
2933 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}
2937 define <4 x i32> @test_mask_or_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
2938 ; X86-LABEL: test_mask_or_epi32_rmbk_128:
2940 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2941 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2942 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
2943 ; X86-NEXT: vpord (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xeb,0x08]
2944 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2945 ; X86-NEXT: retl # encoding: [0xc3]
2947 ; X64-LABEL: test_mask_or_epi32_rmbk_128:
2949 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2950 ; X64-NEXT: vpord (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xeb,0x0f]
2951 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2952 ; X64-NEXT: retq # encoding: [0xc3]
2953 %q = load i32, i32* %ptr_b
2954 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
2955 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
2956 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
2960 define <4 x i32> @test_mask_or_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
2961 ; X86-LABEL: test_mask_or_epi32_rmbkz_128:
2963 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2964 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2965 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
2966 ; X86-NEXT: vpord (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xeb,0x00]
2967 ; X86-NEXT: retl # encoding: [0xc3]
2969 ; X64-LABEL: test_mask_or_epi32_rmbkz_128:
2971 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2972 ; X64-NEXT: vpord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xeb,0x07]
2973 ; X64-NEXT: retq # encoding: [0xc3]
2974 %q = load i32, i32* %ptr_b
2975 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
2976 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
2977 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
2981 declare <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
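; Masked OR of packed 32-bit integers, 256-bit vectors (llvm.x86.avx512.mask.por.d.256): register, memory, and {1to8} broadcast operands, each tested unmasked, merge-masked, and zero-masked.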
2983 define <8 x i32> @test_mask_or_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
2984 ; CHECK-LABEL: test_mask_or_epi32_rr_256:
2986 ; CHECK-NEXT: vorps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x56,0xc1]
2987 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2988 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
2992 define <8 x i32> @test_mask_or_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
2993 ; X86-LABEL: test_mask_or_epi32_rrk_256:
2995 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2996 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2997 ; X86-NEXT: vpord %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xeb,0xd1]
2998 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2999 ; X86-NEXT: retl # encoding: [0xc3]
3001 ; X64-LABEL: test_mask_or_epi32_rrk_256:
3003 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3004 ; X64-NEXT: vpord %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xeb,0xd1]
3005 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3006 ; X64-NEXT: retq # encoding: [0xc3]
3007 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
3011 define <8 x i32> @test_mask_or_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
3012 ; X86-LABEL: test_mask_or_epi32_rrkz_256:
3014 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3015 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3016 ; X86-NEXT: vpord %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0xc1]
3017 ; X86-NEXT: retl # encoding: [0xc3]
3019 ; X64-LABEL: test_mask_or_epi32_rrkz_256:
3021 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3022 ; X64-NEXT: vpord %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0xc1]
3023 ; X64-NEXT: retq # encoding: [0xc3]
3024 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
3028 define <8 x i32> @test_mask_or_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
3029 ; X86-LABEL: test_mask_or_epi32_rm_256:
3031 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3032 ; X86-NEXT: vorps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x56,0x00]
3033 ; X86-NEXT: retl # encoding: [0xc3]
3035 ; X64-LABEL: test_mask_or_epi32_rm_256:
3037 ; X64-NEXT: vorps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x56,0x07]
3038 ; X64-NEXT: retq # encoding: [0xc3]
3039 %b = load <8 x i32>, <8 x i32>* %ptr_b
3040 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
3044 define <8 x i32> @test_mask_or_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
3045 ; X86-LABEL: test_mask_or_epi32_rmk_256:
3047 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3048 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3049 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3050 ; X86-NEXT: vpord (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xeb,0x08]
3051 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3052 ; X86-NEXT: retl # encoding: [0xc3]
3054 ; X64-LABEL: test_mask_or_epi32_rmk_256:
3056 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3057 ; X64-NEXT: vpord (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xeb,0x0f]
3058 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3059 ; X64-NEXT: retq # encoding: [0xc3]
3060 %b = load <8 x i32>, <8 x i32>* %ptr_b
3061 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
3065 define <8 x i32> @test_mask_or_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
3066 ; X86-LABEL: test_mask_or_epi32_rmkz_256:
3068 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3069 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3070 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3071 ; X86-NEXT: vpord (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0x00]
3072 ; X86-NEXT: retl # encoding: [0xc3]
3074 ; X64-LABEL: test_mask_or_epi32_rmkz_256:
3076 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3077 ; X64-NEXT: vpord (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0x07]
3078 ; X64-NEXT: retq # encoding: [0xc3]
3079 %b = load <8 x i32>, <8 x i32>* %ptr_b
3080 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
3084 define <8 x i32> @test_mask_or_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
3085 ; X86-LABEL: test_mask_or_epi32_rmb_256:
3087 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3088 ; X86-NEXT: vpord (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xeb,0x00]
3089 ; X86-NEXT: retl # encoding: [0xc3]
3091 ; X64-LABEL: test_mask_or_epi32_rmb_256:
3093 ; X64-NEXT: vpord (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xeb,0x07]
3094 ; X64-NEXT: retq # encoding: [0xc3]
3095 %q = load i32, i32* %ptr_b
3096 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
3097 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
3098 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
3102 define <8 x i32> @test_mask_or_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
3103 ; X86-LABEL: test_mask_or_epi32_rmbk_256:
3105 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3106 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3107 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3108 ; X86-NEXT: vpord (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xeb,0x08]
3109 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3110 ; X86-NEXT: retl # encoding: [0xc3]
3112 ; X64-LABEL: test_mask_or_epi32_rmbk_256:
3114 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3115 ; X64-NEXT: vpord (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xeb,0x0f]
3116 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3117 ; X64-NEXT: retq # encoding: [0xc3]
3118 %q = load i32, i32* %ptr_b
3119 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
3120 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
3121 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
3125 define <8 x i32> @test_mask_or_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
3126 ; X86-LABEL: test_mask_or_epi32_rmbkz_256:
3128 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3129 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3130 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3131 ; X86-NEXT: vpord (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xeb,0x00]
3132 ; X86-NEXT: retl # encoding: [0xc3]
3134 ; X64-LABEL: test_mask_or_epi32_rmbkz_256:
3136 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3137 ; X64-NEXT: vpord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xeb,0x07]
3138 ; X64-NEXT: retq # encoding: [0xc3]
3139 %q = load i32, i32* %ptr_b
3140 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
3141 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
3142 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
3146 declare <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
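; Masked XOR of packed 32-bit integers, 128-bit vectors (llvm.x86.avx512.mask.pxor.d.128): register, memory, and {1to4} broadcast operands, each tested unmasked, merge-masked, and zero-masked.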
3148 define <4 x i32> @test_mask_xor_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
3149 ; CHECK-LABEL: test_mask_xor_epi32_rr_128:
3151 ; CHECK-NEXT: vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1]
3152 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3153 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
3157 define <4 x i32> @test_mask_xor_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
3158 ; X86-LABEL: test_mask_xor_epi32_rrk_128:
3160 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3161 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3162 ; X86-NEXT: vpxord %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xef,0xd1]
3163 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3164 ; X86-NEXT: retl # encoding: [0xc3]
3166 ; X64-LABEL: test_mask_xor_epi32_rrk_128:
3168 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3169 ; X64-NEXT: vpxord %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xef,0xd1]
3170 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3171 ; X64-NEXT: retq # encoding: [0xc3]
3172 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
3176 define <4 x i32> @test_mask_xor_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
3177 ; X86-LABEL: test_mask_xor_epi32_rrkz_128:
3179 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3180 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3181 ; X86-NEXT: vpxord %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xef,0xc1]
3182 ; X86-NEXT: retl # encoding: [0xc3]
3184 ; X64-LABEL: test_mask_xor_epi32_rrkz_128:
3186 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3187 ; X64-NEXT: vpxord %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xef,0xc1]
3188 ; X64-NEXT: retq # encoding: [0xc3]
3189 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
3193 define <4 x i32> @test_mask_xor_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
3194 ; X86-LABEL: test_mask_xor_epi32_rm_128:
3196 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3197 ; X86-NEXT: vxorps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0x00]
3198 ; X86-NEXT: retl # encoding: [0xc3]
3200 ; X64-LABEL: test_mask_xor_epi32_rm_128:
3202 ; X64-NEXT: vxorps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0x07]
3203 ; X64-NEXT: retq # encoding: [0xc3]
3204 %b = load <4 x i32>, <4 x i32>* %ptr_b
3205 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
3209 define <4 x i32> @test_mask_xor_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
3210 ; X86-LABEL: test_mask_xor_epi32_rmk_128:
3212 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3213 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3214 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3215 ; X86-NEXT: vpxord (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xef,0x08]
3216 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3217 ; X86-NEXT: retl # encoding: [0xc3]
3219 ; X64-LABEL: test_mask_xor_epi32_rmk_128:
3221 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3222 ; X64-NEXT: vpxord (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xef,0x0f]
3223 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3224 ; X64-NEXT: retq # encoding: [0xc3]
3225 %b = load <4 x i32>, <4 x i32>* %ptr_b
3226 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
3230 define <4 x i32> @test_mask_xor_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
3231 ; X86-LABEL: test_mask_xor_epi32_rmkz_128:
3233 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3234 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3235 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3236 ; X86-NEXT: vpxord (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xef,0x00]
3237 ; X86-NEXT: retl # encoding: [0xc3]
3239 ; X64-LABEL: test_mask_xor_epi32_rmkz_128:
3241 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3242 ; X64-NEXT: vpxord (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xef,0x07]
3243 ; X64-NEXT: retq # encoding: [0xc3]
3244 %b = load <4 x i32>, <4 x i32>* %ptr_b
3245 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
3249 define <4 x i32> @test_mask_xor_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
3250 ; X86-LABEL: test_mask_xor_epi32_rmb_128:
3252 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3253 ; X86-NEXT: vpxord (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xef,0x00]
3254 ; X86-NEXT: retl # encoding: [0xc3]
3256 ; X64-LABEL: test_mask_xor_epi32_rmb_128:
3258 ; X64-NEXT: vpxord (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xef,0x07]
3259 ; X64-NEXT: retq # encoding: [0xc3]
3260 %q = load i32, i32* %ptr_b
3261 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
3262 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
3263 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
3267 define <4 x i32> @test_mask_xor_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
3268 ; X86-LABEL: test_mask_xor_epi32_rmbk_128:
3270 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3271 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3272 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3273 ; X86-NEXT: vpxord (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xef,0x08]
3274 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3275 ; X86-NEXT: retl # encoding: [0xc3]
3277 ; X64-LABEL: test_mask_xor_epi32_rmbk_128:
3279 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3280 ; X64-NEXT: vpxord (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xef,0x0f]
3281 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3282 ; X64-NEXT: retq # encoding: [0xc3]
3283 %q = load i32, i32* %ptr_b
3284 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
3285 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
3286 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
3290 define <4 x i32> @test_mask_xor_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
3291 ; X86-LABEL: test_mask_xor_epi32_rmbkz_128:
3293 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3294 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3295 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3296 ; X86-NEXT: vpxord (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xef,0x00]
3297 ; X86-NEXT: retl # encoding: [0xc3]
3299 ; X64-LABEL: test_mask_xor_epi32_rmbkz_128:
3301 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3302 ; X64-NEXT: vpxord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xef,0x07]
3303 ; X64-NEXT: retq # encoding: [0xc3]
3304 %q = load i32, i32* %ptr_b
3305 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
3306 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
3307 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
3311 declare <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
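; Masked XOR of packed 32-bit integers, 256-bit vectors (llvm.x86.avx512.mask.pxor.d.256).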
3313 define <8 x i32> @test_mask_xor_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
3314 ; CHECK-LABEL: test_mask_xor_epi32_rr_256:
3316 ; CHECK-NEXT: vxorps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x57,0xc1]
3317 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3318 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
3322 define <8 x i32> @test_mask_xor_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
3323 ; X86-LABEL: test_mask_xor_epi32_rrk_256:
3325 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3326 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3327 ; X86-NEXT: vpxord %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xef,0xd1]
3328 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3329 ; X86-NEXT: retl # encoding: [0xc3]
3331 ; X64-LABEL: test_mask_xor_epi32_rrk_256:
3333 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3334 ; X64-NEXT: vpxord %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xef,0xd1]
3335 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3336 ; X64-NEXT: retq # encoding: [0xc3]
3337 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
3341 define <8 x i32> @test_mask_xor_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
3342 ; X86-LABEL: test_mask_xor_epi32_rrkz_256:
3344 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3345 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3346 ; X86-NEXT: vpxord %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xef,0xc1]
3347 ; X86-NEXT: retl # encoding: [0xc3]
3349 ; X64-LABEL: test_mask_xor_epi32_rrkz_256:
3351 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3352 ; X64-NEXT: vpxord %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xef,0xc1]
3353 ; X64-NEXT: retq # encoding: [0xc3]
3354 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
3358 define <8 x i32> @test_mask_xor_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
3359 ; X86-LABEL: test_mask_xor_epi32_rm_256:
3361 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3362 ; X86-NEXT: vxorps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x57,0x00]
3363 ; X86-NEXT: retl # encoding: [0xc3]
3365 ; X64-LABEL: test_mask_xor_epi32_rm_256:
3367 ; X64-NEXT: vxorps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x57,0x07]
3368 ; X64-NEXT: retq # encoding: [0xc3]
3369 %b = load <8 x i32>, <8 x i32>* %ptr_b
3370 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
3374 define <8 x i32> @test_mask_xor_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
3375 ; X86-LABEL: test_mask_xor_epi32_rmk_256:
3377 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3378 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3379 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3380 ; X86-NEXT: vpxord (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xef,0x08]
3381 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3382 ; X86-NEXT: retl # encoding: [0xc3]
3384 ; X64-LABEL: test_mask_xor_epi32_rmk_256:
3386 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3387 ; X64-NEXT: vpxord (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xef,0x0f]
3388 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3389 ; X64-NEXT: retq # encoding: [0xc3]
3390 %b = load <8 x i32>, <8 x i32>* %ptr_b
3391 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
3395 define <8 x i32> @test_mask_xor_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
3396 ; X86-LABEL: test_mask_xor_epi32_rmkz_256:
3398 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3399 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3400 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3401 ; X86-NEXT: vpxord (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xef,0x00]
3402 ; X86-NEXT: retl # encoding: [0xc3]
3404 ; X64-LABEL: test_mask_xor_epi32_rmkz_256:
3406 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3407 ; X64-NEXT: vpxord (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xef,0x07]
3408 ; X64-NEXT: retq # encoding: [0xc3]
3409 %b = load <8 x i32>, <8 x i32>* %ptr_b
3410 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
3414 define <8 x i32> @test_mask_xor_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
3415 ; X86-LABEL: test_mask_xor_epi32_rmb_256:
3417 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3418 ; X86-NEXT: vpxord (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xef,0x00]
3419 ; X86-NEXT: retl # encoding: [0xc3]
3421 ; X64-LABEL: test_mask_xor_epi32_rmb_256:
3423 ; X64-NEXT: vpxord (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xef,0x07]
3424 ; X64-NEXT: retq # encoding: [0xc3]
3425 %q = load i32, i32* %ptr_b
3426 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
3427 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
3428 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
3432 define <8 x i32> @test_mask_xor_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
3433 ; X86-LABEL: test_mask_xor_epi32_rmbk_256:
3435 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3436 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3437 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3438 ; X86-NEXT: vpxord (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xef,0x08]
3439 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3440 ; X86-NEXT: retl # encoding: [0xc3]
3442 ; X64-LABEL: test_mask_xor_epi32_rmbk_256:
3444 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3445 ; X64-NEXT: vpxord (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xef,0x0f]
3446 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3447 ; X64-NEXT: retq # encoding: [0xc3]
3448 %q = load i32, i32* %ptr_b
3449 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
3450 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
3451 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
3455 define <8 x i32> @test_mask_xor_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
3456 ; X86-LABEL: test_mask_xor_epi32_rmbkz_256:
3458 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3459 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3460 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3461 ; X86-NEXT: vpxord (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xef,0x00]
3462 ; X86-NEXT: retl # encoding: [0xc3]
3464 ; X64-LABEL: test_mask_xor_epi32_rmbkz_256:
3466 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3467 ; X64-NEXT: vpxord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xef,0x07]
3468 ; X64-NEXT: retq # encoding: [0xc3]
3469 %q = load i32, i32* %ptr_b
3470 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
3471 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
3472 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
3476 declare <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
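; Masked ANDNOT of packed 32-bit integers, 128-bit vectors (llvm.x86.avx512.mask.pandn.d.128): unmasked forms lower to VEX vandnps, masked forms use EVEX vpandnd.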
3478 define <4 x i32> @test_mask_andnot_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
3479 ; CHECK-LABEL: test_mask_andnot_epi32_rr_128:
3481 ; CHECK-NEXT: vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1]
3482 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3483 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
3487 define <4 x i32> @test_mask_andnot_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
3488 ; X86-LABEL: test_mask_andnot_epi32_rrk_128:
3490 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3491 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3492 ; X86-NEXT: vpandnd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdf,0xd1]
3493 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3494 ; X86-NEXT: retl # encoding: [0xc3]
3496 ; X64-LABEL: test_mask_andnot_epi32_rrk_128:
3498 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3499 ; X64-NEXT: vpandnd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdf,0xd1]
3500 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3501 ; X64-NEXT: retq # encoding: [0xc3]
3502 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
3506 define <4 x i32> @test_mask_andnot_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
3507 ; X86-LABEL: test_mask_andnot_epi32_rrkz_128:
3509 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3510 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3511 ; X86-NEXT: vpandnd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdf,0xc1]
3512 ; X86-NEXT: retl # encoding: [0xc3]
3514 ; X64-LABEL: test_mask_andnot_epi32_rrkz_128:
3516 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3517 ; X64-NEXT: vpandnd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdf,0xc1]
3518 ; X64-NEXT: retq # encoding: [0xc3]
3519 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
3523 define <4 x i32> @test_mask_andnot_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
3524 ; X86-LABEL: test_mask_andnot_epi32_rm_128:
3526 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3527 ; X86-NEXT: vandnps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x00]
3528 ; X86-NEXT: retl # encoding: [0xc3]
3530 ; X64-LABEL: test_mask_andnot_epi32_rm_128:
3532 ; X64-NEXT: vandnps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x07]
3533 ; X64-NEXT: retq # encoding: [0xc3]
3534 %b = load <4 x i32>, <4 x i32>* %ptr_b
3535 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
3539 define <4 x i32> @test_mask_andnot_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
3540 ; X86-LABEL: test_mask_andnot_epi32_rmk_128:
3542 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3543 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3544 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3545 ; X86-NEXT: vpandnd (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdf,0x08]
3546 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3547 ; X86-NEXT: retl # encoding: [0xc3]
3549 ; X64-LABEL: test_mask_andnot_epi32_rmk_128:
3551 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3552 ; X64-NEXT: vpandnd (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdf,0x0f]
3553 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3554 ; X64-NEXT: retq # encoding: [0xc3]
3555 %b = load <4 x i32>, <4 x i32>* %ptr_b
3556 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
3560 define <4 x i32> @test_mask_andnot_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
3561 ; X86-LABEL: test_mask_andnot_epi32_rmkz_128:
3563 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3564 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3565 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3566 ; X86-NEXT: vpandnd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdf,0x00]
3567 ; X86-NEXT: retl # encoding: [0xc3]
3569 ; X64-LABEL: test_mask_andnot_epi32_rmkz_128:
3571 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3572 ; X64-NEXT: vpandnd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdf,0x07]
3573 ; X64-NEXT: retq # encoding: [0xc3]
3574 %b = load <4 x i32>, <4 x i32>* %ptr_b
3575 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
3579 define <4 x i32> @test_mask_andnot_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
3580 ; X86-LABEL: test_mask_andnot_epi32_rmb_128:
3582 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3583 ; X86-NEXT: vpandnd (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xdf,0x00]
3584 ; X86-NEXT: retl # encoding: [0xc3]
3586 ; X64-LABEL: test_mask_andnot_epi32_rmb_128:
3588 ; X64-NEXT: vpandnd (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xdf,0x07]
3589 ; X64-NEXT: retq # encoding: [0xc3]
3590 %q = load i32, i32* %ptr_b
3591 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
3592 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
3593 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
3597 define <4 x i32> @test_mask_andnot_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
3598 ; X86-LABEL: test_mask_andnot_epi32_rmbk_128:
3600 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3601 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3602 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3603 ; X86-NEXT: vpandnd (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xdf,0x08]
3604 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3605 ; X86-NEXT: retl # encoding: [0xc3]
3607 ; X64-LABEL: test_mask_andnot_epi32_rmbk_128:
3609 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3610 ; X64-NEXT: vpandnd (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xdf,0x0f]
3611 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3612 ; X64-NEXT: retq # encoding: [0xc3]
3613 %q = load i32, i32* %ptr_b
3614 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
3615 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
3616 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
3620 define <4 x i32> @test_mask_andnot_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
3621 ; X86-LABEL: test_mask_andnot_epi32_rmbkz_128:
3623 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3624 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3625 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3626 ; X86-NEXT: vpandnd (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xdf,0x00]
3627 ; X86-NEXT: retl # encoding: [0xc3]
3629 ; X64-LABEL: test_mask_andnot_epi32_rmbkz_128:
3631 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3632 ; X64-NEXT: vpandnd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xdf,0x07]
3633 ; X64-NEXT: retq # encoding: [0xc3]
3634 %q = load i32, i32* %ptr_b
3635 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
3636 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
3637 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
3641 declare <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
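; Masked ANDNOT of packed 32-bit integers, 256-bit vectors (llvm.x86.avx512.mask.pandn.d.256).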
3643 define <8 x i32> @test_mask_andnot_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
3644 ; CHECK-LABEL: test_mask_andnot_epi32_rr_256:
3646 ; CHECK-NEXT: vandnps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0xc1]
3647 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3648 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
3652 define <8 x i32> @test_mask_andnot_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
3653 ; X86-LABEL: test_mask_andnot_epi32_rrk_256:
3655 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3656 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3657 ; X86-NEXT: vpandnd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdf,0xd1]
3658 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3659 ; X86-NEXT: retl # encoding: [0xc3]
3661 ; X64-LABEL: test_mask_andnot_epi32_rrk_256:
3663 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3664 ; X64-NEXT: vpandnd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdf,0xd1]
3665 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3666 ; X64-NEXT: retq # encoding: [0xc3]
3667 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
3671 define <8 x i32> @test_mask_andnot_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
3672 ; X86-LABEL: test_mask_andnot_epi32_rrkz_256:
3674 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3675 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3676 ; X86-NEXT: vpandnd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0xc1]
3677 ; X86-NEXT: retl # encoding: [0xc3]
3679 ; X64-LABEL: test_mask_andnot_epi32_rrkz_256:
3681 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3682 ; X64-NEXT: vpandnd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0xc1]
3683 ; X64-NEXT: retq # encoding: [0xc3]
3684 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
3688 define <8 x i32> @test_mask_andnot_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
3689 ; X86-LABEL: test_mask_andnot_epi32_rm_256:
3691 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3692 ; X86-NEXT: vandnps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x00]
3693 ; X86-NEXT: retl # encoding: [0xc3]
3695 ; X64-LABEL: test_mask_andnot_epi32_rm_256:
3697 ; X64-NEXT: vandnps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x07]
3698 ; X64-NEXT: retq # encoding: [0xc3]
3699 %b = load <8 x i32>, <8 x i32>* %ptr_b
3700 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
3704 define <8 x i32> @test_mask_andnot_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
3705 ; X86-LABEL: test_mask_andnot_epi32_rmk_256:
3707 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3708 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3709 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3710 ; X86-NEXT: vpandnd (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdf,0x08]
3711 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3712 ; X86-NEXT: retl # encoding: [0xc3]
3714 ; X64-LABEL: test_mask_andnot_epi32_rmk_256:
3716 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3717 ; X64-NEXT: vpandnd (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdf,0x0f]
3718 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3719 ; X64-NEXT: retq # encoding: [0xc3]
3720 %b = load <8 x i32>, <8 x i32>* %ptr_b
3721 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
3725 define <8 x i32> @test_mask_andnot_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
3726 ; X86-LABEL: test_mask_andnot_epi32_rmkz_256:
3728 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3729 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3730 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3731 ; X86-NEXT: vpandnd (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0x00]
3732 ; X86-NEXT: retl # encoding: [0xc3]
3734 ; X64-LABEL: test_mask_andnot_epi32_rmkz_256:
3736 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3737 ; X64-NEXT: vpandnd (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0x07]
3738 ; X64-NEXT: retq # encoding: [0xc3]
3739 %b = load <8 x i32>, <8 x i32>* %ptr_b
3740 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
3744 define <8 x i32> @test_mask_andnot_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
3745 ; X86-LABEL: test_mask_andnot_epi32_rmb_256:
3747 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3748 ; X86-NEXT: vpandnd (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xdf,0x00]
3749 ; X86-NEXT: retl # encoding: [0xc3]
3751 ; X64-LABEL: test_mask_andnot_epi32_rmb_256:
3753 ; X64-NEXT: vpandnd (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xdf,0x07]
3754 ; X64-NEXT: retq # encoding: [0xc3]
3755 %q = load i32, i32* %ptr_b
3756 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
3757 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
3758 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
3762 define <8 x i32> @test_mask_andnot_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
3763 ; X86-LABEL: test_mask_andnot_epi32_rmbk_256:
3765 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3766 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3767 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3768 ; X86-NEXT: vpandnd (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xdf,0x08]
3769 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3770 ; X86-NEXT: retl # encoding: [0xc3]
3772 ; X64-LABEL: test_mask_andnot_epi32_rmbk_256:
3774 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3775 ; X64-NEXT: vpandnd (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xdf,0x0f]
3776 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3777 ; X64-NEXT: retq # encoding: [0xc3]
3778 %q = load i32, i32* %ptr_b
3779 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
3780 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
3781 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
3785 define <8 x i32> @test_mask_andnot_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
3786 ; X86-LABEL: test_mask_andnot_epi32_rmbkz_256:
3788 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3789 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3790 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3791 ; X86-NEXT: vpandnd (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xdf,0x00]
3792 ; X86-NEXT: retl # encoding: [0xc3]
3794 ; X64-LABEL: test_mask_andnot_epi32_rmbkz_256:
3796 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3797 ; X64-NEXT: vpandnd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xdf,0x07]
3798 ; X64-NEXT: retq # encoding: [0xc3]
3799 %q = load i32, i32* %ptr_b
3800 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
3801 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
3802 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
3806 declare <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
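; Masked ANDNOT of packed 64-bit integers, 128-bit vectors (llvm.x86.avx512.mask.pandn.q.128); on X86 the 64-bit broadcast operand is materialized with vmovddup/vpbroadcastq instead of an embedded {1to2} broadcast, which is only used on X64.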
3808 define <2 x i64> @test_mask_andnot_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) {
3809 ; CHECK-LABEL: test_mask_andnot_epi64_rr_128:
3811 ; CHECK-NEXT: vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1]
3812 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3813 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
3817 define <2 x i64> @test_mask_andnot_epi64_rrk_128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) {
3818 ; X86-LABEL: test_mask_andnot_epi64_rrk_128:
3820 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3821 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3822 ; X86-NEXT: vpandnq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xdf,0xd1]
3823 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3824 ; X86-NEXT: retl # encoding: [0xc3]
3826 ; X64-LABEL: test_mask_andnot_epi64_rrk_128:
3828 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3829 ; X64-NEXT: vpandnq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xdf,0xd1]
3830 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3831 ; X64-NEXT: retq # encoding: [0xc3]
3832 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
3836 define <2 x i64> @test_mask_andnot_epi64_rrkz_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
3837 ; X86-LABEL: test_mask_andnot_epi64_rrkz_128:
3839 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3840 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3841 ; X86-NEXT: vpandnq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xdf,0xc1]
3842 ; X86-NEXT: retl # encoding: [0xc3]
3844 ; X64-LABEL: test_mask_andnot_epi64_rrkz_128:
3846 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3847 ; X64-NEXT: vpandnq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xdf,0xc1]
3848 ; X64-NEXT: retq # encoding: [0xc3]
3849 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
3853 define <2 x i64> @test_mask_andnot_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) {
3854 ; X86-LABEL: test_mask_andnot_epi64_rm_128:
3856 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3857 ; X86-NEXT: vandnps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x00]
3858 ; X86-NEXT: retl # encoding: [0xc3]
3860 ; X64-LABEL: test_mask_andnot_epi64_rm_128:
3862 ; X64-NEXT: vandnps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x07]
3863 ; X64-NEXT: retq # encoding: [0xc3]
3864 %b = load <2 x i64>, <2 x i64>* %ptr_b
3865 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
3869 define <2 x i64> @test_mask_andnot_epi64_rmk_128(<2 x i64> %a, <2 x i64>* %ptr_b, <2 x i64> %passThru, i8 %mask) {
3870 ; X86-LABEL: test_mask_andnot_epi64_rmk_128:
3872 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3873 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3874 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3875 ; X86-NEXT: vpandnq (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xdf,0x08]
3876 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3877 ; X86-NEXT: retl # encoding: [0xc3]
3879 ; X64-LABEL: test_mask_andnot_epi64_rmk_128:
3881 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3882 ; X64-NEXT: vpandnq (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xdf,0x0f]
3883 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3884 ; X64-NEXT: retq # encoding: [0xc3]
3885 %b = load <2 x i64>, <2 x i64>* %ptr_b
3886 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
3890 define <2 x i64> @test_mask_andnot_epi64_rmkz_128(<2 x i64> %a, <2 x i64>* %ptr_b, i8 %mask) {
3891 ; X86-LABEL: test_mask_andnot_epi64_rmkz_128:
3893 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3894 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3895 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3896 ; X86-NEXT: vpandnq (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xdf,0x00]
3897 ; X86-NEXT: retl # encoding: [0xc3]
3899 ; X64-LABEL: test_mask_andnot_epi64_rmkz_128:
3901 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3902 ; X64-NEXT: vpandnq (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xdf,0x07]
3903 ; X64-NEXT: retq # encoding: [0xc3]
3904 %b = load <2 x i64>, <2 x i64>* %ptr_b
3905 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
3909 define <2 x i64> @test_mask_andnot_epi64_rmb_128(<2 x i64> %a, i64* %ptr_b) {
3910 ; X86-LABEL: test_mask_andnot_epi64_rmb_128:
3912 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3913 ; X86-NEXT: vmovddup (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x08]
3914 ; X86-NEXT: # xmm1 = mem[0,0]
3915 ; X86-NEXT: vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1]
3916 ; X86-NEXT: retl # encoding: [0xc3]
3918 ; X64-LABEL: test_mask_andnot_epi64_rmb_128:
3920 ; X64-NEXT: vpandnq (%rdi){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x18,0xdf,0x07]
3921 ; X64-NEXT: retq # encoding: [0xc3]
3922 %q = load i64, i64* %ptr_b
3923 %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
3924 %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
3925 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
3929 define <2 x i64> @test_mask_andnot_epi64_rmbk_128(<2 x i64> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) {
3930 ; X86-LABEL: test_mask_andnot_epi64_rmbk_128:
3932 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3933 ; X86-NEXT: vpbroadcastq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x10]
3934 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3935 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3936 ; X86-NEXT: vpandnq %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xdf,0xca]
3937 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3938 ; X86-NEXT: retl # encoding: [0xc3]
3940 ; X64-LABEL: test_mask_andnot_epi64_rmbk_128:
3942 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3943 ; X64-NEXT: vpandnq (%rdi){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x19,0xdf,0x0f]
3944 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3945 ; X64-NEXT: retq # encoding: [0xc3]
3946 %q = load i64, i64* %ptr_b
3947 %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
3948 %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
3949 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
3953 define <2 x i64> @test_mask_andnot_epi64_rmbkz_128(<2 x i64> %a, i64* %ptr_b, i8 %mask) {
3954 ; X86-LABEL: test_mask_andnot_epi64_rmbkz_128:
3956 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3957 ; X86-NEXT: vpbroadcastq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x08]
3958 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3959 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3960 ; X86-NEXT: vpandnq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xdf,0xc1]
3961 ; X86-NEXT: retl # encoding: [0xc3]
3963 ; X64-LABEL: test_mask_andnot_epi64_rmbkz_128:
3965 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3966 ; X64-NEXT: vpandnq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x99,0xdf,0x07]
3967 ; X64-NEXT: retq # encoding: [0xc3]
3968 %q = load i64, i64* %ptr_b
3969 %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
3970 %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
3971 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
3975 declare <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
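; Masked ANDNOT of packed 64-bit integers, 256-bit vectors (llvm.x86.avx512.mask.pandn.q.256).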
3977 define <4 x i64> @test_mask_andnot_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) {
3978 ; CHECK-LABEL: test_mask_andnot_epi64_rr_256:
3980 ; CHECK-NEXT: vandnps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0xc1]
3981 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3982 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
3986 define <4 x i64> @test_mask_andnot_epi64_rrk_256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) {
3987 ; X86-LABEL: test_mask_andnot_epi64_rrk_256:
3989 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3990 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3991 ; X86-NEXT: vpandnq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xdf,0xd1]
3992 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3993 ; X86-NEXT: retl # encoding: [0xc3]
3995 ; X64-LABEL: test_mask_andnot_epi64_rrk_256:
3997 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3998 ; X64-NEXT: vpandnq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xdf,0xd1]
3999 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4000 ; X64-NEXT: retq # encoding: [0xc3]
4001 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
4005 define <4 x i64> @test_mask_andnot_epi64_rrkz_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
4006 ; X86-LABEL: test_mask_andnot_epi64_rrkz_256:
4008 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4009 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4010 ; X86-NEXT: vpandnq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0xc1]
4011 ; X86-NEXT: retl # encoding: [0xc3]
4013 ; X64-LABEL: test_mask_andnot_epi64_rrkz_256:
4015 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4016 ; X64-NEXT: vpandnq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0xc1]
4017 ; X64-NEXT: retq # encoding: [0xc3]
4018 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
4022 define <4 x i64> @test_mask_andnot_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) {
4023 ; X86-LABEL: test_mask_andnot_epi64_rm_256:
4025 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4026 ; X86-NEXT: vandnps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x00]
4027 ; X86-NEXT: retl # encoding: [0xc3]
4029 ; X64-LABEL: test_mask_andnot_epi64_rm_256:
4031 ; X64-NEXT: vandnps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x07]
4032 ; X64-NEXT: retq # encoding: [0xc3]
4033 %b = load <4 x i64>, <4 x i64>* %ptr_b
4034 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
4038 define <4 x i64> @test_mask_andnot_epi64_rmk_256(<4 x i64> %a, <4 x i64>* %ptr_b, <4 x i64> %passThru, i8 %mask) {
4039 ; X86-LABEL: test_mask_andnot_epi64_rmk_256:
4041 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4042 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4043 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4044 ; X86-NEXT: vpandnq (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xdf,0x08]
4045 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4046 ; X86-NEXT: retl # encoding: [0xc3]
4048 ; X64-LABEL: test_mask_andnot_epi64_rmk_256:
4050 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4051 ; X64-NEXT: vpandnq (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xdf,0x0f]
4052 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4053 ; X64-NEXT: retq # encoding: [0xc3]
4054 %b = load <4 x i64>, <4 x i64>* %ptr_b
4055 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
4059 define <4 x i64> @test_mask_andnot_epi64_rmkz_256(<4 x i64> %a, <4 x i64>* %ptr_b, i8 %mask) {
4060 ; X86-LABEL: test_mask_andnot_epi64_rmkz_256:
4062 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4063 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4064 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4065 ; X86-NEXT: vpandnq (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0x00]
4066 ; X86-NEXT: retl # encoding: [0xc3]
4068 ; X64-LABEL: test_mask_andnot_epi64_rmkz_256:
4070 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4071 ; X64-NEXT: vpandnq (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0x07]
4072 ; X64-NEXT: retq # encoding: [0xc3]
4073 %b = load <4 x i64>, <4 x i64>* %ptr_b
4074 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
4078 define <4 x i64> @test_mask_andnot_epi64_rmb_256(<4 x i64> %a, i64* %ptr_b) {
4079 ; X86-LABEL: test_mask_andnot_epi64_rmb_256:
4081 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4082 ; X86-NEXT: vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08]
4083 ; X86-NEXT: # xmm1 = mem[0],zero
4084 ; X86-NEXT: vbroadcastsd %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0xc9]
4085 ; X86-NEXT: vandnps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0xc1]
4086 ; X86-NEXT: retl # encoding: [0xc3]
4088 ; X64-LABEL: test_mask_andnot_epi64_rmb_256:
4090 ; X64-NEXT: vpandnq (%rdi){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x38,0xdf,0x07]
4091 ; X64-NEXT: retq # encoding: [0xc3]
4092 %q = load i64, i64* %ptr_b
4093 %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
4094 %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
4095 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
4096 ret <4 x i64> %res
4097 }
4099 define <4 x i64> @test_mask_andnot_epi64_rmbk_256(<4 x i64> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) {
4100 ; X86-LABEL: test_mask_andnot_epi64_rmbk_256:
4102 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4103 ; X86-NEXT: vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
4104 ; X86-NEXT: # xmm2 = mem[0],zero
4105 ; X86-NEXT: vpbroadcastq %xmm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xd2]
4106 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
4107 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4108 ; X86-NEXT: vpandnq %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xdf,0xca]
4109 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4110 ; X86-NEXT: retl # encoding: [0xc3]
4112 ; X64-LABEL: test_mask_andnot_epi64_rmbk_256:
4114 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4115 ; X64-NEXT: vpandnq (%rdi){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x39,0xdf,0x0f]
4116 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4117 ; X64-NEXT: retq # encoding: [0xc3]
4118 %q = load i64, i64* %ptr_b
4119 %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
4120 %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
4121 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
4122 ret <4 x i64> %res
4123 }
4125 define <4 x i64> @test_mask_andnot_epi64_rmbkz_256(<4 x i64> %a, i64* %ptr_b, i8 %mask) {
4126 ; X86-LABEL: test_mask_andnot_epi64_rmbkz_256:
4128 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4129 ; X86-NEXT: vmovq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
4130 ; X86-NEXT: # xmm1 = mem[0],zero
4131 ; X86-NEXT: vpbroadcastq %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xc9]
4132 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
4133 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4134 ; X86-NEXT: vpandnq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0xc1]
4135 ; X86-NEXT: retl # encoding: [0xc3]
4137 ; X64-LABEL: test_mask_andnot_epi64_rmbkz_256:
4139 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4140 ; X64-NEXT: vpandnq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xb9,0xdf,0x07]
4141 ; X64-NEXT: retq # encoding: [0xc3]
4142 %q = load i64, i64* %ptr_b
4143 %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
4144 %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
4145 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
4146 ret <4 x i64> %res
4147 }
4149 declare <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
4151 define <4 x i32> @test_mask_add_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
4152 ; CHECK-LABEL: test_mask_add_epi32_rr_128:
4154 ; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
4155 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4156 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
4157 ret <4 x i32> %res
4158 }
4160 define <4 x i32> @test_mask_add_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
4161 ; X86-LABEL: test_mask_add_epi32_rrk_128:
4163 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4164 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4165 ; X86-NEXT: vpaddd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xd1]
4166 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4167 ; X86-NEXT: retl # encoding: [0xc3]
4169 ; X64-LABEL: test_mask_add_epi32_rrk_128:
4171 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4172 ; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xd1]
4173 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4174 ; X64-NEXT: retq # encoding: [0xc3]
4175 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
4176 ret <4 x i32> %res
4177 }
4179 define <4 x i32> @test_mask_add_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
4180 ; X86-LABEL: test_mask_add_epi32_rrkz_128:
4182 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4183 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4184 ; X86-NEXT: vpaddd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfe,0xc1]
4185 ; X86-NEXT: retl # encoding: [0xc3]
4187 ; X64-LABEL: test_mask_add_epi32_rrkz_128:
4189 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4190 ; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfe,0xc1]
4191 ; X64-NEXT: retq # encoding: [0xc3]
4192 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
4193 ret <4 x i32> %res
4194 }
4196 define <4 x i32> @test_mask_add_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
4197 ; X86-LABEL: test_mask_add_epi32_rm_128:
4199 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4200 ; X86-NEXT: vpaddd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0x00]
4201 ; X86-NEXT: retl # encoding: [0xc3]
4203 ; X64-LABEL: test_mask_add_epi32_rm_128:
4205 ; X64-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0x07]
4206 ; X64-NEXT: retq # encoding: [0xc3]
4207 %b = load <4 x i32>, <4 x i32>* %ptr_b
4208 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
4209 ret <4 x i32> %res
4210 }
4212 define <4 x i32> @test_mask_add_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
4213 ; X86-LABEL: test_mask_add_epi32_rmk_128:
4215 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4216 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4217 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4218 ; X86-NEXT: vpaddd (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfe,0x08]
4219 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4220 ; X86-NEXT: retl # encoding: [0xc3]
4222 ; X64-LABEL: test_mask_add_epi32_rmk_128:
4224 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4225 ; X64-NEXT: vpaddd (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfe,0x0f]
4226 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4227 ; X64-NEXT: retq # encoding: [0xc3]
4228 %b = load <4 x i32>, <4 x i32>* %ptr_b
4229 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
4230 ret <4 x i32> %res
4231 }
4233 define <4 x i32> @test_mask_add_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
4234 ; X86-LABEL: test_mask_add_epi32_rmkz_128:
4236 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4237 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4238 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4239 ; X86-NEXT: vpaddd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfe,0x00]
4240 ; X86-NEXT: retl # encoding: [0xc3]
4242 ; X64-LABEL: test_mask_add_epi32_rmkz_128:
4244 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4245 ; X64-NEXT: vpaddd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfe,0x07]
4246 ; X64-NEXT: retq # encoding: [0xc3]
4247 %b = load <4 x i32>, <4 x i32>* %ptr_b
4248 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
4249 ret <4 x i32> %res
4250 }
4252 define <4 x i32> @test_mask_add_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
4253 ; X86-LABEL: test_mask_add_epi32_rmb_128:
4255 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4256 ; X86-NEXT: vpaddd (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x00]
4257 ; X86-NEXT: retl # encoding: [0xc3]
4259 ; X64-LABEL: test_mask_add_epi32_rmb_128:
4261 ; X64-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x07]
4262 ; X64-NEXT: retq # encoding: [0xc3]
4263 %q = load i32, i32* %ptr_b
4264 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
4265 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
4266 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
4267 ret <4 x i32> %res
4268 }
4270 define <4 x i32> @test_mask_add_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
4271 ; X86-LABEL: test_mask_add_epi32_rmbk_128:
4273 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4274 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4275 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4276 ; X86-NEXT: vpaddd (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xfe,0x08]
4277 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4278 ; X86-NEXT: retl # encoding: [0xc3]
4280 ; X64-LABEL: test_mask_add_epi32_rmbk_128:
4282 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4283 ; X64-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xfe,0x0f]
4284 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4285 ; X64-NEXT: retq # encoding: [0xc3]
4286 %q = load i32, i32* %ptr_b
4287 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
4288 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
4289 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
4290 ret <4 x i32> %res
4291 }
4293 define <4 x i32> @test_mask_add_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
4294 ; X86-LABEL: test_mask_add_epi32_rmbkz_128:
4296 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4297 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4298 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4299 ; X86-NEXT: vpaddd (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xfe,0x00]
4300 ; X86-NEXT: retl # encoding: [0xc3]
4302 ; X64-LABEL: test_mask_add_epi32_rmbkz_128:
4304 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4305 ; X64-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xfe,0x07]
4306 ; X64-NEXT: retq # encoding: [0xc3]
4307 %q = load i32, i32* %ptr_b
4308 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
4309 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
4310 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
4311 ret <4 x i32> %res
4312 }
4314 declare <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
4316 define <4 x i32> @test_mask_sub_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
4317 ; CHECK-LABEL: test_mask_sub_epi32_rr_128:
4319 ; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0xc1]
4320 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4321 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
4322 ret <4 x i32> %res
4323 }
4325 define <4 x i32> @test_mask_sub_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
4326 ; X86-LABEL: test_mask_sub_epi32_rrk_128:
4328 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4329 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4330 ; X86-NEXT: vpsubd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfa,0xd1]
4331 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4332 ; X86-NEXT: retl # encoding: [0xc3]
4334 ; X64-LABEL: test_mask_sub_epi32_rrk_128:
4336 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4337 ; X64-NEXT: vpsubd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfa,0xd1]
4338 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4339 ; X64-NEXT: retq # encoding: [0xc3]
4340 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
4341 ret <4 x i32> %res
4342 }
4344 define <4 x i32> @test_mask_sub_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
4345 ; X86-LABEL: test_mask_sub_epi32_rrkz_128:
4347 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4348 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4349 ; X86-NEXT: vpsubd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfa,0xc1]
4350 ; X86-NEXT: retl # encoding: [0xc3]
4352 ; X64-LABEL: test_mask_sub_epi32_rrkz_128:
4354 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4355 ; X64-NEXT: vpsubd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfa,0xc1]
4356 ; X64-NEXT: retq # encoding: [0xc3]
4357 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
4358 ret <4 x i32> %res
4359 }
4361 define <4 x i32> @test_mask_sub_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
4362 ; X86-LABEL: test_mask_sub_epi32_rm_128:
4364 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4365 ; X86-NEXT: vpsubd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0x00]
4366 ; X86-NEXT: retl # encoding: [0xc3]
4368 ; X64-LABEL: test_mask_sub_epi32_rm_128:
4370 ; X64-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0x07]
4371 ; X64-NEXT: retq # encoding: [0xc3]
4372 %b = load <4 x i32>, <4 x i32>* %ptr_b
4373 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
4374 ret <4 x i32> %res
4375 }
4377 define <4 x i32> @test_mask_sub_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
4378 ; X86-LABEL: test_mask_sub_epi32_rmk_128:
4380 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4381 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4382 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4383 ; X86-NEXT: vpsubd (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfa,0x08]
4384 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4385 ; X86-NEXT: retl # encoding: [0xc3]
4387 ; X64-LABEL: test_mask_sub_epi32_rmk_128:
4389 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4390 ; X64-NEXT: vpsubd (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfa,0x0f]
4391 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4392 ; X64-NEXT: retq # encoding: [0xc3]
4393 %b = load <4 x i32>, <4 x i32>* %ptr_b
4394 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
4395 ret <4 x i32> %res
4396 }
4398 define <4 x i32> @test_mask_sub_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
4399 ; X86-LABEL: test_mask_sub_epi32_rmkz_128:
4401 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4402 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4403 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4404 ; X86-NEXT: vpsubd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfa,0x00]
4405 ; X86-NEXT: retl # encoding: [0xc3]
4407 ; X64-LABEL: test_mask_sub_epi32_rmkz_128:
4409 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4410 ; X64-NEXT: vpsubd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfa,0x07]
4411 ; X64-NEXT: retq # encoding: [0xc3]
4412 %b = load <4 x i32>, <4 x i32>* %ptr_b
4413 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
4414 ret <4 x i32> %res
4415 }
4417 define <4 x i32> @test_mask_sub_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
4418 ; X86-LABEL: test_mask_sub_epi32_rmb_128:
4420 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4421 ; X86-NEXT: vpsubd (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfa,0x00]
4422 ; X86-NEXT: retl # encoding: [0xc3]
4424 ; X64-LABEL: test_mask_sub_epi32_rmb_128:
4426 ; X64-NEXT: vpsubd (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfa,0x07]
4427 ; X64-NEXT: retq # encoding: [0xc3]
4428 %q = load i32, i32* %ptr_b
4429 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
4430 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
4431 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
4432 ret <4 x i32> %res
4433 }
4435 define <4 x i32> @test_mask_sub_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
4436 ; X86-LABEL: test_mask_sub_epi32_rmbk_128:
4438 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4439 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4440 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4441 ; X86-NEXT: vpsubd (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xfa,0x08]
4442 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4443 ; X86-NEXT: retl # encoding: [0xc3]
4445 ; X64-LABEL: test_mask_sub_epi32_rmbk_128:
4447 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4448 ; X64-NEXT: vpsubd (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xfa,0x0f]
4449 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4450 ; X64-NEXT: retq # encoding: [0xc3]
4451 %q = load i32, i32* %ptr_b
4452 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
4453 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
4454 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
4455 ret <4 x i32> %res
4456 }
4458 define <4 x i32> @test_mask_sub_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
4459 ; X86-LABEL: test_mask_sub_epi32_rmbkz_128:
4461 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4462 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4463 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4464 ; X86-NEXT: vpsubd (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xfa,0x00]
4465 ; X86-NEXT: retl # encoding: [0xc3]
4467 ; X64-LABEL: test_mask_sub_epi32_rmbkz_128:
4469 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4470 ; X64-NEXT: vpsubd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xfa,0x07]
4471 ; X64-NEXT: retq # encoding: [0xc3]
4472 %q = load i32, i32* %ptr_b
4473 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
4474 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
4475 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
4476 ret <4 x i32> %res
4477 }
4479 declare <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
4481 define <8 x i32> @test_mask_sub_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
4482 ; CHECK-LABEL: test_mask_sub_epi32_rr_256:
4484 ; CHECK-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0xc1]
4485 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4486 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
4487 ret <8 x i32> %res
4488 }
4490 define <8 x i32> @test_mask_sub_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
4491 ; X86-LABEL: test_mask_sub_epi32_rrk_256:
4493 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4494 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4495 ; X86-NEXT: vpsubd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfa,0xd1]
4496 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4497 ; X86-NEXT: retl # encoding: [0xc3]
4499 ; X64-LABEL: test_mask_sub_epi32_rrk_256:
4501 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4502 ; X64-NEXT: vpsubd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfa,0xd1]
4503 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4504 ; X64-NEXT: retq # encoding: [0xc3]
4505 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
4506 ret <8 x i32> %res
4507 }
4509 define <8 x i32> @test_mask_sub_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
4510 ; X86-LABEL: test_mask_sub_epi32_rrkz_256:
4512 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4513 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4514 ; X86-NEXT: vpsubd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0xc1]
4515 ; X86-NEXT: retl # encoding: [0xc3]
4517 ; X64-LABEL: test_mask_sub_epi32_rrkz_256:
4519 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4520 ; X64-NEXT: vpsubd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0xc1]
4521 ; X64-NEXT: retq # encoding: [0xc3]
4522 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
4523 ret <8 x i32> %res
4524 }
4526 define <8 x i32> @test_mask_sub_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
4527 ; X86-LABEL: test_mask_sub_epi32_rm_256:
4529 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4530 ; X86-NEXT: vpsubd (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0x00]
4531 ; X86-NEXT: retl # encoding: [0xc3]
4533 ; X64-LABEL: test_mask_sub_epi32_rm_256:
4535 ; X64-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0x07]
4536 ; X64-NEXT: retq # encoding: [0xc3]
4537 %b = load <8 x i32>, <8 x i32>* %ptr_b
4538 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
4539 ret <8 x i32> %res
4540 }
4542 define <8 x i32> @test_mask_sub_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
4543 ; X86-LABEL: test_mask_sub_epi32_rmk_256:
4545 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4546 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4547 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4548 ; X86-NEXT: vpsubd (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfa,0x08]
4549 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4550 ; X86-NEXT: retl # encoding: [0xc3]
4552 ; X64-LABEL: test_mask_sub_epi32_rmk_256:
4554 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4555 ; X64-NEXT: vpsubd (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfa,0x0f]
4556 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4557 ; X64-NEXT: retq # encoding: [0xc3]
4558 %b = load <8 x i32>, <8 x i32>* %ptr_b
4559 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
4560 ret <8 x i32> %res
4561 }
4563 define <8 x i32> @test_mask_sub_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
4564 ; X86-LABEL: test_mask_sub_epi32_rmkz_256:
4566 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4567 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4568 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4569 ; X86-NEXT: vpsubd (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0x00]
4570 ; X86-NEXT: retl # encoding: [0xc3]
4572 ; X64-LABEL: test_mask_sub_epi32_rmkz_256:
4574 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4575 ; X64-NEXT: vpsubd (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0x07]
4576 ; X64-NEXT: retq # encoding: [0xc3]
4577 %b = load <8 x i32>, <8 x i32>* %ptr_b
4578 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
4579 ret <8 x i32> %res
4580 }
4582 define <8 x i32> @test_mask_sub_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
4583 ; X86-LABEL: test_mask_sub_epi32_rmb_256:
4585 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4586 ; X86-NEXT: vpsubd (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfa,0x00]
4587 ; X86-NEXT: retl # encoding: [0xc3]
4589 ; X64-LABEL: test_mask_sub_epi32_rmb_256:
4591 ; X64-NEXT: vpsubd (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfa,0x07]
4592 ; X64-NEXT: retq # encoding: [0xc3]
4593 %q = load i32, i32* %ptr_b
4594 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4595 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4596 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
4597 ret <8 x i32> %res
4598 }
4600 define <8 x i32> @test_mask_sub_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
4601 ; X86-LABEL: test_mask_sub_epi32_rmbk_256:
4603 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4604 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4605 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4606 ; X86-NEXT: vpsubd (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xfa,0x08]
4607 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4608 ; X86-NEXT: retl # encoding: [0xc3]
4610 ; X64-LABEL: test_mask_sub_epi32_rmbk_256:
4612 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4613 ; X64-NEXT: vpsubd (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xfa,0x0f]
4614 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4615 ; X64-NEXT: retq # encoding: [0xc3]
4616 %q = load i32, i32* %ptr_b
4617 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4618 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4619 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
4620 ret <8 x i32> %res
4621 }
4623 define <8 x i32> @test_mask_sub_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
4624 ; X86-LABEL: test_mask_sub_epi32_rmbkz_256:
4626 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4627 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4628 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4629 ; X86-NEXT: vpsubd (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xfa,0x00]
4630 ; X86-NEXT: retl # encoding: [0xc3]
4632 ; X64-LABEL: test_mask_sub_epi32_rmbkz_256:
4634 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4635 ; X64-NEXT: vpsubd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xfa,0x07]
4636 ; X64-NEXT: retq # encoding: [0xc3]
4637 %q = load i32, i32* %ptr_b
4638 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4639 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4640 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
4641 ret <8 x i32> %res
4642 }
4644 declare <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
4646 define <8 x i32> @test_mask_add_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
4647 ; CHECK-LABEL: test_mask_add_epi32_rr_256:
4649 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
4650 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4651 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
4652 ret <8 x i32> %res
4653 }
4655 define <8 x i32> @test_mask_add_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
4656 ; X86-LABEL: test_mask_add_epi32_rrk_256:
4658 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4659 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4660 ; X86-NEXT: vpaddd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xd1]
4661 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4662 ; X86-NEXT: retl # encoding: [0xc3]
4664 ; X64-LABEL: test_mask_add_epi32_rrk_256:
4666 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4667 ; X64-NEXT: vpaddd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xd1]
4668 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4669 ; X64-NEXT: retq # encoding: [0xc3]
4670 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
4671 ret <8 x i32> %res
4672 }
4674 define <8 x i32> @test_mask_add_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
4675 ; X86-LABEL: test_mask_add_epi32_rrkz_256:
4677 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4678 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4679 ; X86-NEXT: vpaddd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0xc1]
4680 ; X86-NEXT: retl # encoding: [0xc3]
4682 ; X64-LABEL: test_mask_add_epi32_rrkz_256:
4684 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4685 ; X64-NEXT: vpaddd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0xc1]
4686 ; X64-NEXT: retq # encoding: [0xc3]
4687 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
4688 ret <8 x i32> %res
4689 }
4691 define <8 x i32> @test_mask_add_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
4692 ; X86-LABEL: test_mask_add_epi32_rm_256:
4694 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4695 ; X86-NEXT: vpaddd (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0x00]
4696 ; X86-NEXT: retl # encoding: [0xc3]
4698 ; X64-LABEL: test_mask_add_epi32_rm_256:
4700 ; X64-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0x07]
4701 ; X64-NEXT: retq # encoding: [0xc3]
4702 %b = load <8 x i32>, <8 x i32>* %ptr_b
4703 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
4704 ret <8 x i32> %res
4705 }
4707 define <8 x i32> @test_mask_add_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
4708 ; X86-LABEL: test_mask_add_epi32_rmk_256:
4710 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4711 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4712 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4713 ; X86-NEXT: vpaddd (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfe,0x08]
4714 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4715 ; X86-NEXT: retl # encoding: [0xc3]
4717 ; X64-LABEL: test_mask_add_epi32_rmk_256:
4719 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4720 ; X64-NEXT: vpaddd (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfe,0x0f]
4721 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4722 ; X64-NEXT: retq # encoding: [0xc3]
4723 %b = load <8 x i32>, <8 x i32>* %ptr_b
4724 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
4725 ret <8 x i32> %res
4726 }
4728 define <8 x i32> @test_mask_add_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
4729 ; X86-LABEL: test_mask_add_epi32_rmkz_256:
4731 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4732 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4733 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4734 ; X86-NEXT: vpaddd (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0x00]
4735 ; X86-NEXT: retl # encoding: [0xc3]
4737 ; X64-LABEL: test_mask_add_epi32_rmkz_256:
4739 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4740 ; X64-NEXT: vpaddd (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0x07]
4741 ; X64-NEXT: retq # encoding: [0xc3]
4742 %b = load <8 x i32>, <8 x i32>* %ptr_b
4743 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
4744 ret <8 x i32> %res
4745 }
4747 define <8 x i32> @test_mask_add_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
4748 ; X86-LABEL: test_mask_add_epi32_rmb_256:
4750 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4751 ; X86-NEXT: vpaddd (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x00]
4752 ; X86-NEXT: retl # encoding: [0xc3]
4754 ; X64-LABEL: test_mask_add_epi32_rmb_256:
4756 ; X64-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x07]
4757 ; X64-NEXT: retq # encoding: [0xc3]
4758 %q = load i32, i32* %ptr_b
4759 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4760 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4761 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
4762 ret <8 x i32> %res
4763 }
4765 define <8 x i32> @test_mask_add_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
4766 ; X86-LABEL: test_mask_add_epi32_rmbk_256:
4768 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4769 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4770 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4771 ; X86-NEXT: vpaddd (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xfe,0x08]
4772 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4773 ; X86-NEXT: retl # encoding: [0xc3]
4775 ; X64-LABEL: test_mask_add_epi32_rmbk_256:
4777 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4778 ; X64-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xfe,0x0f]
4779 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4780 ; X64-NEXT: retq # encoding: [0xc3]
4781 %q = load i32, i32* %ptr_b
4782 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4783 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4784 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
4785 ret <8 x i32> %res
4786 }
4788 define <8 x i32> @test_mask_add_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
4789 ; X86-LABEL: test_mask_add_epi32_rmbkz_256:
4791 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4792 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4793 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4794 ; X86-NEXT: vpaddd (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xfe,0x00]
4795 ; X86-NEXT: retl # encoding: [0xc3]
4797 ; X64-LABEL: test_mask_add_epi32_rmbkz_256:
4799 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4800 ; X64-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xfe,0x07]
4801 ; X64-NEXT: retq # encoding: [0xc3]
4802 %q = load i32, i32* %ptr_b
4803 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4804 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4805 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
4806 ret <8 x i32> %res
4807 }
4809 declare <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
4811 define <8 x float> @test_mm512_maskz_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
4812 ; X86-LABEL: test_mm512_maskz_add_ps_256:
4814 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4815 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4816 ; X86-NEXT: vaddps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x58,0xc1]
4817 ; X86-NEXT: retl # encoding: [0xc3]
4819 ; X64-LABEL: test_mm512_maskz_add_ps_256:
4821 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4822 ; X64-NEXT: vaddps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x58,0xc1]
4823 ; X64-NEXT: retq # encoding: [0xc3]
4824 %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
4825 ret <8 x float> %res
4826 }
4828 define <8 x float> @test_mm512_mask_add_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
4829 ; X86-LABEL: test_mm512_mask_add_ps_256:
4831 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4832 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4833 ; X86-NEXT: vaddps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x58,0xd1]
4834 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
4835 ; X86-NEXT: retl # encoding: [0xc3]
4837 ; X64-LABEL: test_mm512_mask_add_ps_256:
4839 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4840 ; X64-NEXT: vaddps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x58,0xd1]
4841 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
4842 ; X64-NEXT: retq # encoding: [0xc3]
4843 %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
4844 ret <8 x float> %res
4845 }
4847 define <8 x float> @test_mm512_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
4848 ; CHECK-LABEL: test_mm512_add_ps_256:
4850 ; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
4851 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4852 %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
4853 ret <8 x float> %res
4854 }
4855 declare <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
4857 define <4 x float> @test_mm512_maskz_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
4858 ; X86-LABEL: test_mm512_maskz_add_ps_128:
4860 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4861 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4862 ; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x58,0xc1]
4863 ; X86-NEXT: retl # encoding: [0xc3]
4865 ; X64-LABEL: test_mm512_maskz_add_ps_128:
4867 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4868 ; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x58,0xc1]
4869 ; X64-NEXT: retq # encoding: [0xc3]
4870 %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
4871 ret <4 x float> %res
4872 }
4874 define <4 x float> @test_mm512_mask_add_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
4875 ; X86-LABEL: test_mm512_mask_add_ps_128:
4877 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4878 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4879 ; X86-NEXT: vaddps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x58,0xd1]
4880 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
4881 ; X86-NEXT: retl # encoding: [0xc3]
4883 ; X64-LABEL: test_mm512_mask_add_ps_128:
4885 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4886 ; X64-NEXT: vaddps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x58,0xd1]
4887 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
4888 ; X64-NEXT: retq # encoding: [0xc3]
4889 %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
4890 ret <4 x float> %res
4891 }
4893 define <4 x float> @test_mm512_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
4894 ; CHECK-LABEL: test_mm512_add_ps_128:
4896 ; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
4897 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4898 %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
4899 ret <4 x float> %res
4900 }
4901 declare <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
4903 define <8 x float> @test_mm512_maskz_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
4904 ; X86-LABEL: test_mm512_maskz_sub_ps_256:
4906 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4907 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4908 ; X86-NEXT: vsubps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5c,0xc1]
4909 ; X86-NEXT: retl # encoding: [0xc3]
4911 ; X64-LABEL: test_mm512_maskz_sub_ps_256:
4913 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4914 ; X64-NEXT: vsubps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5c,0xc1]
4915 ; X64-NEXT: retq # encoding: [0xc3]
4916 %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
4917 ret <8 x float> %res
4918 }
4920 define <8 x float> @test_mm512_mask_sub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
4921 ; X86-LABEL: test_mm512_mask_sub_ps_256:
4923 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4924 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4925 ; X86-NEXT: vsubps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5c,0xd1]
4926 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
4927 ; X86-NEXT: retl # encoding: [0xc3]
4929 ; X64-LABEL: test_mm512_mask_sub_ps_256:
4931 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4932 ; X64-NEXT: vsubps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5c,0xd1]
4933 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
4934 ; X64-NEXT: retq # encoding: [0xc3]
4935 %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
4936 ret <8 x float> %res
4937 }
4939 define <8 x float> @test_mm512_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
4940 ; CHECK-LABEL: test_mm512_sub_ps_256:
4942 ; CHECK-NEXT: vsubps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5c,0xc1]
4943 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4944 %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
4945 ret <8 x float> %res
4946 }
4947 declare <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
4949 define <4 x float> @test_mm512_maskz_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
4950 ; X86-LABEL: test_mm512_maskz_sub_ps_128:
4952 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4953 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4954 ; X86-NEXT: vsubps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5c,0xc1]
4955 ; X86-NEXT: retl # encoding: [0xc3]
4957 ; X64-LABEL: test_mm512_maskz_sub_ps_128:
4959 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4960 ; X64-NEXT: vsubps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5c,0xc1]
4961 ; X64-NEXT: retq # encoding: [0xc3]
4962 %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
4963 ret <4 x float> %res
4964 }
4966 define <4 x float> @test_mm512_mask_sub_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
4967 ; X86-LABEL: test_mm512_mask_sub_ps_128:
4969 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4970 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4971 ; X86-NEXT: vsubps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5c,0xd1]
4972 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
4973 ; X86-NEXT: retl # encoding: [0xc3]
4975 ; X64-LABEL: test_mm512_mask_sub_ps_128:
4977 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4978 ; X64-NEXT: vsubps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5c,0xd1]
4979 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
4980 ; X64-NEXT: retq # encoding: [0xc3]
4981 %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
4982 ret <4 x float> %res
4983 }
4985 define <4 x float> @test_mm512_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
4986 ; CHECK-LABEL: test_mm512_sub_ps_128:
4988 ; CHECK-NEXT: vsubps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5c,0xc1]
4989 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4990 %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
4991 ret <4 x float> %res
4992 }
4993 declare <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
4995 define <8 x float> @test_mm512_maskz_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
4996 ; X86-LABEL: test_mm512_maskz_mul_ps_256:
4998 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4999 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5000 ; X86-NEXT: vmulps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x59,0xc1]
5001 ; X86-NEXT: retl # encoding: [0xc3]
5003 ; X64-LABEL: test_mm512_maskz_mul_ps_256:
5005 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5006 ; X64-NEXT: vmulps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x59,0xc1]
5007 ; X64-NEXT: retq # encoding: [0xc3]
5008 %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
5009 ret <8 x float> %res
5010 }
5012 define <8 x float> @test_mm512_mask_mul_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
5013 ; X86-LABEL: test_mm512_mask_mul_ps_256:
5015 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5016 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5017 ; X86-NEXT: vmulps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x59,0xd1]
5018 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
5019 ; X86-NEXT: retl # encoding: [0xc3]
5021 ; X64-LABEL: test_mm512_mask_mul_ps_256:
5023 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5024 ; X64-NEXT: vmulps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x59,0xd1]
5025 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
5026 ; X64-NEXT: retq # encoding: [0xc3]
5027 %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
5028 ret <8 x float> %res
5029 }
5031 define <8 x float> @test_mm512_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
5032 ; CHECK-LABEL: test_mm512_mul_ps_256:
5034 ; CHECK-NEXT: vmulps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x59,0xc1]
5035 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5036 %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
5037 ret <8 x float> %res
5038 }
5039 declare <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
5041 define <4 x float> @test_mm512_maskz_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
5042 ; X86-LABEL: test_mm512_maskz_mul_ps_128:
5044 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5045 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5046 ; X86-NEXT: vmulps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x59,0xc1]
5047 ; X86-NEXT: retl # encoding: [0xc3]
5049 ; X64-LABEL: test_mm512_maskz_mul_ps_128:
5051 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5052 ; X64-NEXT: vmulps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x59,0xc1]
5053 ; X64-NEXT: retq # encoding: [0xc3]
5054 %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
5055 ret <4 x float> %res
5056 }
5058 define <4 x float> @test_mm512_mask_mul_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
5059 ; X86-LABEL: test_mm512_mask_mul_ps_128:
5061 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5062 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5063 ; X86-NEXT: vmulps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x59,0xd1]
5064 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
5065 ; X86-NEXT: retl # encoding: [0xc3]
5067 ; X64-LABEL: test_mm512_mask_mul_ps_128:
5069 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5070 ; X64-NEXT: vmulps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x59,0xd1]
5071 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
5072 ; X64-NEXT: retq # encoding: [0xc3]
5073 %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
5074 ret <4 x float> %res
5075 }
5077 define <4 x float> @test_mm512_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
5078 ; CHECK-LABEL: test_mm512_mul_ps_128:
5080 ; CHECK-NEXT: vmulps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x59,0xc1]
5081 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5082 %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
5083 ret <4 x float> %res
5084 }
5085 declare <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
5087 define <8 x float> @test_mm512_maskz_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
5088 ; X86-LABEL: test_mm512_maskz_div_ps_256:
5090 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5091 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5092 ; X86-NEXT: vdivps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5e,0xc1]
5093 ; X86-NEXT: retl # encoding: [0xc3]
5095 ; X64-LABEL: test_mm512_maskz_div_ps_256:
5097 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5098 ; X64-NEXT: vdivps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5e,0xc1]
5099 ; X64-NEXT: retq # encoding: [0xc3]
5100 %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
5101 ret <8 x float> %res
5102 }
5104 define <8 x float> @test_mm512_mask_div_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
5105 ; X86-LABEL: test_mm512_mask_div_ps_256:
5107 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5108 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5109 ; X86-NEXT: vdivps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5e,0xd1]
5110 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
5111 ; X86-NEXT: retl # encoding: [0xc3]
5113 ; X64-LABEL: test_mm512_mask_div_ps_256:
5115 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5116 ; X64-NEXT: vdivps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5e,0xd1]
5117 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
5118 ; X64-NEXT: retq # encoding: [0xc3]
5119 %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
5120 ret <8 x float> %res
5121 }
5123 define <8 x float> @test_mm512_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
5124 ; CHECK-LABEL: test_mm512_div_ps_256:
5126 ; CHECK-NEXT: vdivps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5e,0xc1]
5127 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5128 %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
5129 ret <8 x float> %res
5130 }
5131 declare <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
5133 define <4 x float> @test_mm512_maskz_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
5134 ; X86-LABEL: test_mm512_maskz_div_ps_128:
5136 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5137 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5138 ; X86-NEXT: vdivps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5e,0xc1]
5139 ; X86-NEXT: retl # encoding: [0xc3]
5141 ; X64-LABEL: test_mm512_maskz_div_ps_128:
5143 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5144 ; X64-NEXT: vdivps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5e,0xc1]
5145 ; X64-NEXT: retq # encoding: [0xc3]
5146 %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
5147 ret <4 x float> %res
5148 }
5150 define <4 x float> @test_mm512_mask_div_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
5151 ; X86-LABEL: test_mm512_mask_div_ps_128:
5153 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5154 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5155 ; X86-NEXT: vdivps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5e,0xd1]
5156 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
5157 ; X86-NEXT: retl # encoding: [0xc3]
5159 ; X64-LABEL: test_mm512_mask_div_ps_128:
5161 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5162 ; X64-NEXT: vdivps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5e,0xd1]
5163 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
5164 ; X64-NEXT: retq # encoding: [0xc3]
5165 %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
5166 ret <4 x float> %res
5167 }
5169 define <4 x float> @test_mm512_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
5170 ; CHECK-LABEL: test_mm512_div_ps_128:
5172 ; CHECK-NEXT: vdivps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5e,0xc1]
5173 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5174 %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
5175 ret <4 x float> %res
5176 }
5177 declare <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
5179 declare <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)
5181 define <8 x float>@test_int_x86_avx512_mask_shuf_f32x4_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
5182 ; X86-LABEL: test_int_x86_avx512_mask_shuf_f32x4_256:
5184 ; X86-NEXT: vblendps $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0xf0]
5185 ; X86-NEXT: # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
5186 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5187 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5188 ; X86-NEXT: vmovaps %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x28,0xd0]
5189 ; X86-NEXT: vmovaps %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x28,0xc8]
5190 ; X86-NEXT: vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
5191 ; X86-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
5192 ; X86-NEXT: retl # encoding: [0xc3]
5194 ; X64-LABEL: test_int_x86_avx512_mask_shuf_f32x4_256:
5196 ; X64-NEXT: vblendps $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0xf0]
5197 ; X64-NEXT: # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
5198 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5199 ; X64-NEXT: vmovaps %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x28,0xd0]
5200 ; X64-NEXT: vmovaps %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x28,0xc8]
5201 ; X64-NEXT: vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
5202 ; X64-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
5203 ; X64-NEXT: retq # encoding: [0xc3]
5204 %res = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4)
5205 %res1 = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1)
5206 %res2 = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> zeroinitializer, i8 %x4)
5207 %res3 = fadd <8 x float> %res, %res1
5208 %res4 = fadd <8 x float> %res2, %res3
5209 ret <8 x float> %res4
5212 declare <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)
5214 define <4 x double>@test_int_x86_avx512_mask_shuf_f64x2_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
5215 ; X86-LABEL: test_int_x86_avx512_mask_shuf_f64x2_256:
5217 ; X86-NEXT: vblendpd $12, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0d,0xc1,0x0c]
5218 ; X86-NEXT: # ymm0 = ymm0[0,1],ymm1[2,3]
5219 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5220 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5221 ; X86-NEXT: vmovapd %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x28,0xd0]
5222 ; X86-NEXT: vmovapd %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x28,0xc8]
5223 ; X86-NEXT: vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
5224 ; X86-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
5225 ; X86-NEXT: retl # encoding: [0xc3]
5227 ; X64-LABEL: test_int_x86_avx512_mask_shuf_f64x2_256:
5229 ; X64-NEXT: vblendpd $12, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0d,0xc1,0x0c]
5230 ; X64-NEXT: # ymm0 = ymm0[0,1],ymm1[2,3]
5231 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5232 ; X64-NEXT: vmovapd %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x28,0xd0]
5233 ; X64-NEXT: vmovapd %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x28,0xc8]
5234 ; X64-NEXT: vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
5235 ; X64-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
5236 ; X64-NEXT: retq # encoding: [0xc3]
5237 %res = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 %x4)
5238 %res1 = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 -1)
5239 %res2 = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> zeroinitializer, i8 %x4)
5240 %res3 = fadd <4 x double> %res, %res1
5241 %res4 = fadd <4 x double> %res2, %res3
5242 ret <4 x double> %res4
5245 declare <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)
5247 define <8 x i32>@test_int_x86_avx512_mask_shuf_i32x4_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
5248 ; X86-LABEL: test_int_x86_avx512_mask_shuf_i32x4_256:
5250 ; X86-NEXT: vpblendd $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x02,0xc1,0xf0]
5251 ; X86-NEXT: # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
5252 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5253 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5254 ; X86-NEXT: vmovdqa32 %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0xd0]
5255 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
5256 ; X86-NEXT: retl # encoding: [0xc3]
5258 ; X64-LABEL: test_int_x86_avx512_mask_shuf_i32x4_256:
5260 ; X64-NEXT: vpblendd $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x02,0xc1,0xf0]
5261 ; X64-NEXT: # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
5262 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5263 ; X64-NEXT: vmovdqa32 %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0xd0]
5264 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
5265 ; X64-NEXT: retq # encoding: [0xc3]
5266 %res = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
5267 %res1 = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 -1)
5268 %res2 = add <8 x i32> %res, %res1
5269 ret <8 x i32> %res2
5272 declare <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)
5274 define <4 x i64>@test_int_x86_avx512_mask_shuf_i64x2_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
5275 ; X86-LABEL: test_int_x86_avx512_mask_shuf_i64x2_256:
5277 ; X86-NEXT: vpblendd $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x02,0xc1,0xf0]
5278 ; X86-NEXT: # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
5279 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5280 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5281 ; X86-NEXT: vmovdqa64 %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xd0]
5282 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
5283 ; X86-NEXT: retl # encoding: [0xc3]
5285 ; X64-LABEL: test_int_x86_avx512_mask_shuf_i64x2_256:
5287 ; X64-NEXT: vpblendd $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x02,0xc1,0xf0]
5288 ; X64-NEXT: # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
5289 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5290 ; X64-NEXT: vmovdqa64 %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xd0]
5291 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
5292 ; X64-NEXT: retq # encoding: [0xc3]
5293 %res = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)
5294 %res1 = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 -1)
5295 %res2 = add <4 x i64> %res, %res1
5296 ret <4 x i64> %res2
5299 declare <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double>, <2 x double>, i32, <2 x double>, i8)
5301 define <2 x double>@test_int_x86_avx512_mask_shuf_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
5302 ; X86-LABEL: test_int_x86_avx512_mask_shuf_pd_128:
5304 ; X86-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc6,0xd9,0x01]
5305 ; X86-NEXT: # xmm3 = xmm0[1],xmm1[0]
5306 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5307 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5308 ; X86-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xc6,0xd1,0x01]
5309 ; X86-NEXT: # xmm2 {%k1} = xmm0[1],xmm1[0]
5310 ; X86-NEXT: vaddpd %xmm3, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xd3]
5311 ; X86-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xc6,0xc1,0x01]
5312 ; X86-NEXT: # xmm0 {%k1} {z} = xmm0[1],xmm1[0]
5313 ; X86-NEXT: vaddpd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc2]
5314 ; X86-NEXT: retl # encoding: [0xc3]
5316 ; X64-LABEL: test_int_x86_avx512_mask_shuf_pd_128:
5318 ; X64-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc6,0xd9,0x01]
5319 ; X64-NEXT: # xmm3 = xmm0[1],xmm1[0]
5320 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5321 ; X64-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xc6,0xd1,0x01]
5322 ; X64-NEXT: # xmm2 {%k1} = xmm0[1],xmm1[0]
5323 ; X64-NEXT: vaddpd %xmm3, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xd3]
5324 ; X64-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xc6,0xc1,0x01]
5325 ; X64-NEXT: # xmm0 {%k1} {z} = xmm0[1],xmm1[0]
5326 ; X64-NEXT: vaddpd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc2]
5327 ; X64-NEXT: retq # encoding: [0xc3]
5328 %res = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 1, <2 x double> %x3, i8 %x4)
5329 %res1 = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 1, <2 x double> %x3, i8 -1)
5330 %res2 = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 1, <2 x double> zeroinitializer, i8 %x4)
5331 %res3 = fadd <2 x double> %res, %res1
5332 %res4 = fadd <2 x double> %res2, %res3
5333 ret <2 x double> %res4
5336 declare <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)
5338 define <4 x double>@test_int_x86_avx512_mask_shuf_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
5339 ; X86-LABEL: test_int_x86_avx512_mask_shuf_pd_256:
5341 ; X86-NEXT: vshufpd $6, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xc6,0xd9,0x06]
5342 ; X86-NEXT: # ymm3 = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
5343 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5344 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5345 ; X86-NEXT: vshufpd $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xc6,0xd1,0x06]
5346 ; X86-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
5347 ; X86-NEXT: vaddpd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
5348 ; X86-NEXT: retl # encoding: [0xc3]
5350 ; X64-LABEL: test_int_x86_avx512_mask_shuf_pd_256:
5352 ; X64-NEXT: vshufpd $6, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xc6,0xd9,0x06]
5353 ; X64-NEXT: # ymm3 = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
5354 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5355 ; X64-NEXT: vshufpd $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xc6,0xd1,0x06]
5356 ; X64-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
5357 ; X64-NEXT: vaddpd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
5358 ; X64-NEXT: retq # encoding: [0xc3]
5359 %res = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 6, <4 x double> %x3, i8 %x4)
5360 %res1 = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 6, <4 x double> %x3, i8 -1)
5361 %res2 = fadd <4 x double> %res, %res1
5362 ret <4 x double> %res2
5365 declare <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float>, <4 x float>, i32, <4 x float>, i8)
5367 define <4 x float>@test_int_x86_avx512_mask_shuf_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
5368 ; X86-LABEL: test_int_x86_avx512_mask_shuf_ps_128:
5370 ; X86-NEXT: vshufps $22, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xd9,0x16]
5371 ; X86-NEXT: # xmm3 = xmm0[2,1],xmm1[1,0]
5372 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5373 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5374 ; X86-NEXT: vshufps $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0xc6,0xd1,0x16]
5375 ; X86-NEXT: # xmm2 {%k1} = xmm0[2,1],xmm1[1,0]
5376 ; X86-NEXT: vaddps %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
5377 ; X86-NEXT: retl # encoding: [0xc3]
5379 ; X64-LABEL: test_int_x86_avx512_mask_shuf_ps_128:
5381 ; X64-NEXT: vshufps $22, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xd9,0x16]
5382 ; X64-NEXT: # xmm3 = xmm0[2,1],xmm1[1,0]
5383 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5384 ; X64-NEXT: vshufps $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0xc6,0xd1,0x16]
5385 ; X64-NEXT: # xmm2 {%k1} = xmm0[2,1],xmm1[1,0]
5386 ; X64-NEXT: vaddps %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
5387 ; X64-NEXT: retq # encoding: [0xc3]
5388 %res = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 %x4)
5389 %res1 = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 -1)
5390 %res2 = fadd <4 x float> %res, %res1
5391 ret <4 x float> %res2
5394 declare <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)
5396 define <8 x float>@test_int_x86_avx512_mask_shuf_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
5397 ; X86-LABEL: test_int_x86_avx512_mask_shuf_ps_256:
5399 ; X86-NEXT: vshufps $22, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0xc6,0xd9,0x16]
5400 ; X86-NEXT: # ymm3 = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
5401 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5402 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5403 ; X86-NEXT: vshufps $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0xc6,0xd1,0x16]
5404 ; X86-NEXT: # ymm2 {%k1} = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
5405 ; X86-NEXT: vaddps %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
5406 ; X86-NEXT: retl # encoding: [0xc3]
5408 ; X64-LABEL: test_int_x86_avx512_mask_shuf_ps_256:
5410 ; X64-NEXT: vshufps $22, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0xc6,0xd9,0x16]
5411 ; X64-NEXT: # ymm3 = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
5412 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5413 ; X64-NEXT: vshufps $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0xc6,0xd1,0x16]
5414 ; X64-NEXT: # ymm2 {%k1} = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
5415 ; X64-NEXT: vaddps %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
5416 ; X64-NEXT: retq # encoding: [0xc3]
5417 %res = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4)
5418 %res1 = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1)
5419 %res2 = fadd <8 x float> %res, %res1
5420 ret <8 x float> %res2
5423 declare <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
5425 define <4 x i32>@test_int_x86_avx512_mask_pmaxs_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
5426 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_d_128:
5428 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5429 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5430 ; X86-NEXT: vpmaxsd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3d,0xd1]
5431 ; X86-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3d,0xc1]
5432 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
5433 ; X86-NEXT: retl # encoding: [0xc3]
5435 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_d_128:
5437 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5438 ; X64-NEXT: vpmaxsd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3d,0xd1]
5439 ; X64-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3d,0xc1]
5440 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
5441 ; X64-NEXT: retq # encoding: [0xc3]
5442 %res = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
5443 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
5444 %res2 = add <4 x i32> %res, %res1
5445 ret <4 x i32> %res2
5448 declare <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
5450 define <8 x i32>@test_int_x86_avx512_mask_pmaxs_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
5451 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_d_256:
5453 ; X86-NEXT: vpmaxsd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3d,0xd9]
5454 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5455 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5456 ; X86-NEXT: vpmaxsd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3d,0xd1]
5457 ; X86-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
5458 ; X86-NEXT: retl # encoding: [0xc3]
5460 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_d_256:
5462 ; X64-NEXT: vpmaxsd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3d,0xd9]
5463 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5464 ; X64-NEXT: vpmaxsd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3d,0xd1]
5465 ; X64-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
5466 ; X64-NEXT: retq # encoding: [0xc3]
5467 %res = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
5468 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
5469 %res2 = add <8 x i32> %res, %res1
5470 ret <8 x i32> %res2
5473 declare <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
5475 define <2 x i64>@test_int_x86_avx512_mask_pmaxs_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
5476 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_q_128:
5478 ; X86-NEXT: vpmaxsq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x3d,0xd9]
5479 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5480 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5481 ; X86-NEXT: vpmaxsq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3d,0xd1]
5482 ; X86-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
5483 ; X86-NEXT: retl # encoding: [0xc3]
5485 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_q_128:
5487 ; X64-NEXT: vpmaxsq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x3d,0xd9]
5488 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5489 ; X64-NEXT: vpmaxsq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3d,0xd1]
5490 ; X64-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
5491 ; X64-NEXT: retq # encoding: [0xc3]
5492 %res = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
5493 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
5494 %res2 = add <2 x i64> %res, %res1
5495 ret <2 x i64> %res2
5498 declare <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
5500 define <4 x i64>@test_int_x86_avx512_mask_pmaxs_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
5501 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_q_256:
5503 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5504 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5505 ; X86-NEXT: vpmaxsq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3d,0xd1]
5506 ; X86-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3d,0xc1]
5507 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
5508 ; X86-NEXT: retl # encoding: [0xc3]
5510 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_q_256:
5512 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5513 ; X64-NEXT: vpmaxsq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3d,0xd1]
5514 ; X64-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3d,0xc1]
5515 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
5516 ; X64-NEXT: retq # encoding: [0xc3]
5517 %res = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
5518 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
5519 %res2 = add <4 x i64> %res, %res1
5520 ret <4 x i64> %res2
5523 declare <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
5525 define <4 x i32>@test_int_x86_avx512_mask_pmaxu_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
5526 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_d_128:
5528 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5529 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5530 ; X86-NEXT: vpmaxud %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3f,0xd1]
5531 ; X86-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3f,0xc1]
5532 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
5533 ; X86-NEXT: retl # encoding: [0xc3]
5535 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_d_128:
5537 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5538 ; X64-NEXT: vpmaxud %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3f,0xd1]
5539 ; X64-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3f,0xc1]
5540 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
5541 ; X64-NEXT: retq # encoding: [0xc3]
5542 %res = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
5543 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
5544 %res2 = add <4 x i32> %res, %res1
5545 ret <4 x i32> %res2
5548 declare <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
5550 define <8 x i32>@test_int_x86_avx512_mask_pmaxu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
5551 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_d_256:
5553 ; X86-NEXT: vpmaxud %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3f,0xd9]
5554 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5555 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5556 ; X86-NEXT: vpmaxud %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3f,0xd1]
5557 ; X86-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
5558 ; X86-NEXT: retl # encoding: [0xc3]
5560 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_d_256:
5562 ; X64-NEXT: vpmaxud %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3f,0xd9]
5563 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5564 ; X64-NEXT: vpmaxud %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3f,0xd1]
5565 ; X64-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
5566 ; X64-NEXT: retq # encoding: [0xc3]
5567 %res = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
5568 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
5569 %res2 = add <8 x i32> %res, %res1
5570 ret <8 x i32> %res2
5573 declare <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
5575 define <2 x i64>@test_int_x86_avx512_mask_pmaxu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
5576 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_q_128:
5578 ; X86-NEXT: vpmaxuq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x3f,0xd9]
5579 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5580 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5581 ; X86-NEXT: vpmaxuq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3f,0xd1]
5582 ; X86-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
5583 ; X86-NEXT: retl # encoding: [0xc3]
5585 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_q_128:
5587 ; X64-NEXT: vpmaxuq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x3f,0xd9]
5588 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5589 ; X64-NEXT: vpmaxuq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3f,0xd1]
5590 ; X64-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
5591 ; X64-NEXT: retq # encoding: [0xc3]
5592 %res = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
5593 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
5594 %res2 = add <2 x i64> %res, %res1
5595 ret <2 x i64> %res2
5598 declare <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
5600 define <4 x i64>@test_int_x86_avx512_mask_pmaxu_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
5601 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_q_256:
5603 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5604 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5605 ; X86-NEXT: vpmaxuq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3f,0xd1]
5606 ; X86-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3f,0xc1]
5607 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
5608 ; X86-NEXT: retl # encoding: [0xc3]
5610 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_q_256:
5612 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5613 ; X64-NEXT: vpmaxuq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3f,0xd1]
5614 ; X64-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3f,0xc1]
5615 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
5616 ; X64-NEXT: retq # encoding: [0xc3]
5617 %res = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
5618 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
5619 %res2 = add <4 x i64> %res, %res1
5620 ret <4 x i64> %res2
5623 declare <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
5625 define <4 x i32>@test_int_x86_avx512_mask_pmins_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
5626 ; X86-LABEL: test_int_x86_avx512_mask_pmins_d_128:
5628 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5629 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5630 ; X86-NEXT: vpminsd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x39,0xd1]
5631 ; X86-NEXT: vpminsd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x39,0xc1]
5632 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
5633 ; X86-NEXT: retl # encoding: [0xc3]
5635 ; X64-LABEL: test_int_x86_avx512_mask_pmins_d_128:
5637 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5638 ; X64-NEXT: vpminsd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x39,0xd1]
5639 ; X64-NEXT: vpminsd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x39,0xc1]
5640 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
5641 ; X64-NEXT: retq # encoding: [0xc3]
5642 %res = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
5643 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
5644 %res2 = add <4 x i32> %res, %res1
5645 ret <4 x i32> %res2
5648 declare <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
5650 define <8 x i32>@test_int_x86_avx512_mask_pmins_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
5651 ; X86-LABEL: test_int_x86_avx512_mask_pmins_d_256:
5653 ; X86-NEXT: vpminsd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x39,0xd9]
5654 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5655 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5656 ; X86-NEXT: vpminsd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x39,0xd1]
5657 ; X86-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
5658 ; X86-NEXT: retl # encoding: [0xc3]
5660 ; X64-LABEL: test_int_x86_avx512_mask_pmins_d_256:
5662 ; X64-NEXT: vpminsd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x39,0xd9]
5663 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5664 ; X64-NEXT: vpminsd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x39,0xd1]
5665 ; X64-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
5666 ; X64-NEXT: retq # encoding: [0xc3]
5667 %res = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
5668 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
5669 %res2 = add <8 x i32> %res, %res1
5670 ret <8 x i32> %res2
5673 declare <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
5675 define <2 x i64>@test_int_x86_avx512_mask_pmins_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
5676 ; X86-LABEL: test_int_x86_avx512_mask_pmins_q_128:
5678 ; X86-NEXT: vpminsq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x39,0xd9]
5679 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5680 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5681 ; X86-NEXT: vpminsq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x39,0xd1]
5682 ; X86-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
5683 ; X86-NEXT: retl # encoding: [0xc3]
5685 ; X64-LABEL: test_int_x86_avx512_mask_pmins_q_128:
5687 ; X64-NEXT: vpminsq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x39,0xd9]
5688 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5689 ; X64-NEXT: vpminsq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x39,0xd1]
5690 ; X64-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
5691 ; X64-NEXT: retq # encoding: [0xc3]
5692 %res = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
5693 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
5694 %res2 = add <2 x i64> %res, %res1
5695 ret <2 x i64> %res2
5698 declare <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
5700 define <4 x i64>@test_int_x86_avx512_mask_pmins_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
5701 ; X86-LABEL: test_int_x86_avx512_mask_pmins_q_256:
5703 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5704 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5705 ; X86-NEXT: vpminsq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x39,0xd1]
5706 ; X86-NEXT: vpminsq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x39,0xc1]
5707 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
5708 ; X86-NEXT: retl # encoding: [0xc3]
5710 ; X64-LABEL: test_int_x86_avx512_mask_pmins_q_256:
5712 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5713 ; X64-NEXT: vpminsq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x39,0xd1]
5714 ; X64-NEXT: vpminsq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x39,0xc1]
5715 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
5716 ; X64-NEXT: retq # encoding: [0xc3]
5717 %res = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
5718 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
5719 %res2 = add <4 x i64> %res, %res1
5720 ret <4 x i64> %res2
5723 declare <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
5725 define <4 x i32>@test_int_x86_avx512_mask_pminu_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
5726 ; X86-LABEL: test_int_x86_avx512_mask_pminu_d_128:
5728 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5729 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5730 ; X86-NEXT: vpminud %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3b,0xd1]
5731 ; X86-NEXT: vpminud %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3b,0xc1]
5732 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
5733 ; X86-NEXT: retl # encoding: [0xc3]
5735 ; X64-LABEL: test_int_x86_avx512_mask_pminu_d_128:
5737 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5738 ; X64-NEXT: vpminud %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3b,0xd1]
5739 ; X64-NEXT: vpminud %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3b,0xc1]
5740 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
5741 ; X64-NEXT: retq # encoding: [0xc3]
5742 %res = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
5743 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
5744 %res2 = add <4 x i32> %res, %res1
5745 ret <4 x i32> %res2
5748 declare <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
5750 define <8 x i32>@test_int_x86_avx512_mask_pminu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
5751 ; X86-LABEL: test_int_x86_avx512_mask_pminu_d_256:
5753 ; X86-NEXT: vpminud %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3b,0xd9]
5754 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5755 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5756 ; X86-NEXT: vpminud %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3b,0xd1]
5757 ; X86-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
5758 ; X86-NEXT: retl # encoding: [0xc3]
5760 ; X64-LABEL: test_int_x86_avx512_mask_pminu_d_256:
5762 ; X64-NEXT: vpminud %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3b,0xd9]
5763 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5764 ; X64-NEXT: vpminud %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3b,0xd1]
5765 ; X64-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
5766 ; X64-NEXT: retq # encoding: [0xc3]
5767 %res = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
5768 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
5769 %res2 = add <8 x i32> %res, %res1
5770 ret <8 x i32> %res2
5773 declare <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
5775 define <2 x i64>@test_int_x86_avx512_mask_pminu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
5776 ; X86-LABEL: test_int_x86_avx512_mask_pminu_q_128:
5778 ; X86-NEXT: vpminuq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x3b,0xd9]
5779 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5780 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5781 ; X86-NEXT: vpminuq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3b,0xd1]
5782 ; X86-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
5783 ; X86-NEXT: retl # encoding: [0xc3]
5785 ; X64-LABEL: test_int_x86_avx512_mask_pminu_q_128:
5787 ; X64-NEXT: vpminuq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x3b,0xd9]
5788 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5789 ; X64-NEXT: vpminuq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3b,0xd1]
5790 ; X64-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
5791 ; X64-NEXT: retq # encoding: [0xc3]
5792 %res = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
5793 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
5794 %res2 = add <2 x i64> %res, %res1
5795 ret <2 x i64> %res2
5798 declare <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
5800 define <4 x i64>@test_int_x86_avx512_mask_pminu_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
5801 ; X86-LABEL: test_int_x86_avx512_mask_pminu_q_256:
5803 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5804 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5805 ; X86-NEXT: vpminuq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3b,0xd1]
5806 ; X86-NEXT: vpminuq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3b,0xc1]
5807 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
5808 ; X86-NEXT: retl # encoding: [0xc3]
5810 ; X64-LABEL: test_int_x86_avx512_mask_pminu_q_256:
5812 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5813 ; X64-NEXT: vpminuq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3b,0xd1]
5814 ; X64-NEXT: vpminuq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3b,0xc1]
5815 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
5816 ; X64-NEXT: retq # encoding: [0xc3]
5817 %res = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
5818 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
5819 %res2 = add <4 x i64> %res, %res1
5820 ret <4 x i64> %res2
5823 declare <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
5825 define <2 x i64>@test_int_x86_avx512_mask_psrl_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
5826 ; X86-LABEL: test_int_x86_avx512_mask_psrl_q_128:
5828 ; X86-NEXT: vpsrlq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd3,0xd9]
5829 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5830 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5831 ; X86-NEXT: vpsrlq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xd3,0xd1]
5832 ; X86-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xd3,0xc1]
5833 ; X86-NEXT: vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0]
5834 ; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
5835 ; X86-NEXT: retl # encoding: [0xc3]
5837 ; X64-LABEL: test_int_x86_avx512_mask_psrl_q_128:
5839 ; X64-NEXT: vpsrlq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd3,0xd9]
5840 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5841 ; X64-NEXT: vpsrlq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xd3,0xd1]
5842 ; X64-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xd3,0xc1]
5843 ; X64-NEXT: vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0]
5844 ; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
5845 ; X64-NEXT: retq # encoding: [0xc3]
5846 %res = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
5847 %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
5848 %res2 = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
5849 %res3 = add <2 x i64> %res, %res1
5850 %res4 = add <2 x i64> %res3, %res2
5851 ret <2 x i64> %res4
5854 declare <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64>, <2 x i64>, <4 x i64>, i8)
5856 define <4 x i64>@test_int_x86_avx512_mask_psrl_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) {
5857 ; X86-LABEL: test_int_x86_avx512_mask_psrl_q_256:
5859 ; X86-NEXT: vpsrlq %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd3,0xd9]
5860 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5861 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5862 ; X86-NEXT: vpsrlq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xd3,0xd1]
5863 ; X86-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xd3,0xc1]
5864 ; X86-NEXT: vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
5865 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
5866 ; X86-NEXT: retl # encoding: [0xc3]
5868 ; X64-LABEL: test_int_x86_avx512_mask_psrl_q_256:
5870 ; X64-NEXT: vpsrlq %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd3,0xd9]
5871 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5872 ; X64-NEXT: vpsrlq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xd3,0xd1]
5873 ; X64-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xd3,0xc1]
5874 ; X64-NEXT: vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
5875 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
5876 ; X64-NEXT: retq # encoding: [0xc3]
5877 %res = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3)
5878 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 -1)
5879 %res2 = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
5880 %res3 = add <4 x i64> %res, %res1
5881 %res4 = add <4 x i64> %res3, %res2
5882 ret <4 x i64> %res4
5885 declare <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
5887 define <4 x i32>@test_int_x86_avx512_mask_psrl_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
5888 ; X86-LABEL: test_int_x86_avx512_mask_psrl_d_128:
5890 ; X86-NEXT: vpsrld %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd2,0xd9]
5891 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5892 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5893 ; X86-NEXT: vpsrld %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd2,0xd1]
5894 ; X86-NEXT: vpsrld %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd2,0xc1]
5895 ; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
5896 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
5897 ; X86-NEXT: retl # encoding: [0xc3]
5899 ; X64-LABEL: test_int_x86_avx512_mask_psrl_d_128:
5901 ; X64-NEXT: vpsrld %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd2,0xd9]
5902 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5903 ; X64-NEXT: vpsrld %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd2,0xd1]
5904 ; X64-NEXT: vpsrld %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd2,0xc1]
5905 ; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
5906 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
5907 ; X64-NEXT: retq # encoding: [0xc3]
5908 %res = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
5909 %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
5910 %res2 = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
5911 %res3 = add <4 x i32> %res, %res1
5912 %res4 = add <4 x i32> %res3, %res2
5913 ret <4 x i32> %res4
5916 declare <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32>, <4 x i32>, <8 x i32>, i8)
5918 define <8 x i32>@test_int_x86_avx512_mask_psrl_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) {
5919 ; X86-LABEL: test_int_x86_avx512_mask_psrl_d_256:
5921 ; X86-NEXT: vpsrld %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd2,0xd9]
5922 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5923 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5924 ; X86-NEXT: vpsrld %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd2,0xd1]
5925 ; X86-NEXT: vpsrld %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd2,0xc1]
5926 ; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
5927 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
5928 ; X86-NEXT: retl # encoding: [0xc3]
5930 ; X64-LABEL: test_int_x86_avx512_mask_psrl_d_256:
5932 ; X64-NEXT: vpsrld %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd2,0xd9]
5933 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5934 ; X64-NEXT: vpsrld %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd2,0xd1]
5935 ; X64-NEXT: vpsrld %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd2,0xc1]
5936 ; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
5937 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
5938 ; X64-NEXT: retq # encoding: [0xc3]
5939 %res = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3)
5940 %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 -1)
5941 %res2 = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
5942 %res3 = add <8 x i32> %res, %res1
5943 %res4 = add <8 x i32> %res2, %res3
5944 ret <8 x i32> %res4
5947 declare <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
5949 define <4 x i32>@test_int_x86_avx512_mask_psra_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
5950 ; X86-LABEL: test_int_x86_avx512_mask_psra_d_128:
5952 ; X86-NEXT: vpsrad %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe2,0xd9]
5953 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5954 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5955 ; X86-NEXT: vpsrad %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe2,0xd1]
5956 ; X86-NEXT: vpsrad %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe2,0xc1]
5957 ; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
5958 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
5959 ; X86-NEXT: retl # encoding: [0xc3]
5961 ; X64-LABEL: test_int_x86_avx512_mask_psra_d_128:
5963 ; X64-NEXT: vpsrad %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe2,0xd9]
5964 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5965 ; X64-NEXT: vpsrad %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe2,0xd1]
5966 ; X64-NEXT: vpsrad %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe2,0xc1]
5967 ; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
5968 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
5969 ; X64-NEXT: retq # encoding: [0xc3]
5970 %res = call <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
5971 %res1 = call <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
5972 %res2 = call <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
5973 %res3 = add <4 x i32> %res, %res1
5974 %res4 = add <4 x i32> %res3, %res2
5975 ret <4 x i32> %res4
5978 declare <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32>, <4 x i32>, <8 x i32>, i8)
5980 define <8 x i32>@test_int_x86_avx512_mask_psra_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) {
5981 ; X86-LABEL: test_int_x86_avx512_mask_psra_d_256:
5983 ; X86-NEXT: vpsrad %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe2,0xd9]
5984 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5985 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5986 ; X86-NEXT: vpsrad %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe2,0xd1]
5987 ; X86-NEXT: vpsrad %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe2,0xc1]
5988 ; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
5989 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
5990 ; X86-NEXT: retl # encoding: [0xc3]
5992 ; X64-LABEL: test_int_x86_avx512_mask_psra_d_256:
5994 ; X64-NEXT: vpsrad %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe2,0xd9]
5995 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5996 ; X64-NEXT: vpsrad %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe2,0xd1]
5997 ; X64-NEXT: vpsrad %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe2,0xc1]
5998 ; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
5999 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
6000 ; X64-NEXT: retq # encoding: [0xc3]
6001 %res = call <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3)
6002 %res1 = call <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
6003 %res2 = call <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 -1)
6004 %res3 = add <8 x i32> %res, %res1
6005 %res4 = add <8 x i32> %res3, %res2
6006 ret <8 x i32> %res4
6009 declare <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
6011 define <4 x i32>@test_int_x86_avx512_mask_psll_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
6012 ; X86-LABEL: test_int_x86_avx512_mask_psll_d_128:
6014 ; X86-NEXT: vpslld %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf2,0xd9]
6015 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6016 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6017 ; X86-NEXT: vpslld %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf2,0xd1]
6018 ; X86-NEXT: vpslld %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf2,0xc1]
6019 ; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
6020 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
6021 ; X86-NEXT: retl # encoding: [0xc3]
6023 ; X64-LABEL: test_int_x86_avx512_mask_psll_d_128:
6025 ; X64-NEXT: vpslld %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf2,0xd9]
6026 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6027 ; X64-NEXT: vpslld %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf2,0xd1]
6028 ; X64-NEXT: vpslld %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf2,0xc1]
6029 ; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
6030 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
6031 ; X64-NEXT: retq # encoding: [0xc3]
6032 %res = call <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
6033 %res1 = call <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
6034 %res2 = call <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
6035 %res3 = add <4 x i32> %res, %res1
6036 %res4 = add <4 x i32> %res3, %res2
6037 ret <4 x i32> %res4
6040 declare <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32>, <4 x i32>, <8 x i32>, i8)
6042 define <8 x i32>@test_int_x86_avx512_mask_psll_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) {
6043 ; X86-LABEL: test_int_x86_avx512_mask_psll_d_256:
6045 ; X86-NEXT: vpslld %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf2,0xd9]
6046 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6047 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6048 ; X86-NEXT: vpslld %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf2,0xd1]
6049 ; X86-NEXT: vpslld %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf2,0xc1]
6050 ; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
6051 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
6052 ; X86-NEXT: retl # encoding: [0xc3]
6054 ; X64-LABEL: test_int_x86_avx512_mask_psll_d_256:
6056 ; X64-NEXT: vpslld %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf2,0xd9]
6057 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6058 ; X64-NEXT: vpslld %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf2,0xd1]
6059 ; X64-NEXT: vpslld %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf2,0xc1]
6060 ; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
6061 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
6062 ; X64-NEXT: retq # encoding: [0xc3]
6063 %res = call <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3)
6064 %res1 = call <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
6065 %res2 = call <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 -1)
6066 %res3 = add <8 x i32> %res, %res1
6067 %res4 = add <8 x i32> %res3, %res2
6068 ret <8 x i32> %res4
6071 declare <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64>, <2 x i64>, <4 x i64>, i8)
6073 define <4 x i64>@test_int_x86_avx512_mask_psll_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) {
6074 ; X86-LABEL: test_int_x86_avx512_mask_psll_q_256:
6076 ; X86-NEXT: vpsllq %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf3,0xd9]
6077 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6078 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6079 ; X86-NEXT: vpsllq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf3,0xd1]
6080 ; X86-NEXT: vpsllq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf3,0xc1]
6081 ; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
6082 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
6083 ; X86-NEXT: retl # encoding: [0xc3]
6085 ; X64-LABEL: test_int_x86_avx512_mask_psll_q_256:
6087 ; X64-NEXT: vpsllq %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf3,0xd9]
6088 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6089 ; X64-NEXT: vpsllq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf3,0xd1]
6090 ; X64-NEXT: vpsllq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf3,0xc1]
6091 ; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
6092 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
6093 ; X64-NEXT: retq # encoding: [0xc3]
6094 %res = call <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3)
6095 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
6096 %res2 = call <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 -1)
6097 %res3 = add <4 x i64> %res, %res1
6098 %res4 = add <4 x i64> %res3, %res2
6099 ret <4 x i64> %res4
6100 }
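; The remaining tests follow the same pattern: each intrinsic is called with a
; merge-masked destination, a zeroed destination, and an all-ones (no-op) mask,
; and the three results are added so every masking form is checked (the psrav
; constant-operand test below is the one exception and makes a single unmasked
; call).
;
; Logical right shift by immediate count (psrl.qi.* and psrl.di.*).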
6102 declare <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64>, i32, <2 x i64>, i8)
6104 define <2 x i64>@test_int_x86_avx512_mask_psrl_qi_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
6105 ; X86-LABEL: test_int_x86_avx512_mask_psrl_qi_128:
6107 ; X86-NEXT: vpsrlq $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x73,0xd0,0x03]
6108 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
6109 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6110 ; X86-NEXT: vpsrlq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x73,0xd0,0x03]
6111 ; X86-NEXT: vpsrlq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x73,0xd0,0x03]
6112 ; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
6113 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
6114 ; X86-NEXT: retl # encoding: [0xc3]
6116 ; X64-LABEL: test_int_x86_avx512_mask_psrl_qi_128:
6118 ; X64-NEXT: vpsrlq $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x73,0xd0,0x03]
6119 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6120 ; X64-NEXT: vpsrlq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x73,0xd0,0x03]
6121 ; X64-NEXT: vpsrlq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x73,0xd0,0x03]
6122 ; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
6123 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
6124 ; X64-NEXT: retq # encoding: [0xc3]
6125 %res = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3)
6126 %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 -1)
6127 %res2 = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i32 3, <2 x i64> zeroinitializer, i8 %x3)
6128 %res3 = add <2 x i64> %res, %res1
6129 %res4 = add <2 x i64> %res2, %res3
6130 ret <2 x i64> %res4
6131 }
6133 declare <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64>, i32, <4 x i64>, i8)
6135 define <4 x i64>@test_int_x86_avx512_mask_psrl_qi_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
6136 ; X86-LABEL: test_int_x86_avx512_mask_psrl_qi_256:
6138 ; X86-NEXT: vpsrlq $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x73,0xd0,0x03]
6139 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
6140 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6141 ; X86-NEXT: vpsrlq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x73,0xd0,0x03]
6142 ; X86-NEXT: vpsrlq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x73,0xd0,0x03]
6143 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
6144 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
6145 ; X86-NEXT: retl # encoding: [0xc3]
6147 ; X64-LABEL: test_int_x86_avx512_mask_psrl_qi_256:
6149 ; X64-NEXT: vpsrlq $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x73,0xd0,0x03]
6150 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6151 ; X64-NEXT: vpsrlq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x73,0xd0,0x03]
6152 ; X64-NEXT: vpsrlq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x73,0xd0,0x03]
6153 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
6154 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
6155 ; X64-NEXT: retq # encoding: [0xc3]
6156 %res = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
6157 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
6158 %res2 = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
6159 %res3 = add <4 x i64> %res, %res1
6160 %res4 = add <4 x i64> %res2, %res3
6161 ret <4 x i64> %res4
6162 }
6164 declare <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32>, i32, <4 x i32>, i8)
6166 define <4 x i32>@test_int_x86_avx512_mask_psrl_di_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
6167 ; X86-LABEL: test_int_x86_avx512_mask_psrl_di_128:
6169 ; X86-NEXT: vpsrld $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x72,0xd0,0x03]
6170 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
6171 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6172 ; X86-NEXT: vpsrld $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xd0,0x03]
6173 ; X86-NEXT: vpsrld $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xd0,0x03]
6174 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
6175 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
6176 ; X86-NEXT: retl # encoding: [0xc3]
6178 ; X64-LABEL: test_int_x86_avx512_mask_psrl_di_128:
6180 ; X64-NEXT: vpsrld $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x72,0xd0,0x03]
6181 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6182 ; X64-NEXT: vpsrld $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xd0,0x03]
6183 ; X64-NEXT: vpsrld $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xd0,0x03]
6184 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
6185 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
6186 ; X64-NEXT: retq # encoding: [0xc3]
6187 %res = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
6188 %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1)
6189 %res2 = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3)
6190 %res3 = add <4 x i32> %res, %res1
6191 %res4 = add <4 x i32> %res2, %res3
6192 ret <4 x i32> %res4
6193 }
6195 declare <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32>, i32, <8 x i32>, i8)
6197 define <8 x i32>@test_int_x86_avx512_mask_psrl_di_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
6198 ; X86-LABEL: test_int_x86_avx512_mask_psrl_di_256:
6200 ; X86-NEXT: vpsrld $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x72,0xd0,0x03]
6201 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
6202 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6203 ; X86-NEXT: vpsrld $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xd0,0x03]
6204 ; X86-NEXT: vpsrld $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xd0,0x03]
6205 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
6206 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
6207 ; X86-NEXT: retl # encoding: [0xc3]
6209 ; X64-LABEL: test_int_x86_avx512_mask_psrl_di_256:
6211 ; X64-NEXT: vpsrld $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x72,0xd0,0x03]
6212 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6213 ; X64-NEXT: vpsrld $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xd0,0x03]
6214 ; X64-NEXT: vpsrld $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xd0,0x03]
6215 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
6216 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
6217 ; X64-NEXT: retq # encoding: [0xc3]
6218 %res = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
6219 %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1)
6220 %res2 = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3)
6221 %res3 = add <8 x i32> %res, %res1
6222 %res4 = add <8 x i32> %res2, %res3
6223 ret <8 x i32> %res4
6224 }
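; Logical left shift by immediate count (psll.di.*).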
6226 declare <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32>, i32, <4 x i32>, i8)
6228 define <4 x i32>@test_int_x86_avx512_mask_psll_di_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
6229 ; X86-LABEL: test_int_x86_avx512_mask_psll_di_128:
6231 ; X86-NEXT: vpslld $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x72,0xf0,0x03]
6232 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
6233 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6234 ; X86-NEXT: vpslld $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xf0,0x03]
6235 ; X86-NEXT: vpslld $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xf0,0x03]
6236 ; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
6237 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
6238 ; X86-NEXT: retl # encoding: [0xc3]
6240 ; X64-LABEL: test_int_x86_avx512_mask_psll_di_128:
6242 ; X64-NEXT: vpslld $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x72,0xf0,0x03]
6243 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6244 ; X64-NEXT: vpslld $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xf0,0x03]
6245 ; X64-NEXT: vpslld $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xf0,0x03]
6246 ; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
6247 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
6248 ; X64-NEXT: retq # encoding: [0xc3]
6249 %res = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
6250 %res1 = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3)
6251 %res2 = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1)
6252 %res3 = add <4 x i32> %res, %res1
6253 %res4 = add <4 x i32> %res3, %res2
6254 ret <4 x i32> %res4
6255 }
6257 declare <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32>, i32, <8 x i32>, i8)
6259 define <8 x i32>@test_int_x86_avx512_mask_psll_di_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
6260 ; X86-LABEL: test_int_x86_avx512_mask_psll_di_256:
6262 ; X86-NEXT: vpslld $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x72,0xf0,0x03]
6263 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
6264 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6265 ; X86-NEXT: vpslld $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xf0,0x03]
6266 ; X86-NEXT: vpslld $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xf0,0x03]
6267 ; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
6268 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
6269 ; X86-NEXT: retl # encoding: [0xc3]
6271 ; X64-LABEL: test_int_x86_avx512_mask_psll_di_256:
6273 ; X64-NEXT: vpslld $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x72,0xf0,0x03]
6274 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6275 ; X64-NEXT: vpslld $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xf0,0x03]
6276 ; X64-NEXT: vpslld $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xf0,0x03]
6277 ; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
6278 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
6279 ; X64-NEXT: retq # encoding: [0xc3]
6280 %res = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
6281 %res1 = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3)
6282 %res2 = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1)
6283 %res3 = add <8 x i32> %res, %res1
6284 %res4 = add <8 x i32> %res3, %res2
6285 ret <8 x i32> %res4
6286 }
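; Variable per-element logical right shifts (psrlv*).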
6288 declare <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64>, <2 x i64>, <2 x i64>, i8)
6290 define <2 x i64>@test_int_x86_avx512_mask_psrlv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
6291 ; X86-LABEL: test_int_x86_avx512_mask_psrlv2_di:
6293 ; X86-NEXT: vpsrlvq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0xd9]
6294 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6295 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6296 ; X86-NEXT: vpsrlvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x45,0xd1]
6297 ; X86-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x45,0xc1]
6298 ; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
6299 ; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
6300 ; X86-NEXT: retl # encoding: [0xc3]
6302 ; X64-LABEL: test_int_x86_avx512_mask_psrlv2_di:
6304 ; X64-NEXT: vpsrlvq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0xd9]
6305 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6306 ; X64-NEXT: vpsrlvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x45,0xd1]
6307 ; X64-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x45,0xc1]
6308 ; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
6309 ; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
6310 ; X64-NEXT: retq # encoding: [0xc3]
6311 %res = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
6312 %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
6313 %res2 = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
6314 %res3 = add <2 x i64> %res, %res1
6315 %res4 = add <2 x i64> %res3, %res2
6316 ret <2 x i64> %res4
6317 }
6319 declare <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64>, <4 x i64>, <4 x i64>, i8)
6321 define <4 x i64>@test_int_x86_avx512_mask_psrlv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
6322 ; X86-LABEL: test_int_x86_avx512_mask_psrlv4_di:
6324 ; X86-NEXT: vpsrlvq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0xd9]
6325 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6326 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6327 ; X86-NEXT: vpsrlvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x45,0xd1]
6328 ; X86-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x45,0xc1]
6329 ; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
6330 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
6331 ; X86-NEXT: retl # encoding: [0xc3]
6333 ; X64-LABEL: test_int_x86_avx512_mask_psrlv4_di:
6335 ; X64-NEXT: vpsrlvq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0xd9]
6336 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6337 ; X64-NEXT: vpsrlvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x45,0xd1]
6338 ; X64-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x45,0xc1]
6339 ; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
6340 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
6341 ; X64-NEXT: retq # encoding: [0xc3]
6342 %res = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
6343 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
6344 %res2 = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
6345 %res3 = add <4 x i64> %res, %res1
6346 %res4 = add <4 x i64> %res3, %res2
6347 ret <4 x i64> %res4
6348 }
6350 declare <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8)
6352 define <4 x i32>@test_int_x86_avx512_mask_psrlv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
6353 ; X86-LABEL: test_int_x86_avx512_mask_psrlv4_si:
6355 ; X86-NEXT: vpsrlvd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0xd9]
6356 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6357 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6358 ; X86-NEXT: vpsrlvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x45,0xd1]
6359 ; X86-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x45,0xc1]
6360 ; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
6361 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
6362 ; X86-NEXT: retl # encoding: [0xc3]
6364 ; X64-LABEL: test_int_x86_avx512_mask_psrlv4_si:
6366 ; X64-NEXT: vpsrlvd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0xd9]
6367 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6368 ; X64-NEXT: vpsrlvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x45,0xd1]
6369 ; X64-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x45,0xc1]
6370 ; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
6371 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
6372 ; X64-NEXT: retq # encoding: [0xc3]
6373 %res = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
6374 %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
6375 %res2 = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
6376 %res3 = add <4 x i32> %res, %res1
6377 %res4 = add <4 x i32> %res3, %res2
6378 ret <4 x i32> %res4
6379 }
6381 declare <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8)
6383 define <8 x i32>@test_int_x86_avx512_mask_psrlv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
6384 ; X86-LABEL: test_int_x86_avx512_mask_psrlv8_si:
6386 ; X86-NEXT: vpsrlvd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0xd9]
6387 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6388 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6389 ; X86-NEXT: vpsrlvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x45,0xd1]
6390 ; X86-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x45,0xc1]
6391 ; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
6392 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
6393 ; X86-NEXT: retl # encoding: [0xc3]
6395 ; X64-LABEL: test_int_x86_avx512_mask_psrlv8_si:
6397 ; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0xd9]
6398 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6399 ; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x45,0xd1]
6400 ; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x45,0xc1]
6401 ; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
6402 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
6403 ; X64-NEXT: retq # encoding: [0xc3]
6404 %res = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
6405 %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
6406 %res2 = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
6407 %res3 = add <8 x i32> %res, %res1
6408 %res4 = add <8 x i32> %res3, %res2
6409 ret <8 x i32> %res4
6410 }
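; Variable per-element arithmetic right shifts (psrav*).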
6412 declare <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8)
6414 define <4 x i32>@test_int_x86_avx512_mask_psrav4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
6415 ; X86-LABEL: test_int_x86_avx512_mask_psrav4_si:
6417 ; X86-NEXT: vpsravd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0xd9]
6418 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6419 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6420 ; X86-NEXT: vpsravd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x46,0xd1]
6421 ; X86-NEXT: vpsravd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x46,0xc1]
6422 ; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
6423 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
6424 ; X86-NEXT: retl # encoding: [0xc3]
6426 ; X64-LABEL: test_int_x86_avx512_mask_psrav4_si:
6428 ; X64-NEXT: vpsravd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0xd9]
6429 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6430 ; X64-NEXT: vpsravd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x46,0xd1]
6431 ; X64-NEXT: vpsravd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x46,0xc1]
6432 ; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
6433 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
6434 ; X64-NEXT: retq # encoding: [0xc3]
6435 %res = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
6436 %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
6437 %res2 = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
6438 %res3 = add <4 x i32> %res, %res1
6439 %res4 = add <4 x i32> %res3, %res2
6440 ret <4 x i32> %res4
6441 }
6443 declare <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8)
6445 define <8 x i32>@test_int_x86_avx512_mask_psrav8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
6446 ; X86-LABEL: test_int_x86_avx512_mask_psrav8_si:
6448 ; X86-NEXT: vpsravd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0xd9]
6449 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6450 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6451 ; X86-NEXT: vpsravd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x46,0xd1]
6452 ; X86-NEXT: vpsravd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x46,0xc1]
6453 ; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
6454 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
6455 ; X86-NEXT: retl # encoding: [0xc3]
6457 ; X64-LABEL: test_int_x86_avx512_mask_psrav8_si:
6459 ; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0xd9]
6460 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6461 ; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x46,0xd1]
6462 ; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x46,0xc1]
6463 ; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
6464 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
6465 ; X64-NEXT: retq # encoding: [0xc3]
6466 %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
6467 %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
6468 %res2 = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
6469 %res3 = add <8 x i32> %res, %res1
6470 %res4 = add <8 x i32> %res3, %res2
6471 ret <8 x i32> %res4
6472 }
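; psrav8.si applied to constant vectors: the checks show both constants being
; loaded from the constant pool and vpsravd executed at run time, i.e. the
; result is not folded to a constant.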
6474 define <8 x i32>@test_int_x86_avx512_mask_psrav8_si_const() {
6475 ; X86-LABEL: test_int_x86_avx512_mask_psrav8_si_const:
6477 ; X86-NEXT: vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
6478 ; X86-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
6479 ; X86-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
6480 ; X86-NEXT: vpsravd {{\.LCPI.*}}, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
6481 ; X86-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
6482 ; X86-NEXT: retl # encoding: [0xc3]
6484 ; X64-LABEL: test_int_x86_avx512_mask_psrav8_si_const:
6486 ; X64-NEXT: vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
6487 ; X64-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
6488 ; X64-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
6489 ; X64-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
6490 ; X64-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
6491 ; X64-NEXT: retq # encoding: [0xc3]
6492 %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>, <8 x i32> zeroinitializer, i8 -1)
6493 ret <8 x i32> %res
6494 }
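; Variable per-element logical left shifts (psllv*).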
6496 declare <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64>, <2 x i64>, <2 x i64>, i8)
6498 define <2 x i64>@test_int_x86_avx512_mask_psllv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
6499 ; X86-LABEL: test_int_x86_avx512_mask_psllv2_di:
6501 ; X86-NEXT: vpsllvq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0xd9]
6502 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6503 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6504 ; X86-NEXT: vpsllvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x47,0xd1]
6505 ; X86-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x47,0xc1]
6506 ; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
6507 ; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
6508 ; X86-NEXT: retl # encoding: [0xc3]
6510 ; X64-LABEL: test_int_x86_avx512_mask_psllv2_di:
6512 ; X64-NEXT: vpsllvq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0xd9]
6513 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6514 ; X64-NEXT: vpsllvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x47,0xd1]
6515 ; X64-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x47,0xc1]
6516 ; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
6517 ; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
6518 ; X64-NEXT: retq # encoding: [0xc3]
6519 %res = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
6520 %res1 = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
6521 %res2 = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
6522 %res3 = add <2 x i64> %res, %res1
6523 %res4 = add <2 x i64> %res3, %res2
6524 ret <2 x i64> %res4
6525 }
6527 declare <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64>, <4 x i64>, <4 x i64>, i8)
6529 define <4 x i64>@test_int_x86_avx512_mask_psllv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
6530 ; X86-LABEL: test_int_x86_avx512_mask_psllv4_di:
6532 ; X86-NEXT: vpsllvq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0xd9]
6533 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6534 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6535 ; X86-NEXT: vpsllvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x47,0xd1]
6536 ; X86-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x47,0xc1]
6537 ; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
6538 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
6539 ; X86-NEXT: retl # encoding: [0xc3]
6541 ; X64-LABEL: test_int_x86_avx512_mask_psllv4_di:
6543 ; X64-NEXT: vpsllvq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0xd9]
6544 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6545 ; X64-NEXT: vpsllvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x47,0xd1]
6546 ; X64-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x47,0xc1]
6547 ; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
6548 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
6549 ; X64-NEXT: retq # encoding: [0xc3]
6550 %res = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
6551 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
6552 %res2 = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
6553 %res3 = add <4 x i64> %res, %res1
6554 %res4 = add <4 x i64> %res3, %res2
6555 ret <4 x i64> %res4
6556 }
6558 declare <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8)
6560 define <4 x i32>@test_int_x86_avx512_mask_psllv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
6561 ; X86-LABEL: test_int_x86_avx512_mask_psllv4_si:
6563 ; X86-NEXT: vpsllvd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0xd9]
6564 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6565 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6566 ; X86-NEXT: vpsllvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x47,0xd1]
6567 ; X86-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x47,0xc1]
6568 ; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
6569 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
6570 ; X86-NEXT: retl # encoding: [0xc3]
6572 ; X64-LABEL: test_int_x86_avx512_mask_psllv4_si:
6574 ; X64-NEXT: vpsllvd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0xd9]
6575 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6576 ; X64-NEXT: vpsllvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x47,0xd1]
6577 ; X64-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x47,0xc1]
6578 ; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
6579 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
6580 ; X64-NEXT: retq # encoding: [0xc3]
6581 %res = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
6582 %res1 = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
6583 %res2 = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
6584 %res3 = add <4 x i32> %res, %res1
6585 %res4 = add <4 x i32> %res3, %res2
6586 ret <4 x i32> %res4
6587 }
6589 declare <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8)
6591 define <8 x i32>@test_int_x86_avx512_mask_psllv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
6592 ; X86-LABEL: test_int_x86_avx512_mask_psllv8_si:
6594 ; X86-NEXT: vpsllvd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0xd9]
6595 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6596 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6597 ; X86-NEXT: vpsllvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x47,0xd1]
6598 ; X86-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x47,0xc1]
6599 ; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
6600 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
6601 ; X86-NEXT: retl # encoding: [0xc3]
6603 ; X64-LABEL: test_int_x86_avx512_mask_psllv8_si:
6605 ; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0xd9]
6606 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6607 ; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x47,0xd1]
6608 ; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x47,0xc1]
6609 ; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
6610 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
6611 ; X64-NEXT: retq # encoding: [0xc3]
6612 %res = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
6613 %res1 = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
6614 %res2 = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
6615 %res3 = add <8 x i32> %res, %res1
6616 %res4 = add <8 x i32> %res3, %res2
6617 ret <8 x i32> %res4
6618 }
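; Masked zero-extension intrinsics (pmovzxb.*, pmovzxd.q, pmovzxw.*).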
6620 declare <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8>, <4 x i32>, i8)
6622 define <4 x i32>@test_int_x86_avx512_mask_pmovzxb_d_128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) {
6623 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_d_128:
6625 ; X86-NEXT: vpmovzxbd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x31,0xd0]
6626 ; X86-NEXT: # xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6627 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6628 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6629 ; X86-NEXT: vpmovzxbd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x31,0xc8]
6630 ; X86-NEXT: # xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6631 ; X86-NEXT: vpmovzxbd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x31,0xc0]
6632 ; X86-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6633 ; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
6634 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
6635 ; X86-NEXT: retl # encoding: [0xc3]
6637 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_d_128:
6639 ; X64-NEXT: vpmovzxbd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x31,0xd0]
6640 ; X64-NEXT: # xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6641 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6642 ; X64-NEXT: vpmovzxbd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x31,0xc8]
6643 ; X64-NEXT: # xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6644 ; X64-NEXT: vpmovzxbd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x31,0xc0]
6645 ; X64-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6646 ; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
6647 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
6648 ; X64-NEXT: retq # encoding: [0xc3]
6649 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2)
6650 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> zeroinitializer, i8 %x2)
6651 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 -1)
6652 %res3 = add <4 x i32> %res, %res1
6653 %res4 = add <4 x i32> %res3, %res2
6654 ret <4 x i32> %res4
6655 }
6657 declare <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8>, <8 x i32>, i8)
6659 define <8 x i32>@test_int_x86_avx512_mask_pmovzxb_d_256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) {
6660 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_d_256:
6662 ; X86-NEXT: vpmovzxbd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x31,0xd0]
6663 ; X86-NEXT: # ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
6664 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6665 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6666 ; X86-NEXT: vpmovzxbd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x31,0xc8]
6667 ; X86-NEXT: # ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
6668 ; X86-NEXT: vpmovzxbd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x31,0xc0]
6669 ; X86-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
6670 ; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
6671 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
6672 ; X86-NEXT: retl # encoding: [0xc3]
6674 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_d_256:
6676 ; X64-NEXT: vpmovzxbd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x31,0xd0]
6677 ; X64-NEXT: # ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
6678 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6679 ; X64-NEXT: vpmovzxbd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x31,0xc8]
6680 ; X64-NEXT: # ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
6681 ; X64-NEXT: vpmovzxbd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x31,0xc0]
6682 ; X64-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
6683 ; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
6684 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
6685 ; X64-NEXT: retq # encoding: [0xc3]
6686 %res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2)
6687 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> zeroinitializer, i8 %x2)
6688 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 -1)
6689 %res3 = add <8 x i32> %res, %res1
6690 %res4 = add <8 x i32> %res3, %res2
6691 ret <8 x i32> %res4
6692 }
6694 declare <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8>, <2 x i64>, i8)
6696 define <2 x i64>@test_int_x86_avx512_mask_pmovzxb_q_128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) {
6697 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_q_128:
6699 ; X86-NEXT: vpmovzxbq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x32,0xd0]
6700 ; X86-NEXT: # xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
6701 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6702 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6703 ; X86-NEXT: vpmovzxbq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x32,0xc8]
6704 ; X86-NEXT: # xmm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
6705 ; X86-NEXT: vpmovzxbq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x32,0xc0]
6706 ; X86-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
6707 ; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
6708 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
6709 ; X86-NEXT: retl # encoding: [0xc3]
6711 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_q_128:
6713 ; X64-NEXT: vpmovzxbq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x32,0xd0]
6714 ; X64-NEXT: # xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
6715 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6716 ; X64-NEXT: vpmovzxbq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x32,0xc8]
6717 ; X64-NEXT: # xmm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
6718 ; X64-NEXT: vpmovzxbq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x32,0xc0]
6719 ; X64-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
6720 ; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
6721 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
6722 ; X64-NEXT: retq # encoding: [0xc3]
6723 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2)
6724 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> zeroinitializer, i8 %x2)
6725 %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 -1)
6726 %res3 = add <2 x i64> %res, %res1
6727 %res4 = add <2 x i64> %res3, %res2
6728 ret <2 x i64> %res4
6729 }
6731 declare <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8>, <4 x i64>, i8)
6733 define <4 x i64>@test_int_x86_avx512_mask_pmovzxb_q_256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) {
6734 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_q_256:
6736 ; X86-NEXT: vpmovzxbq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x32,0xd0]
6737 ; X86-NEXT: # ymm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
6738 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6739 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6740 ; X86-NEXT: vpmovzxbq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x32,0xc8]
6741 ; X86-NEXT: # ymm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
6742 ; X86-NEXT: vpmovzxbq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x32,0xc0]
6743 ; X86-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
6744 ; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
6745 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
6746 ; X86-NEXT: retl # encoding: [0xc3]
6748 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_q_256:
6750 ; X64-NEXT: vpmovzxbq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x32,0xd0]
6751 ; X64-NEXT: # ymm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
6752 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6753 ; X64-NEXT: vpmovzxbq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x32,0xc8]
6754 ; X64-NEXT: # ymm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
6755 ; X64-NEXT: vpmovzxbq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x32,0xc0]
6756 ; X64-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
6757 ; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
6758 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
6759 ; X64-NEXT: retq # encoding: [0xc3]
6760 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2)
6761 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> zeroinitializer, i8 %x2)
6762 %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 -1)
6763 %res3 = add <4 x i64> %res, %res1
6764 %res4 = add <4 x i64> %res3, %res2
6765 ret <4 x i64> %res4
6766 }
6768 declare <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32>, <2 x i64>, i8)
6770 define <2 x i64>@test_int_x86_avx512_mask_pmovzxd_q_128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) {
6771 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxd_q_128:
6773 ; X86-NEXT: vpmovzxdq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x35,0xd0]
6774 ; X86-NEXT: # xmm2 = xmm0[0],zero,xmm0[1],zero
6775 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6776 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6777 ; X86-NEXT: vpmovzxdq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x35,0xc8]
6778 ; X86-NEXT: # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero
6779 ; X86-NEXT: vpmovzxdq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x35,0xc0]
6780 ; X86-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero
6781 ; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
6782 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
6783 ; X86-NEXT: retl # encoding: [0xc3]
6785 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxd_q_128:
6787 ; X64-NEXT: vpmovzxdq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x35,0xd0]
6788 ; X64-NEXT: # xmm2 = xmm0[0],zero,xmm0[1],zero
6789 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6790 ; X64-NEXT: vpmovzxdq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x35,0xc8]
6791 ; X64-NEXT: # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero
6792 ; X64-NEXT: vpmovzxdq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x35,0xc0]
6793 ; X64-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero
6794 ; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
6795 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
6796 ; X64-NEXT: retq # encoding: [0xc3]
6797 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2)
6798 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> zeroinitializer, i8 %x2)
6799 %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 -1)
6800 %res3 = add <2 x i64> %res, %res1
6801 %res4 = add <2 x i64> %res3, %res2
6802 ret <2 x i64> %res4
6803 }
6805 declare <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32>, <4 x i64>, i8)
6807 define <4 x i64>@test_int_x86_avx512_mask_pmovzxd_q_256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) {
6808 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxd_q_256:
6810 ; X86-NEXT: vpmovzxdq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x35,0xd0]
6811 ; X86-NEXT: # ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6812 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6813 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6814 ; X86-NEXT: vpmovzxdq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x35,0xc8]
6815 ; X86-NEXT: # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6816 ; X86-NEXT: vpmovzxdq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x35,0xc0]
6817 ; X86-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6818 ; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
6819 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
6820 ; X86-NEXT: retl # encoding: [0xc3]
6822 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxd_q_256:
6824 ; X64-NEXT: vpmovzxdq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x35,0xd0]
6825 ; X64-NEXT: # ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6826 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6827 ; X64-NEXT: vpmovzxdq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x35,0xc8]
6828 ; X64-NEXT: # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6829 ; X64-NEXT: vpmovzxdq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x35,0xc0]
6830 ; X64-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6831 ; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
6832 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
6833 ; X64-NEXT: retq # encoding: [0xc3]
6834 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2)
6835 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> zeroinitializer, i8 %x2)
6836 %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 -1)
6837 %res3 = add <4 x i64> %res, %res1
6838 %res4 = add <4 x i64> %res3, %res2
6839 ret <4 x i64> %res4
6840 }
6842 declare <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16>, <4 x i32>, i8)
6844 define <4 x i32>@test_int_x86_avx512_mask_pmovzxw_d_128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) {
6845 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_d_128:
6847 ; X86-NEXT: vpmovzxwd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xd0]
6848 ; X86-NEXT: # xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6849 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6850 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6851 ; X86-NEXT: vpmovzxwd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x33,0xc8]
6852 ; X86-NEXT: # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6853 ; X86-NEXT: vpmovzxwd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x33,0xc0]
6854 ; X86-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6855 ; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
6856 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
6857 ; X86-NEXT: retl # encoding: [0xc3]
6859 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_d_128:
6861 ; X64-NEXT: vpmovzxwd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xd0]
6862 ; X64-NEXT: # xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6863 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6864 ; X64-NEXT: vpmovzxwd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x33,0xc8]
6865 ; X64-NEXT: # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6866 ; X64-NEXT: vpmovzxwd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x33,0xc0]
6867 ; X64-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6868 ; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
6869 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
6870 ; X64-NEXT: retq # encoding: [0xc3]
6871 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2)
6872 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> zeroinitializer, i8 %x2)
6873 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 -1)
6874 %res3 = add <4 x i32> %res, %res1
6875 %res4 = add <4 x i32> %res3, %res2
6876 ret <4 x i32> %res4
6877 }
6879 declare <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16>, <8 x i32>, i8)
6881 define <8 x i32>@test_int_x86_avx512_mask_pmovzxw_d_256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) {
6882 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_d_256:
6884 ; X86-NEXT: vpmovzxwd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x33,0xd0]
6885 ; X86-NEXT: # ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
6886 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6887 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6888 ; X86-NEXT: vpmovzxwd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x33,0xc8]
6889 ; X86-NEXT: # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
6890 ; X86-NEXT: vpmovzxwd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x33,0xc0]
6891 ; X86-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
6892 ; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
6893 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
6894 ; X86-NEXT: retl # encoding: [0xc3]
6896 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_d_256:
6898 ; X64-NEXT: vpmovzxwd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x33,0xd0]
6899 ; X64-NEXT: # ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
6900 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6901 ; X64-NEXT: vpmovzxwd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x33,0xc8]
6902 ; X64-NEXT: # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
6903 ; X64-NEXT: vpmovzxwd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x33,0xc0]
6904 ; X64-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
6905 ; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
6906 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
6907 ; X64-NEXT: retq # encoding: [0xc3]
6908 %res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2)
6909 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> zeroinitializer, i8 %x2)
6910 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 -1)
6911 %res3 = add <8 x i32> %res, %res1
6912 %res4 = add <8 x i32> %res3, %res2
6913 ret <8 x i32> %res4
6914 }
6916 declare <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16>, <2 x i64>, i8)
6918 define <2 x i64>@test_int_x86_avx512_mask_pmovzxw_q_128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) {
6919 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_q_128:
6921 ; X86-NEXT: vpmovzxwq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x34,0xd0]
6922 ; X86-NEXT: # xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
6923 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6924 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6925 ; X86-NEXT: vpmovzxwq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x34,0xc8]
6926 ; X86-NEXT: # xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
6927 ; X86-NEXT: vpmovzxwq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x34,0xc0]
6928 ; X86-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
6929 ; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
6930 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
6931 ; X86-NEXT: retl # encoding: [0xc3]
6933 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_q_128:
6935 ; X64-NEXT: vpmovzxwq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x34,0xd0]
6936 ; X64-NEXT: # xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
6937 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6938 ; X64-NEXT: vpmovzxwq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x34,0xc8]
6939 ; X64-NEXT: # xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
6940 ; X64-NEXT: vpmovzxwq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x34,0xc0]
6941 ; X64-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
6942 ; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
6943 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
6944 ; X64-NEXT: retq # encoding: [0xc3]
6945 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2)
6946 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> zeroinitializer, i8 %x2)
6947 %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 -1)
6948 %res3 = add <2 x i64> %res, %res1
6949 %res4 = add <2 x i64> %res3, %res2
6950 ret <2 x i64> %res4
6951 }
6953 declare <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16>, <4 x i64>, i8)
6955 define <4 x i64>@test_int_x86_avx512_mask_pmovzxw_q_256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) {
6956 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_q_256:
6958 ; X86-NEXT: vpmovzxwq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x34,0xd0]
6959 ; X86-NEXT: # ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6960 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6961 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6962 ; X86-NEXT: vpmovzxwq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x34,0xc8]
6963 ; X86-NEXT: # ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6964 ; X86-NEXT: vpmovzxwq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x34,0xc0]
6965 ; X86-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6966 ; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
6967 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
6968 ; X86-NEXT: retl # encoding: [0xc3]
6970 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_q_256:
6972 ; X64-NEXT: vpmovzxwq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x34,0xd0]
6973 ; X64-NEXT: # ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6974 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6975 ; X64-NEXT: vpmovzxwq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x34,0xc8]
6976 ; X64-NEXT: # ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6977 ; X64-NEXT: vpmovzxwq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x34,0xc0]
6978 ; X64-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6979 ; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
6980 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
6981 ; X64-NEXT: retq # encoding: [0xc3]
6982 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2)
6983 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> zeroinitializer, i8 %x2)
6984 %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 -1)
6985 %res3 = add <4 x i64> %res, %res1
6986 %res4 = add <4 x i64> %res3, %res2
6987 ret <4 x i64> %res4
6988 }
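; The tests below exercise the masked sign-extension intrinsics (llvm.x86.avx512.mask.pmovsxb.* and .pmovsxw.*): each checks the merge-masked, zero-masked, and unmasked forms and sums the three results.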
6990 declare <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8>, <4 x i32>, i8)
6992 define <4 x i32>@test_int_x86_avx512_mask_pmovsxb_d_128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) {
6993 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_d_128:
6995 ; X86-NEXT: vpmovsxbd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0xd0]
6996 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6997 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6998 ; X86-NEXT: vpmovsxbd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x21,0xc8]
6999 ; X86-NEXT: vpmovsxbd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x21,0xc0]
7000 ; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
7001 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
7002 ; X86-NEXT: retl # encoding: [0xc3]
7004 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_d_128:
7006 ; X64-NEXT: vpmovsxbd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0xd0]
7007 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7008 ; X64-NEXT: vpmovsxbd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x21,0xc8]
7009 ; X64-NEXT: vpmovsxbd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x21,0xc0]
7010 ; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
7011 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
7012 ; X64-NEXT: retq # encoding: [0xc3]
7013 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2)
7014 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> zeroinitializer, i8 %x2)
7015 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 -1)
7016 %res3 = add <4 x i32> %res, %res1
7017 %res4 = add <4 x i32> %res3, %res2
7018 ret <4 x i32> %res4
7019 }
7021 declare <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8>, <8 x i32>, i8)
7023 define <8 x i32>@test_int_x86_avx512_mask_pmovsxb_d_256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) {
7024 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_d_256:
7026 ; X86-NEXT: vpmovsxbd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0xd0]
7027 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7028 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7029 ; X86-NEXT: vpmovsxbd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x21,0xc8]
7030 ; X86-NEXT: vpmovsxbd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x21,0xc0]
7031 ; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
7032 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
7033 ; X86-NEXT: retl # encoding: [0xc3]
7035 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_d_256:
7037 ; X64-NEXT: vpmovsxbd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0xd0]
7038 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7039 ; X64-NEXT: vpmovsxbd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x21,0xc8]
7040 ; X64-NEXT: vpmovsxbd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x21,0xc0]
7041 ; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
7042 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
7043 ; X64-NEXT: retq # encoding: [0xc3]
7044 %res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2)
7045 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> zeroinitializer, i8 %x2)
7046 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 -1)
7047 %res3 = add <8 x i32> %res, %res1
7048 %res4 = add <8 x i32> %res3, %res2
7049 ret <8 x i32> %res4
7050 }
7052 declare <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8>, <2 x i64>, i8)
7054 define <2 x i64>@test_int_x86_avx512_mask_pmovsxb_q_128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) {
7055 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_q_128:
7057 ; X86-NEXT: vpmovsxbq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x22,0xd0]
7058 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7059 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7060 ; X86-NEXT: vpmovsxbq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x22,0xc8]
7061 ; X86-NEXT: vpmovsxbq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x22,0xc0]
7062 ; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
7063 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
7064 ; X86-NEXT: retl # encoding: [0xc3]
7066 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_q_128:
7068 ; X64-NEXT: vpmovsxbq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x22,0xd0]
7069 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7070 ; X64-NEXT: vpmovsxbq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x22,0xc8]
7071 ; X64-NEXT: vpmovsxbq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x22,0xc0]
7072 ; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
7073 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
7074 ; X64-NEXT: retq # encoding: [0xc3]
7075 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2)
7076 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> zeroinitializer, i8 %x2)
7077 %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 -1)
7078 %res3 = add <2 x i64> %res, %res1
7079 %res4 = add <2 x i64> %res3, %res2
7080 ret <2 x i64> %res4
7081 }
7083 declare <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8>, <4 x i64>, i8)
7085 define <4 x i64>@test_int_x86_avx512_mask_pmovsxb_q_256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) {
7086 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_q_256:
7088 ; X86-NEXT: vpmovsxbq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x22,0xd0]
7089 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7090 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7091 ; X86-NEXT: vpmovsxbq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x22,0xc8]
7092 ; X86-NEXT: vpmovsxbq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x22,0xc0]
7093 ; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
7094 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
7095 ; X86-NEXT: retl # encoding: [0xc3]
7097 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_q_256:
7099 ; X64-NEXT: vpmovsxbq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x22,0xd0]
7100 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7101 ; X64-NEXT: vpmovsxbq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x22,0xc8]
7102 ; X64-NEXT: vpmovsxbq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x22,0xc0]
7103 ; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
7104 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
7105 ; X64-NEXT: retq # encoding: [0xc3]
7106 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2)
7107 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> zeroinitializer, i8 %x2)
7108 %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 -1)
7109 %res3 = add <4 x i64> %res, %res1
7110 %res4 = add <4 x i64> %res3, %res2
7111 ret <4 x i64> %res4
7112 }
7114 declare <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16>, <4 x i32>, i8)
7116 define <4 x i32>@test_int_x86_avx512_mask_pmovsxw_d_128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) {
7117 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_d_128:
7119 ; X86-NEXT: vpmovsxwd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x23,0xd0]
7120 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7121 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7122 ; X86-NEXT: vpmovsxwd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x23,0xc8]
7123 ; X86-NEXT: vpmovsxwd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x23,0xc0]
7124 ; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
7125 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
7126 ; X86-NEXT: retl # encoding: [0xc3]
7128 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_d_128:
7130 ; X64-NEXT: vpmovsxwd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x23,0xd0]
7131 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7132 ; X64-NEXT: vpmovsxwd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x23,0xc8]
7133 ; X64-NEXT: vpmovsxwd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x23,0xc0]
7134 ; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
7135 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
7136 ; X64-NEXT: retq # encoding: [0xc3]
7137 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2)
7138 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> zeroinitializer, i8 %x2)
7139 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 -1)
7140 %res3 = add <4 x i32> %res, %res1
7141 %res4 = add <4 x i32> %res3, %res2
7142 ret <4 x i32> %res4
7143 }
7145 declare <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16>, <8 x i32>, i8)
7147 define <8 x i32>@test_int_x86_avx512_mask_pmovsxw_d_256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) {
7148 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_d_256:
7150 ; X86-NEXT: vpmovsxwd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x23,0xd0]
7151 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7152 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7153 ; X86-NEXT: vpmovsxwd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x23,0xc8]
7154 ; X86-NEXT: vpmovsxwd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x23,0xc0]
7155 ; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
7156 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
7157 ; X86-NEXT: retl # encoding: [0xc3]
7159 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_d_256:
7161 ; X64-NEXT: vpmovsxwd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x23,0xd0]
7162 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7163 ; X64-NEXT: vpmovsxwd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x23,0xc8]
7164 ; X64-NEXT: vpmovsxwd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x23,0xc0]
7165 ; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
7166 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
7167 ; X64-NEXT: retq # encoding: [0xc3]
7168 %res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2)
7169 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> zeroinitializer, i8 %x2)
7170 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 -1)
7171 %res3 = add <8 x i32> %res, %res1
7172 %res4 = add <8 x i32> %res3, %res2
7173 ret <8 x i32> %res4
7174 }
7176 declare <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16>, <2 x i64>, i8)
7178 define <2 x i64>@test_int_x86_avx512_mask_pmovsxw_q_128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) {
7179 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_q_128:
7181 ; X86-NEXT: vpmovsxwq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x24,0xd0]
7182 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7183 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7184 ; X86-NEXT: vpmovsxwq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x24,0xc8]
7185 ; X86-NEXT: vpmovsxwq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x24,0xc0]
7186 ; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
7187 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
7188 ; X86-NEXT: retl # encoding: [0xc3]
7190 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_q_128:
7192 ; X64-NEXT: vpmovsxwq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x24,0xd0]
7193 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7194 ; X64-NEXT: vpmovsxwq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x24,0xc8]
7195 ; X64-NEXT: vpmovsxwq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x24,0xc0]
7196 ; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
7197 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
7198 ; X64-NEXT: retq # encoding: [0xc3]
7199 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2)
7200 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> zeroinitializer, i8 %x2)
7201 %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 -1)
7202 %res3 = add <2 x i64> %res, %res1
7203 %res4 = add <2 x i64> %res3, %res2
7204 ret <2 x i64> %res4
7205 }
7207 declare <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16>, <4 x i64>, i8)
7209 define <4 x i64>@test_int_x86_avx512_mask_pmovsxw_q_256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) {
7210 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_q_256:
7212 ; X86-NEXT: vpmovsxwq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x24,0xd0]
7213 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7214 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7215 ; X86-NEXT: vpmovsxwq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x24,0xc8]
7216 ; X86-NEXT: vpmovsxwq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x24,0xc0]
7217 ; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
7218 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
7219 ; X86-NEXT: retl # encoding: [0xc3]
7221 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_q_256:
7223 ; X64-NEXT: vpmovsxwq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x24,0xd0]
7224 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7225 ; X64-NEXT: vpmovsxwq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x24,0xc8]
7226 ; X64-NEXT: vpmovsxwq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x24,0xc0]
7227 ; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
7228 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
7229 ; X64-NEXT: retq # encoding: [0xc3]
7230 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2)
7231 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> zeroinitializer, i8 %x2)
7232 %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 -1)
7233 %res3 = add <4 x i64> %res, %res1
7234 %res4 = add <4 x i64> %res3, %res2
7235 ret <4 x i64> %res4
7236 }
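; The tests below cover the masked 64-bit arithmetic right shifts: VPSRAQ by an xmm count (psra.q.*), by an immediate (psra.qi.*), and per-element variable shifts (psrav.q.*).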
7238 declare <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
7240 define <2 x i64>@test_int_x86_avx512_mask_psra_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
7241 ; X86-LABEL: test_int_x86_avx512_mask_psra_q_128:
7243 ; X86-NEXT: vpsraq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf1,0xfd,0x08,0xe2,0xd9]
7244 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7245 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7246 ; X86-NEXT: vpsraq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe2,0xd1]
7247 ; X86-NEXT: vpsraq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xe2,0xc1]
7248 ; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
7249 ; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
7250 ; X86-NEXT: retl # encoding: [0xc3]
7252 ; X64-LABEL: test_int_x86_avx512_mask_psra_q_128:
7254 ; X64-NEXT: vpsraq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf1,0xfd,0x08,0xe2,0xd9]
7255 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7256 ; X64-NEXT: vpsraq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe2,0xd1]
7257 ; X64-NEXT: vpsraq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xe2,0xc1]
7258 ; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
7259 ; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
7260 ; X64-NEXT: retq # encoding: [0xc3]
7261 %res = call <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
7262 %res1 = call <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
7263 %res2 = call <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
7264 %res3 = add <2 x i64> %res, %res1
7265 %res4 = add <2 x i64> %res3, %res2
7266 ret <2 x i64> %res4
7267 }
7269 declare <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64>, <2 x i64>, <4 x i64>, i8)
7271 define <4 x i64>@test_int_x86_avx512_mask_psra_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) {
7272 ; X86-LABEL: test_int_x86_avx512_mask_psra_q_256:
7274 ; X86-NEXT: vpsraq %xmm1, %ymm0, %ymm3 # encoding: [0x62,0xf1,0xfd,0x28,0xe2,0xd9]
7275 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7276 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7277 ; X86-NEXT: vpsraq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe2,0xd1]
7278 ; X86-NEXT: vpsraq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xe2,0xc1]
7279 ; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
7280 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
7281 ; X86-NEXT: retl # encoding: [0xc3]
7283 ; X64-LABEL: test_int_x86_avx512_mask_psra_q_256:
7285 ; X64-NEXT: vpsraq %xmm1, %ymm0, %ymm3 # encoding: [0x62,0xf1,0xfd,0x28,0xe2,0xd9]
7286 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7287 ; X64-NEXT: vpsraq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe2,0xd1]
7288 ; X64-NEXT: vpsraq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xe2,0xc1]
7289 ; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
7290 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
7291 ; X64-NEXT: retq # encoding: [0xc3]
7292 %res = call <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3)
7293 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
7294 %res2 = call <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 -1)
7295 %res3 = add <4 x i64> %res, %res1
7296 %res4 = add <4 x i64> %res3, %res2
7297 ret <4 x i64> %res4
7298 }
7300 declare <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64>, i32, <2 x i64>, i8)
7302 define <2 x i64>@test_int_x86_avx512_mask_psra_qi_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
7303 ; X86-LABEL: test_int_x86_avx512_mask_psra_qi_128:
7305 ; X86-NEXT: vpsraq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xe0,0x03]
7306 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
7307 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7308 ; X86-NEXT: vpsraq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xe0,0x03]
7309 ; X86-NEXT: vpsraq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xe0,0x03]
7310 ; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
7311 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
7312 ; X86-NEXT: retl # encoding: [0xc3]
7314 ; X64-LABEL: test_int_x86_avx512_mask_psra_qi_128:
7316 ; X64-NEXT: vpsraq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xe0,0x03]
7317 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
7318 ; X64-NEXT: vpsraq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xe0,0x03]
7319 ; X64-NEXT: vpsraq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xe0,0x03]
7320 ; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
7321 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
7322 ; X64-NEXT: retq # encoding: [0xc3]
7323 %res = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3)
7324 %res1 = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i32 3, <2 x i64> zeroinitializer, i8 %x3)
7325 %res2 = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 -1)
7326 %res3 = add <2 x i64> %res, %res1
7327 %res4 = add <2 x i64> %res3, %res2
7328 ret <2 x i64> %res4
7329 }
7331 declare <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64>, i32, <4 x i64>, i8)
7333 define <4 x i64>@test_int_x86_avx512_mask_psra_qi_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
7334 ; X86-LABEL: test_int_x86_avx512_mask_psra_qi_256:
7336 ; X86-NEXT: vpsraq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xe0,0x03]
7337 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
7338 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7339 ; X86-NEXT: vpsraq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xe0,0x03]
7340 ; X86-NEXT: vpsraq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xe0,0x03]
7341 ; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
7342 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
7343 ; X86-NEXT: retl # encoding: [0xc3]
7345 ; X64-LABEL: test_int_x86_avx512_mask_psra_qi_256:
7347 ; X64-NEXT: vpsraq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xe0,0x03]
7348 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
7349 ; X64-NEXT: vpsraq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xe0,0x03]
7350 ; X64-NEXT: vpsraq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xe0,0x03]
7351 ; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
7352 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
7353 ; X64-NEXT: retq # encoding: [0xc3]
7354 %res = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
7355 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
7356 %res2 = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
7357 %res3 = add <4 x i64> %res, %res1
7358 %res4 = add <4 x i64> %res3, %res2
7359 ret <4 x i64> %res4
7360 }
7362 declare <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
7364 define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
7365 ; X86-LABEL: test_int_x86_avx512_mask_psrav_q_128:
7367 ; X86-NEXT: vpsravq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x46,0xd9]
7368 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7369 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7370 ; X86-NEXT: vpsravq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x46,0xd1]
7371 ; X86-NEXT: vpsravq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x46,0xc1]
7372 ; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
7373 ; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
7374 ; X86-NEXT: retl # encoding: [0xc3]
7376 ; X64-LABEL: test_int_x86_avx512_mask_psrav_q_128:
7378 ; X64-NEXT: vpsravq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x46,0xd9]
7379 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7380 ; X64-NEXT: vpsravq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x46,0xd1]
7381 ; X64-NEXT: vpsravq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x46,0xc1]
7382 ; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
7383 ; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
7384 ; X64-NEXT: retq # encoding: [0xc3]
7385 %res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
7386 %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
7387 %res2 = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
7388 %res3 = add <2 x i64> %res, %res1
7389 %res4 = add <2 x i64> %res3, %res2
7390 ret <2 x i64> %res4
7391 }
7393 define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128_const(i8 %x3) {
7394 ; X86-LABEL: test_int_x86_avx512_mask_psrav_q_128_const:
7396 ; X86-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [2,0,4294967287,4294967295]
7397 ; X86-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
7398 ; X86-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
7399 ; X86-NEXT: vpsravq {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x46,0x05,A,A,A,A]
7400 ; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
7401 ; X86-NEXT: retl # encoding: [0xc3]
7403 ; X64-LABEL: test_int_x86_avx512_mask_psrav_q_128_const:
7405 ; X64-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [2,18446744073709551607]
7406 ; X64-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
7407 ; X64-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
7408 ; X64-NEXT: vpsravq {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x46,0x05,A,A,A,A]
7409 ; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
7410 ; X64-NEXT: retq # encoding: [0xc3]
7411 %res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> <i64 2, i64 -9>, <2 x i64> <i64 1, i64 90>, <2 x i64> zeroinitializer, i8 -1)
7412 ret <2 x i64> %res
7413 }
7415 declare <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
7417 define <4 x i64>@test_int_x86_avx512_mask_psrav_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
7418 ; X86-LABEL: test_int_x86_avx512_mask_psrav_q_256:
7420 ; X86-NEXT: vpsravq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x46,0xd9]
7421 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7422 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7423 ; X86-NEXT: vpsravq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x46,0xd1]
7424 ; X86-NEXT: vpsravq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x46,0xc1]
7425 ; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
7426 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
7427 ; X86-NEXT: retl # encoding: [0xc3]
7429 ; X64-LABEL: test_int_x86_avx512_mask_psrav_q_256:
7431 ; X64-NEXT: vpsravq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x46,0xd9]
7432 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7433 ; X64-NEXT: vpsravq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x46,0xd1]
7434 ; X64-NEXT: vpsravq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x46,0xc1]
7435 ; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
7436 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
7437 ; X64-NEXT: retq # encoding: [0xc3]
7438 %res = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
7439 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
7440 %res2 = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
7441 %res3 = add <4 x i64> %res, %res1
7442 %res4 = add <4 x i64> %res3, %res2
7443 ret <4 x i64> %res4
7444 }
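; The tests below cover the masked integer-to-double conversions VCVTDQ2PD and VCVTUDQ2PD, adding the masked result to the unmasked one.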
7446 declare <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32>, <2 x double>, i8)
7448 define <2 x double>@test_int_x86_avx512_mask_cvt_dq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {
7449 ; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_128:
7451 ; X86-NEXT: vcvtdq2pd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xd0]
7452 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7453 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7454 ; X86-NEXT: vcvtdq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0xe6,0xc8]
7455 ; X86-NEXT: vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
7456 ; X86-NEXT: retl # encoding: [0xc3]
7458 ; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_128:
7460 ; X64-NEXT: vcvtdq2pd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xd0]
7461 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7462 ; X64-NEXT: vcvtdq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0xe6,0xc8]
7463 ; X64-NEXT: vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
7464 ; X64-NEXT: retq # encoding: [0xc3]
7465 %res = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2)
7466 %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1)
7467 %res2 = fadd <2 x double> %res, %res1
7468 ret <2 x double> %res2
7469 }
7471 declare <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32>, <4 x double>, i8)
7473 define <4 x double>@test_int_x86_avx512_mask_cvt_dq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) {
7474 ; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_256:
7476 ; X86-NEXT: vcvtdq2pd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0xe6,0xd0]
7477 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7478 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7479 ; X86-NEXT: vcvtdq2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0xe6,0xc8]
7480 ; X86-NEXT: vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
7481 ; X86-NEXT: retl # encoding: [0xc3]
7483 ; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_256:
7485 ; X64-NEXT: vcvtdq2pd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0xe6,0xd0]
7486 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7487 ; X64-NEXT: vcvtdq2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0xe6,0xc8]
7488 ; X64-NEXT: vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
7489 ; X64-NEXT: retq # encoding: [0xc3]
7490 %res = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2)
7491 %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1)
7492 %res2 = fadd <4 x double> %res, %res1
7493 ret <4 x double> %res2
7494 }
7496 declare <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32>, <2 x double>, i8)
7498 define <2 x double>@test_int_x86_avx512_mask_cvt_udq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {
7499 ; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_128:
7501 ; X86-NEXT: vcvtudq2pd %xmm0, %xmm2 # encoding: [0x62,0xf1,0x7e,0x08,0x7a,0xd0]
7502 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7503 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7504 ; X86-NEXT: vcvtudq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x7a,0xc8]
7505 ; X86-NEXT: vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
7506 ; X86-NEXT: retl # encoding: [0xc3]
7508 ; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_128:
7510 ; X64-NEXT: vcvtudq2pd %xmm0, %xmm2 # encoding: [0x62,0xf1,0x7e,0x08,0x7a,0xd0]
7511 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7512 ; X64-NEXT: vcvtudq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x7a,0xc8]
7513 ; X64-NEXT: vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
7514 ; X64-NEXT: retq # encoding: [0xc3]
7515 %res = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2)
7516 %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1)
7517 %res2 = fadd <2 x double> %res, %res1
7518 ret <2 x double> %res2
7519 }
7521 declare <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32>, <4 x double>, i8)
7523 define <4 x double>@test_int_x86_avx512_mask_cvt_udq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) {
7524 ; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_256:
7526 ; X86-NEXT: vcvtudq2pd %xmm0, %ymm2 # encoding: [0x62,0xf1,0x7e,0x28,0x7a,0xd0]
7527 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7528 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7529 ; X86-NEXT: vcvtudq2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x7a,0xc8]
7530 ; X86-NEXT: vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
7531 ; X86-NEXT: retl # encoding: [0xc3]
7533 ; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_256:
7535 ; X64-NEXT: vcvtudq2pd %xmm0, %ymm2 # encoding: [0x62,0xf1,0x7e,0x28,0x7a,0xd0]
7536 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7537 ; X64-NEXT: vcvtudq2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x7a,0xc8]
7538 ; X64-NEXT: vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
7539 ; X64-NEXT: retq # encoding: [0xc3]
7540 %res = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2)
7541 %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1)
7542 %res2 = fadd <4 x double> %res, %res1
7543 ret <4 x double> %res2
7544 }
7546 declare <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32>, <4 x i32>, i32, <4 x i32>, i8)
7548 define <4 x i32>@test_int_x86_avx512_mask_valign_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
7549 ; X86-LABEL: test_int_x86_avx512_mask_valign_d_128:
7551 ; X86-NEXT: vpalignr $8, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xd9,0x08]
7552 ; X86-NEXT: # xmm3 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
7553 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7554 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7555 ; X86-NEXT: valignd $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x03,0xd1,0x02]
7556 ; X86-NEXT: # xmm2 {%k1} = xmm1[2,3],xmm0[0,1]
7557 ; X86-NEXT: valignd $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x03,0xc1,0x02]
7558 ; X86-NEXT: # xmm0 {%k1} {z} = xmm1[2,3],xmm0[0,1]
7559 ; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
7560 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
7561 ; X86-NEXT: retl # encoding: [0xc3]
7563 ; X64-LABEL: test_int_x86_avx512_mask_valign_d_128:
7565 ; X64-NEXT: vpalignr $8, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xd9,0x08]
7566 ; X64-NEXT: # xmm3 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
7567 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7568 ; X64-NEXT: valignd $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x03,0xd1,0x02]
7569 ; X64-NEXT: # xmm2 {%k1} = xmm1[2,3],xmm0[0,1]
7570 ; X64-NEXT: valignd $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x03,0xc1,0x02]
7571 ; X64-NEXT: # xmm0 {%k1} {z} = xmm1[2,3],xmm0[0,1]
7572 ; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
7573 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
7574 ; X64-NEXT: retq # encoding: [0xc3]
7575 %res = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 2, <4 x i32> %x3, i8 %x4)
7576 %res1 = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 2, <4 x i32> %x3, i8 -1)
7577 %res2 = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 2, <4 x i32> zeroinitializer,i8 %x4)
7578 %res3 = add <4 x i32> %res, %res1
7579 %res4 = add <4 x i32> %res3, %res2
7580 ret <4 x i32> %res4
7581 }
7583 declare <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)
7585 define <8 x i32>@test_int_x86_avx512_mask_valign_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
7586 ; X86-LABEL: test_int_x86_avx512_mask_valign_d_256:
7588 ; X86-NEXT: valignq $3, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x03,0xd9,0x03]
7589 ; X86-NEXT: # ymm3 = ymm1[3],ymm0[0,1,2]
7590 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7591 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7592 ; X86-NEXT: valignd $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x03,0xd1,0x06]
7593 ; X86-NEXT: # ymm2 {%k1} = ymm1[6,7],ymm0[0,1,2,3,4,5]
7594 ; X86-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
7595 ; X86-NEXT: retl # encoding: [0xc3]
7597 ; X64-LABEL: test_int_x86_avx512_mask_valign_d_256:
7599 ; X64-NEXT: valignq $3, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x03,0xd9,0x03]
7600 ; X64-NEXT: # ymm3 = ymm1[3],ymm0[0,1,2]
7601 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7602 ; X64-NEXT: valignd $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x03,0xd1,0x06]
7603 ; X64-NEXT: # ymm2 {%k1} = ymm1[6,7],ymm0[0,1,2,3,4,5]
7604 ; X64-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
7605 ; X64-NEXT: retq # encoding: [0xc3]
7606 %res = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 6, <8 x i32> %x3, i8 %x4)
7607 %res1 = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 6, <8 x i32> %x3, i8 -1)
7608 %res2 = add <8 x i32> %res, %res1
7609 ret <8 x i32> %res2
7610 }
7612 declare <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64>, <2 x i64>, i32, <2 x i64>, i8)
7614 define <2 x i64>@test_int_x86_avx512_mask_valign_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
7615 ; X86-LABEL: test_int_x86_avx512_mask_valign_q_128:
7617 ; X86-NEXT: vpalignr $8, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xd9,0x08]
7618 ; X86-NEXT: # xmm3 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
7619 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7620 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7621 ; X86-NEXT: valignq $1, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x03,0xd1,0x01]
7622 ; X86-NEXT: # xmm2 {%k1} = xmm1[1],xmm0[0]
7623 ; X86-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
7624 ; X86-NEXT: retl # encoding: [0xc3]
7626 ; X64-LABEL: test_int_x86_avx512_mask_valign_q_128:
7628 ; X64-NEXT: vpalignr $8, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xd9,0x08]
7629 ; X64-NEXT: # xmm3 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
7630 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7631 ; X64-NEXT: valignq $1, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x03,0xd1,0x01]
7632 ; X64-NEXT: # xmm2 {%k1} = xmm1[1],xmm0[0]
7633 ; X64-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
7634 ; X64-NEXT: retq # encoding: [0xc3]
7635 %res = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 1, <2 x i64> %x3, i8 %x4)
7636 %res1 = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 1, <2 x i64> %x3, i8 -1)
7637 %res2 = add <2 x i64> %res, %res1
7638 ret <2 x i64> %res2
7639 }
7641 declare <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)
7643 define <4 x i64>@test_int_x86_avx512_mask_valign_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
7644 ; X86-LABEL: test_int_x86_avx512_mask_valign_q_256:
7646 ; X86-NEXT: valignq $3, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x03,0xd9,0x03]
7647 ; X86-NEXT: # ymm3 = ymm1[3],ymm0[0,1,2]
7648 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7649 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7650 ; X86-NEXT: valignq $3, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x03,0xd1,0x03]
7651 ; X86-NEXT: # ymm2 {%k1} = ymm1[3],ymm0[0,1,2]
7652 ; X86-NEXT: vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
7653 ; X86-NEXT: retl # encoding: [0xc3]
7655 ; X64-LABEL: test_int_x86_avx512_mask_valign_q_256:
7657 ; X64-NEXT: valignq $3, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x03,0xd9,0x03]
7658 ; X64-NEXT: # ymm3 = ymm1[3],ymm0[0,1,2]
7659 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7660 ; X64-NEXT: valignq $3, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x03,0xd1,0x03]
7661 ; X64-NEXT: # ymm2 {%k1} = ymm1[3],ymm0[0,1,2]
7662 ; X64-NEXT: vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
7663 ; X64-NEXT: retq # encoding: [0xc3]
7664 %res = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 3, <4 x i64> %x3, i8 %x4)
7665 %res1 = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 3, <4 x i64> %x3, i8 -1)
7666 %res2 = add <4 x i64> %res, %res1
7667 ret <4 x i64> %res2
7668 }
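; The tests below cover the masked VPERMILPD/VPERMILPS variable permutes, followed by the vextractf32x4/vinsertf32x4 subvector tests.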
7670 declare <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)
7672 define <4 x double>@test_int_x86_avx512_mask_vpermilvar_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
7673 ; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_256:
7675 ; X86-NEXT: vpermilpd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0d,0xd9]
7676 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7677 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7678 ; X86-NEXT: vpermilpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x0d,0xd1]
7679 ; X86-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x0d,0xc1]
7680 ; X86-NEXT: vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
7681 ; X86-NEXT: vaddpd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc0]
7682 ; X86-NEXT: retl # encoding: [0xc3]
7684 ; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_256:
7686 ; X64-NEXT: vpermilpd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0d,0xd9]
7687 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7688 ; X64-NEXT: vpermilpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x0d,0xd1]
7689 ; X64-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x0d,0xc1]
7690 ; X64-NEXT: vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
7691 ; X64-NEXT: vaddpd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc0]
7692 ; X64-NEXT: retq # encoding: [0xc3]
7693 %res = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
7694 %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3)
7695 %res2 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
7696 %res3 = fadd <4 x double> %res, %res1
7697 %res4 = fadd <4 x double> %res2, %res3
7698 ret <4 x double> %res4
7699 }
7701 declare <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8)
7703 define <2 x double>@test_int_x86_avx512_mask_vpermilvar_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
7704 ; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_128:
7706 ; X86-NEXT: vpermilpd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0d,0xd9]
7707 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7708 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7709 ; X86-NEXT: vpermilpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x0d,0xd1]
7710 ; X86-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x0d,0xc1]
7711 ; X86-NEXT: vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
7712 ; X86-NEXT: vaddpd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc3]
7713 ; X86-NEXT: retl # encoding: [0xc3]
7715 ; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_128:
7717 ; X64-NEXT: vpermilpd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0d,0xd9]
7718 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7719 ; X64-NEXT: vpermilpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x0d,0xd1]
7720 ; X64-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x0d,0xc1]
7721 ; X64-NEXT: vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
7722 ; X64-NEXT: vaddpd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc3]
7723 ; X64-NEXT: retq # encoding: [0xc3]
7724 %res = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3)
7725 %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> zeroinitializer, i8 %x3)
7726 %res2 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)
7727 %res3 = fadd <2 x double> %res, %res1
7728 %res4 = fadd <2 x double> %res3, %res2
7729 ret <2 x double> %res4
7730 }
7732 declare <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8)
7734 define <8 x float>@test_int_x86_avx512_mask_vpermilvar_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
7735 ; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_256:
7737 ; X86-NEXT: vpermilps %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0c,0xd9]
7738 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7739 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7740 ; X86-NEXT: vpermilps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0c,0xd1]
7741 ; X86-NEXT: vpermilps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x0c,0xc1]
7742 ; X86-NEXT: vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
7743 ; X86-NEXT: vaddps %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
7744 ; X86-NEXT: retl # encoding: [0xc3]
7746 ; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_256:
7748 ; X64-NEXT: vpermilps %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0c,0xd9]
7749 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7750 ; X64-NEXT: vpermilps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0c,0xd1]
7751 ; X64-NEXT: vpermilps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x0c,0xc1]
7752 ; X64-NEXT: vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
7753 ; X64-NEXT: vaddps %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
7754 ; X64-NEXT: retq # encoding: [0xc3]
7755 %res = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
7756 %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3)
7757 %res2 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
7758 %res3 = fadd <8 x float> %res, %res1
7759 %res4 = fadd <8 x float> %res3, %res2
7760 ret <8 x float> %res4
7761 }
7763 declare <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8)
7765 define <4 x float>@test_int_x86_avx512_mask_vpermilvar_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {
7766 ; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_128:
7768 ; X86-NEXT: vpermilps %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0xd9]
7769 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7770 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7771 ; X86-NEXT: vpermilps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0c,0xd1]
7772 ; X86-NEXT: vpermilps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x0c,0xc1]
7773 ; X86-NEXT: vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
7774 ; X86-NEXT: vaddps %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc0]
7775 ; X86-NEXT: retl # encoding: [0xc3]
7777 ; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_128:
7779 ; X64-NEXT: vpermilps %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0xd9]
7780 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7781 ; X64-NEXT: vpermilps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0c,0xd1]
7782 ; X64-NEXT: vpermilps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x0c,0xc1]
7783 ; X64-NEXT: vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
7784 ; X64-NEXT: vaddps %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc0]
7785 ; X64-NEXT: retq # encoding: [0xc3]
7786 %res = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3)
7787 %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> zeroinitializer, i8 %x3)
7788 %res2 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)
7789 %res3 = fadd <4 x float> %res, %res1
7790 %res4 = fadd <4 x float> %res2, %res3
7791 ret <4 x float> %res4
7792 }
7794 declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float>, i32, <4 x float>, i8)
7796 define <4 x float>@test_int_x86_avx512_mask_vextractf32x4_256(<8 x float> %x0, <4 x float> %x2, i8 %x3) {
7797 ; X86-LABEL: test_int_x86_avx512_mask_vextractf32x4_256:
7799 ; X86-NEXT: vextractf128 $1, %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc2,0x01]
7800 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7801 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7802 ; X86-NEXT: vextractf32x4 $1, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x19,0xc1,0x01]
7803 ; X86-NEXT: vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x19,0xc0,0x01]
7804 ; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
7805 ; X86-NEXT: vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
7806 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
7807 ; X86-NEXT: retl # encoding: [0xc3]
7809 ; X64-LABEL: test_int_x86_avx512_mask_vextractf32x4_256:
7811 ; X64-NEXT: vextractf128 $1, %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc2,0x01]
7812 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7813 ; X64-NEXT: vextractf32x4 $1, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x19,0xc1,0x01]
7814 ; X64-NEXT: vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x19,0xc0,0x01]
7815 ; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
7816 ; X64-NEXT: vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
7817 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
7818 ; X64-NEXT: retq # encoding: [0xc3]
7819 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> %x2, i8 %x3)
7820 %res1 = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> zeroinitializer, i8 %x3)
7821 %res2 = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> zeroinitializer, i8 -1)
7822 %res3 = fadd <4 x float> %res, %res1
7823 %res4 = fadd <4 x float> %res2, %res3
7824 ret <4 x float> %res4
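; The insert tests mirror the extracts: a 128-bit value is placed into the upper
; lane (immediate 1), and only the unmasked form is compressed to vinsertf128 /
; vinserti128.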
7827 declare <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float>, <4 x float>, i32, <8 x float>, i8)
7829 define <8 x float>@test_int_x86_avx512_mask_insertf32x4_256(<8 x float> %x0, <4 x float> %x1, <8 x float> %x3, i8 %x4) {
7830 ; X86-LABEL: test_int_x86_avx512_mask_insertf32x4_256:
7832 ; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd9,0x01]
7833 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7834 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7835 ; X86-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xd1,0x01]
7836 ; X86-NEXT: vaddps %ymm3, %ymm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xd3]
7837 ; X86-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc1,0x01]
7838 ; X86-NEXT: vaddps %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc2]
7839 ; X86-NEXT: retl # encoding: [0xc3]
7841 ; X64-LABEL: test_int_x86_avx512_mask_insertf32x4_256:
7843 ; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd9,0x01]
7844 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7845 ; X64-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xd1,0x01]
7846 ; X64-NEXT: vaddps %ymm3, %ymm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xd3]
7847 ; X64-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc1,0x01]
7848 ; X64-NEXT: vaddps %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc2]
7849 ; X64-NEXT: retq # encoding: [0xc3]
7850 %res = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 %x4)
7851 %res1 = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 -1)
7852 %res2 = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> zeroinitializer, i8 %x4)
7853 %res3 = fadd <8 x float> %res, %res1
7854 %res4 = fadd <8 x float> %res2, %res3
7855 ret <8 x float> %res4
7858 declare <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32>, <4 x i32>, i32, <8 x i32>, i8)
7860 define <8 x i32>@test_int_x86_avx512_mask_inserti32x4_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x3, i8 %x4) {
7861 ; X86-LABEL: test_int_x86_avx512_mask_inserti32x4_256:
7863 ; X86-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd9,0x01]
7864 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7865 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7866 ; X86-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x38,0xd1,0x01]
7867 ; X86-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc1,0x01]
7868 ; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
7869 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
7870 ; X86-NEXT: retl # encoding: [0xc3]
7872 ; X64-LABEL: test_int_x86_avx512_mask_inserti32x4_256:
7874 ; X64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd9,0x01]
7875 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7876 ; X64-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x38,0xd1,0x01]
7877 ; X64-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc1,0x01]
7878 ; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
7879 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
7880 ; X64-NEXT: retq # encoding: [0xc3]
7882 %res = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 %x4)
7883 %res1 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 -1)
7884 %res2 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> zeroinitializer, i8 %x4)
7885 %res3 = add <8 x i32> %res, %res1
7886 %res4 = add <8 x i32> %res2, %res3
7887 ret <8 x i32> %res4
7888 }
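; The vmaxps/vminps tests that follow cover the three masking forms of the 128- and
; 256-bit intrinsics: zero-masking ({k1} {z}), merge-masking into the passthru
; operand, and an all-ones mask, which lowers to the plain VEX instruction.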
7890 define <8 x float> @test_mm512_maskz_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
7891 ; X86-LABEL: test_mm512_maskz_max_ps_256:
7893 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7894 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7895 ; X86-NEXT: vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5f,0xc1]
7896 ; X86-NEXT: retl # encoding: [0xc3]
7898 ; X64-LABEL: test_mm512_maskz_max_ps_256:
7900 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7901 ; X64-NEXT: vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5f,0xc1]
7902 ; X64-NEXT: retq # encoding: [0xc3]
7903 %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
7904 ret <8 x float> %res
7907 define <8 x float> @test_mm512_mask_max_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
7908 ; X86-LABEL: test_mm512_mask_max_ps_256:
7910 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7911 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7912 ; X86-NEXT: vmaxps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5f,0xd1]
7913 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
7914 ; X86-NEXT: retl # encoding: [0xc3]
7916 ; X64-LABEL: test_mm512_mask_max_ps_256:
7918 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7919 ; X64-NEXT: vmaxps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5f,0xd1]
7920 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
7921 ; X64-NEXT: retq # encoding: [0xc3]
7922 %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
7923 ret <8 x float> %res
7926 define <8 x float> @test_mm512_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
7927 ; CHECK-LABEL: test_mm512_max_ps_256:
7929 ; CHECK-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5f,0xc1]
7930 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7931 %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
7932 ret <8 x float> %res
7934 declare <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
7936 define <4 x float> @test_mm512_maskz_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
7937 ; X86-LABEL: test_mm512_maskz_max_ps_128:
7939 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7940 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7941 ; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5f,0xc1]
7942 ; X86-NEXT: retl # encoding: [0xc3]
7944 ; X64-LABEL: test_mm512_maskz_max_ps_128:
7946 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7947 ; X64-NEXT: vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5f,0xc1]
7948 ; X64-NEXT: retq # encoding: [0xc3]
7949 %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
7950 ret <4 x float> %res
7953 define <4 x float> @test_mm512_mask_max_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
7954 ; X86-LABEL: test_mm512_mask_max_ps_128:
7956 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7957 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7958 ; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5f,0xd1]
7959 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
7960 ; X86-NEXT: retl # encoding: [0xc3]
7962 ; X64-LABEL: test_mm512_mask_max_ps_128:
7964 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7965 ; X64-NEXT: vmaxps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5f,0xd1]
7966 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
7967 ; X64-NEXT: retq # encoding: [0xc3]
7968 %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
7969 ret <4 x float> %res
7972 define <4 x float> @test_mm512_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
7973 ; CHECK-LABEL: test_mm512_max_ps_128:
7975 ; CHECK-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5f,0xc1]
7976 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7977 %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
7978 ret <4 x float> %res
7980 declare <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
7982 define <8 x float> @test_mm512_maskz_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
7983 ; X86-LABEL: test_mm512_maskz_min_ps_256:
7985 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7986 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7987 ; X86-NEXT: vminps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5d,0xc1]
7988 ; X86-NEXT: retl # encoding: [0xc3]
7990 ; X64-LABEL: test_mm512_maskz_min_ps_256:
7992 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7993 ; X64-NEXT: vminps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5d,0xc1]
7994 ; X64-NEXT: retq # encoding: [0xc3]
7995 %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
7996 ret <8 x float> %res
7999 define <8 x float> @test_mm512_mask_min_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
8000 ; X86-LABEL: test_mm512_mask_min_ps_256:
8002 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8003 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
8004 ; X86-NEXT: vminps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5d,0xd1]
8005 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
8006 ; X86-NEXT: retl # encoding: [0xc3]
8008 ; X64-LABEL: test_mm512_mask_min_ps_256:
8010 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
8011 ; X64-NEXT: vminps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5d,0xd1]
8012 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
8013 ; X64-NEXT: retq # encoding: [0xc3]
8014 %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
8015 ret <8 x float> %res
8018 define <8 x float> @test_mm512_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
8019 ; CHECK-LABEL: test_mm512_min_ps_256:
8021 ; CHECK-NEXT: vminps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5d,0xc1]
8022 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
8023 %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
8024 ret <8 x float> %res
8026 declare <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
8028 define <4 x float> @test_mm512_maskz_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
8029 ; X86-LABEL: test_mm512_maskz_min_ps_128:
8031 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8032 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
8033 ; X86-NEXT: vminps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5d,0xc1]
8034 ; X86-NEXT: retl # encoding: [0xc3]
8036 ; X64-LABEL: test_mm512_maskz_min_ps_128:
8038 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
8039 ; X64-NEXT: vminps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5d,0xc1]
8040 ; X64-NEXT: retq # encoding: [0xc3]
8041 %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
8042 ret <4 x float> %res
8045 define <4 x float> @test_mm512_mask_min_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
8046 ; X86-LABEL: test_mm512_mask_min_ps_128:
8048 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8049 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
8050 ; X86-NEXT: vminps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5d,0xd1]
8051 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
8052 ; X86-NEXT: retl # encoding: [0xc3]
8054 ; X64-LABEL: test_mm512_mask_min_ps_128:
8056 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
8057 ; X64-NEXT: vminps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5d,0xd1]
8058 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
8059 ; X64-NEXT: retq # encoding: [0xc3]
8060 %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
8061 ret <4 x float> %res
8064 define <4 x float> @test_mm512_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
8065 ; CHECK-LABEL: test_mm512_min_ps_128:
8067 ; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5d,0xc1]
8068 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
8069 %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
8070 ret <4 x float> %res
8072 declare <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
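; Each cmp/ucmp test below evaluates all eight comparison predicates (0-7) and packs
; the eight i8 mask results into a <8 x i8> via vpinsrw. Predicate 3 ("false")
; constant-folds to zero, so element 3 is never inserted, and predicate 7 ("true")
; folds to the mask itself: 255 (or 15 for four-element compares) when unmasked,
; otherwise the incoming mask restricted to the valid lanes.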
8074 define <8 x i8> @test_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
8075 ; CHECK-LABEL: test_cmp_d_256:
8077 ; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
8078 ; CHECK-NEXT: vpcmpgtd %ymm0, %ymm1, %k1 # encoding: [0x62,0xf1,0x75,0x28,0x66,0xc8]
8079 ; CHECK-NEXT: vpcmpled %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd1,0x02]
8080 ; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd9,0x04]
8081 ; CHECK-NEXT: vpcmpnltd %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xe1,0x05]
8082 ; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k5 # encoding: [0x62,0xf1,0x7d,0x28,0x66,0xe9]
8083 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8084 ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8085 ; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8086 ; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8087 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8088 ; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8089 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8090 ; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8091 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8092 ; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8093 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8094 ; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8095 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8096 ; CHECK-NEXT: movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00]
8097 ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8098 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
8099 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
8100 %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1)
8101 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8102 %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 -1)
8103 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8104 %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 -1)
8105 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8106 %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 -1)
8107 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8108 %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 -1)
8109 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8110 %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 -1)
8111 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8112 %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 -1)
8113 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8114 %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 -1)
8115 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8116 ret <8 x i8> %vec7
8117 }
8119 define <8 x i8> @test_mask_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
8120 ; X86-LABEL: test_mask_cmp_d_256:
8122 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
8123 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
8124 ; X86-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
8125 ; X86-NEXT: vpcmpgtd %ymm0, %ymm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x66,0xd0]
8126 ; X86-NEXT: vpcmpled %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x02]
8127 ; X86-NEXT: vpcmpneqd %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x04]
8128 ; X86-NEXT: vpcmpnltd %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe9,0x05]
8129 ; X86-NEXT: vpcmpgtd %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc9]
8130 ; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
8131 ; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8132 ; X86-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
8133 ; X86-NEXT: kmovw %k2, %ecx # encoding: [0xc5,0xf8,0x93,0xca]
8134 ; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
8135 ; X86-NEXT: kmovw %k3, %ecx # encoding: [0xc5,0xf8,0x93,0xcb]
8136 ; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
8137 ; X86-NEXT: kmovw %k4, %ecx # encoding: [0xc5,0xf8,0x93,0xcc]
8138 ; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
8139 ; X86-NEXT: kmovw %k5, %ecx # encoding: [0xc5,0xf8,0x93,0xcd]
8140 ; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
8141 ; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
8142 ; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
8143 ; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8144 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
8145 ; X86-NEXT: retl # encoding: [0xc3]
8147 ; X64-LABEL: test_mask_cmp_d_256:
8149 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
8150 ; X64-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
8151 ; X64-NEXT: vpcmpgtd %ymm0, %ymm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x66,0xd0]
8152 ; X64-NEXT: vpcmpled %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x02]
8153 ; X64-NEXT: vpcmpneqd %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x04]
8154 ; X64-NEXT: vpcmpnltd %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe9,0x05]
8155 ; X64-NEXT: vpcmpgtd %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc9]
8156 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8157 ; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8158 ; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8159 ; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8160 ; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8161 ; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8162 ; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8163 ; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8164 ; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8165 ; X64-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8166 ; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8167 ; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8168 ; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8169 ; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
8170 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
8171 ; X64-NEXT: retq # encoding: [0xc3]
8172 %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask)
8173 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8174 %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 %mask)
8175 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8176 %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 %mask)
8177 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8178 %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 %mask)
8179 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8180 %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 %mask)
8181 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8182 %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 %mask)
8183 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8184 %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 %mask)
8185 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8186 %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 %mask)
8187 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8188 ret <8 x i8> %vec7
8189 }
8191 declare i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounwind readnone
8193 define <8 x i8> @test_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
8194 ; CHECK-LABEL: test_ucmp_d_256:
8196 ; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
8197 ; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc9,0x01]
8198 ; CHECK-NEXT: vpcmpleud %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xd1,0x02]
8199 ; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd9,0x04]
8200 ; CHECK-NEXT: vpcmpnltud %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xe1,0x05]
8201 ; CHECK-NEXT: vpcmpnleud %ymm1, %ymm0, %k5 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xe9,0x06]
8202 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8203 ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8204 ; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8205 ; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8206 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8207 ; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8208 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8209 ; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8210 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8211 ; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8212 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8213 ; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8214 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8215 ; CHECK-NEXT: movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00]
8216 ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8217 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
8218 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
8219 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1)
8220 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8221 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 -1)
8222 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8223 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 -1)
8224 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8225 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 -1)
8226 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8227 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 -1)
8228 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8229 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 -1)
8230 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8231 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 -1)
8232 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8233 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 -1)
8234 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8235 ret <8 x i8> %vec7
8236 }
8238 define <8 x i8> @test_mask_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
8239 ; X86-LABEL: test_mask_ucmp_d_256:
8241 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
8242 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
8243 ; X86-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
8244 ; X86-NEXT: vpcmpltud %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd1,0x01]
8245 ; X86-NEXT: vpcmpleud %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd9,0x02]
8246 ; X86-NEXT: vpcmpneqd %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x04]
8247 ; X86-NEXT: vpcmpnltud %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xe9,0x05]
8248 ; X86-NEXT: vpcmpnleud %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc9,0x06]
8249 ; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
8250 ; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8251 ; X86-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
8252 ; X86-NEXT: kmovw %k2, %ecx # encoding: [0xc5,0xf8,0x93,0xca]
8253 ; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
8254 ; X86-NEXT: kmovw %k3, %ecx # encoding: [0xc5,0xf8,0x93,0xcb]
8255 ; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
8256 ; X86-NEXT: kmovw %k4, %ecx # encoding: [0xc5,0xf8,0x93,0xcc]
8257 ; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
8258 ; X86-NEXT: kmovw %k5, %ecx # encoding: [0xc5,0xf8,0x93,0xcd]
8259 ; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
8260 ; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
8261 ; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
8262 ; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8263 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
8264 ; X86-NEXT: retl # encoding: [0xc3]
8266 ; X64-LABEL: test_mask_ucmp_d_256:
8268 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
8269 ; X64-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
8270 ; X64-NEXT: vpcmpltud %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd1,0x01]
8271 ; X64-NEXT: vpcmpleud %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd9,0x02]
8272 ; X64-NEXT: vpcmpneqd %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x04]
8273 ; X64-NEXT: vpcmpnltud %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xe9,0x05]
8274 ; X64-NEXT: vpcmpnleud %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc9,0x06]
8275 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8276 ; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8277 ; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8278 ; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8279 ; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8280 ; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8281 ; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8282 ; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8283 ; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8284 ; X64-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8285 ; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8286 ; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8287 ; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8288 ; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
8289 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
8290 ; X64-NEXT: retq # encoding: [0xc3]
8291 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask)
8292 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8293 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 %mask)
8294 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8295 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 %mask)
8296 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8297 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 %mask)
8298 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8299 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 %mask)
8300 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8301 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 %mask)
8302 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8303 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 %mask)
8304 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8305 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 %mask)
8306 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8307 ret <8 x i8> %vec7
8308 }
8310 declare i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounwind readnone
8312 define <8 x i8> @test_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
8313 ; CHECK-LABEL: test_cmp_q_256:
8315 ; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1]
8316 ; CHECK-NEXT: vpcmpgtq %ymm0, %ymm1, %k1 # encoding: [0x62,0xf2,0xf5,0x28,0x37,0xc8]
8317 ; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd1,0x02]
8318 ; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd9,0x04]
8319 ; CHECK-NEXT: vpcmpnltq %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xe1,0x05]
8320 ; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k5 # encoding: [0x62,0xf2,0xfd,0x28,0x37,0xe9]
8321 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8322 ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8323 ; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8324 ; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8325 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8326 ; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8327 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8328 ; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8329 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8330 ; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8331 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8332 ; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8333 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8334 ; CHECK-NEXT: movl $15, %eax # encoding: [0xb8,0x0f,0x00,0x00,0x00]
8335 ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8336 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
8337 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
8338 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1)
8339 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8340 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 -1)
8341 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8342 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 -1)
8343 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8344 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 -1)
8345 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8346 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 -1)
8347 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8348 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 -1)
8349 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8350 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 -1)
8351 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8352 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 -1)
8353 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8354 ret <8 x i8> %vec7
8355 }
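; In the masked four-element compares, the kshiftlw $12 / kshiftrw $12 pair clears
; the upper twelve bits of the input mask, leaving only the four valid lanes for the
; element-7 ("true") result.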
8357 define <8 x i8> @test_mask_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
8358 ; X86-LABEL: test_mask_cmp_q_256:
8360 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8361 ; X86-NEXT: kmovw %eax, %k2 # encoding: [0xc5,0xf8,0x92,0xd0]
8362 ; X86-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x2a,0x29,0xc1]
8363 ; X86-NEXT: vpcmpgtq %ymm0, %ymm1, %k1 {%k2} # encoding: [0x62,0xf2,0xf5,0x2a,0x37,0xc8]
8364 ; X86-NEXT: vpcmpleq %ymm1, %ymm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xd9,0x02]
8365 ; X86-NEXT: vpcmpneqq %ymm1, %ymm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xe1,0x04]
8366 ; X86-NEXT: vpcmpnltq %ymm1, %ymm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xe9,0x05]
8367 ; X86-NEXT: vpcmpgtq %ymm1, %ymm0, %k6 {%k2} # encoding: [0x62,0xf2,0xfd,0x2a,0x37,0xf1]
8368 ; X86-NEXT: kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
8369 ; X86-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
8370 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8371 ; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8372 ; X86-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8373 ; X86-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8374 ; X86-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8375 ; X86-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8376 ; X86-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8377 ; X86-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8378 ; X86-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8379 ; X86-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8380 ; X86-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8381 ; X86-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
8382 ; X86-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8383 ; X86-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8384 ; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8385 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
8386 ; X86-NEXT: retl # encoding: [0xc3]
8388 ; X64-LABEL: test_mask_cmp_q_256:
8390 ; X64-NEXT: kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
8391 ; X64-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x2a,0x29,0xc1]
8392 ; X64-NEXT: vpcmpgtq %ymm0, %ymm1, %k1 {%k2} # encoding: [0x62,0xf2,0xf5,0x2a,0x37,0xc8]
8393 ; X64-NEXT: vpcmpleq %ymm1, %ymm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xd9,0x02]
8394 ; X64-NEXT: vpcmpneqq %ymm1, %ymm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xe1,0x04]
8395 ; X64-NEXT: vpcmpnltq %ymm1, %ymm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xe9,0x05]
8396 ; X64-NEXT: vpcmpgtq %ymm1, %ymm0, %k6 {%k2} # encoding: [0x62,0xf2,0xfd,0x2a,0x37,0xf1]
8397 ; X64-NEXT: kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
8398 ; X64-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
8399 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8400 ; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8401 ; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8402 ; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8403 ; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8404 ; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8405 ; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8406 ; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8407 ; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8408 ; X64-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8409 ; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8410 ; X64-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
8411 ; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8412 ; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8413 ; X64-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8414 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
8415 ; X64-NEXT: retq # encoding: [0xc3]
8416 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
8417 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8418 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 %mask)
8419 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8420 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 %mask)
8421 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8422 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 %mask)
8423 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8424 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 %mask)
8425 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8426 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 %mask)
8427 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8428 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 %mask)
8429 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8430 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 %mask)
8431 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8432 ret <8 x i8> %vec7
8433 }
8435 declare i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounwind readnone
8437 define <8 x i8> @test_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
8438 ; CHECK-LABEL: test_ucmp_q_256:
8440 ; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1]
8441 ; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc9,0x01]
8442 ; CHECK-NEXT: vpcmpleuq %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xd1,0x02]
8443 ; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd9,0x04]
8444 ; CHECK-NEXT: vpcmpnltuq %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe1,0x05]
8445 ; CHECK-NEXT: vpcmpnleuq %ymm1, %ymm0, %k5 # encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe9,0x06]
8446 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8447 ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8448 ; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8449 ; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8450 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8451 ; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8452 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8453 ; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8454 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8455 ; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8456 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8457 ; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8458 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8459 ; CHECK-NEXT: movl $15, %eax # encoding: [0xb8,0x0f,0x00,0x00,0x00]
8460 ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8461 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
8462 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
8463 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1)
8464 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8465 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 -1)
8466 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8467 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 -1)
8468 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8469 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 -1)
8470 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8471 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 -1)
8472 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8473 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 -1)
8474 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8475 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 -1)
8476 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8477 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 -1)
8478 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8479 ret <8 x i8> %vec7
8480 }
8482 define <8 x i8> @test_mask_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
8483 ; X86-LABEL: test_mask_ucmp_q_256:
8485 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8486 ; X86-NEXT: kmovw %eax, %k2 # encoding: [0xc5,0xf8,0x92,0xd0]
8487 ; X86-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x2a,0x29,0xc1]
8488 ; X86-NEXT: vpcmpltuq %ymm1, %ymm0, %k1 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xc9,0x01]
8489 ; X86-NEXT: vpcmpleuq %ymm1, %ymm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xd9,0x02]
8490 ; X86-NEXT: vpcmpneqq %ymm1, %ymm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xe1,0x04]
8491 ; X86-NEXT: vpcmpnltuq %ymm1, %ymm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xe9,0x05]
8492 ; X86-NEXT: vpcmpnleuq %ymm1, %ymm0, %k6 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xf1,0x06]
8493 ; X86-NEXT: kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
8494 ; X86-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
8495 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8496 ; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8497 ; X86-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8498 ; X86-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8499 ; X86-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8500 ; X86-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8501 ; X86-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8502 ; X86-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8503 ; X86-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8504 ; X86-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8505 ; X86-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8506 ; X86-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
8507 ; X86-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8508 ; X86-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8509 ; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8510 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
8511 ; X86-NEXT: retl # encoding: [0xc3]
8513 ; X64-LABEL: test_mask_ucmp_q_256:
8515 ; X64-NEXT: kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
8516 ; X64-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x2a,0x29,0xc1]
8517 ; X64-NEXT: vpcmpltuq %ymm1, %ymm0, %k1 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xc9,0x01]
8518 ; X64-NEXT: vpcmpleuq %ymm1, %ymm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xd9,0x02]
8519 ; X64-NEXT: vpcmpneqq %ymm1, %ymm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xe1,0x04]
8520 ; X64-NEXT: vpcmpnltuq %ymm1, %ymm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xe9,0x05]
8521 ; X64-NEXT: vpcmpnleuq %ymm1, %ymm0, %k6 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xf1,0x06]
8522 ; X64-NEXT: kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
8523 ; X64-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
8524 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8525 ; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8526 ; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8527 ; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8528 ; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8529 ; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8530 ; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8531 ; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8532 ; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8533 ; X64-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8534 ; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8535 ; X64-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
8536 ; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8537 ; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8538 ; X64-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8539 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
8540 ; X64-NEXT: retq # encoding: [0xc3]
8541 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
8542 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8543 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 %mask)
8544 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8545 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 %mask)
8546 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8547 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 %mask)
8548 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8549 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 %mask)
8550 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8551 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 %mask)
8552 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8553 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 %mask)
8554 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8555 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 %mask)
8556 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8557 ret <8 x i8> %vec7
8558 }
8560 declare i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounwind readnone
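; The 128-bit integer compares repeat the same pattern with xmm operands; no ymm
; register is touched, so no vzeroupper is emitted before returning.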
8562 define <8 x i8> @test_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
8563 ; CHECK-LABEL: test_cmp_d_128:
8565 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
8566 ; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 # encoding: [0x62,0xf1,0x75,0x08,0x66,0xc8]
8567 ; CHECK-NEXT: vpcmpled %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd1,0x02]
8568 ; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd9,0x04]
8569 ; CHECK-NEXT: vpcmpnltd %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xe1,0x05]
8570 ; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k5 # encoding: [0x62,0xf1,0x7d,0x08,0x66,0xe9]
8571 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8572 ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8573 ; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8574 ; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8575 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8576 ; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8577 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8578 ; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8579 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8580 ; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8581 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8582 ; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8583 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8584 ; CHECK-NEXT: movl $15, %eax # encoding: [0xb8,0x0f,0x00,0x00,0x00]
8585 ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8586 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
8587 %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1)
8588 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8589 %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 -1)
8590 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8591 %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 -1)
8592 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8593 %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 -1)
8594 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8595 %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 -1)
8596 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8597 %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 -1)
8598 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8599 %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 -1)
8600 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8601 %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 -1)
8602 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8603 ret <8 x i8> %vec7
8604 }
8606 define <8 x i8> @test_mask_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
8607 ; X86-LABEL: test_mask_cmp_d_128:
8609 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8610 ; X86-NEXT: kmovw %eax, %k2 # encoding: [0xc5,0xf8,0x92,0xd0]
8611 ; X86-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf1,0x7d,0x0a,0x76,0xc1]
8612 ; X86-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 {%k2} # encoding: [0x62,0xf1,0x75,0x0a,0x66,0xc8]
8613 ; X86-NEXT: vpcmpled %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xd9,0x02]
8614 ; X86-NEXT: vpcmpneqd %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xe1,0x04]
8615 ; X86-NEXT: vpcmpnltd %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xe9,0x05]
8616 ; X86-NEXT: vpcmpgtd %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf1,0x7d,0x0a,0x66,0xf1]
8617 ; X86-NEXT: kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
8618 ; X86-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
8619 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8620 ; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8621 ; X86-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8622 ; X86-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8623 ; X86-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8624 ; X86-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8625 ; X86-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8626 ; X86-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8627 ; X86-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8628 ; X86-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8629 ; X86-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8630 ; X86-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
8631 ; X86-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8632 ; X86-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8633 ; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8634 ; X86-NEXT: retl # encoding: [0xc3]
8636 ; X64-LABEL: test_mask_cmp_d_128:
8638 ; X64-NEXT: kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
8639 ; X64-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf1,0x7d,0x0a,0x76,0xc1]
8640 ; X64-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 {%k2} # encoding: [0x62,0xf1,0x75,0x0a,0x66,0xc8]
8641 ; X64-NEXT: vpcmpled %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xd9,0x02]
8642 ; X64-NEXT: vpcmpneqd %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xe1,0x04]
8643 ; X64-NEXT: vpcmpnltd %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xe9,0x05]
8644 ; X64-NEXT: vpcmpgtd %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf1,0x7d,0x0a,0x66,0xf1]
8645 ; X64-NEXT: kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
8646 ; X64-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
8647 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8648 ; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8649 ; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8650 ; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8651 ; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8652 ; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8653 ; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8654 ; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8655 ; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8656 ; X64-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8657 ; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8658 ; X64-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
8659 ; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8660 ; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8661 ; X64-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8662 ; X64-NEXT: retq # encoding: [0xc3]
8663 %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
8664 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8665 %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 %mask)
8666 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8667 %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 %mask)
8668 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8669 %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 %mask)
8670 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8671 %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 %mask)
8672 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8673 %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 %mask)
8674 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8675 %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 %mask)
8676 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8677 %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 %mask)
8678 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8679 ret <8 x i8> %vec7
8680 }
8682 declare i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32>, <4 x i32>, i32, i8) nounwind readnone
8684 define <8 x i8> @test_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
8685 ; CHECK-LABEL: test_ucmp_d_128:
8687 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
8688 ; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc9,0x01]
8689 ; CHECK-NEXT: vpcmpleud %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xd1,0x02]
8690 ; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd9,0x04]
8691 ; CHECK-NEXT: vpcmpnltud %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe1,0x05]
8692 ; CHECK-NEXT: vpcmpnleud %xmm1, %xmm0, %k5 # encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe9,0x06]
8693 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8694 ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8695 ; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8696 ; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8697 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8698 ; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8699 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8700 ; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8701 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8702 ; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8703 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8704 ; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8705 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8706 ; CHECK-NEXT: movl $15, %eax # encoding: [0xb8,0x0f,0x00,0x00,0x00]
8707 ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8708 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
8709 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1)
8710 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8711 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 -1)
8712 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8713 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 -1)
8714 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8715 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 -1)
8716 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8717 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 -1)
8718 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8719 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 -1)
8720 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8721 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 -1)
8722 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8723 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 -1)
8724 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8725 ret <8 x i8> %vec7
8726 }
8728 define <8 x i8> @test_mask_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
8729 ; X86-LABEL: test_mask_ucmp_d_128:
8731 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8732 ; X86-NEXT: kmovw %eax, %k2 # encoding: [0xc5,0xf8,0x92,0xd0]
8733 ; X86-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf1,0x7d,0x0a,0x76,0xc1]
8734 ; X86-NEXT: vpcmpltud %xmm1, %xmm0, %k1 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xc9,0x01]
8735 ; X86-NEXT: vpcmpleud %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xd9,0x02]
8736 ; X86-NEXT: vpcmpneqd %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xe1,0x04]
8737 ; X86-NEXT: vpcmpnltud %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xe9,0x05]
8738 ; X86-NEXT: vpcmpnleud %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xf1,0x06]
8739 ; X86-NEXT: kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
8740 ; X86-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
8741 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8742 ; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8743 ; X86-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8744 ; X86-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8745 ; X86-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8746 ; X86-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8747 ; X86-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8748 ; X86-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8749 ; X86-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8750 ; X86-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8751 ; X86-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8752 ; X86-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
8753 ; X86-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8754 ; X86-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8755 ; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8756 ; X86-NEXT: retl # encoding: [0xc3]
8758 ; X64-LABEL: test_mask_ucmp_d_128:
8760 ; X64-NEXT: kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
8761 ; X64-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf1,0x7d,0x0a,0x76,0xc1]
8762 ; X64-NEXT: vpcmpltud %xmm1, %xmm0, %k1 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xc9,0x01]
8763 ; X64-NEXT: vpcmpleud %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xd9,0x02]
8764 ; X64-NEXT: vpcmpneqd %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xe1,0x04]
8765 ; X64-NEXT: vpcmpnltud %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xe9,0x05]
8766 ; X64-NEXT: vpcmpnleud %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xf1,0x06]
8767 ; X64-NEXT: kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
8768 ; X64-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
8769 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8770 ; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8771 ; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8772 ; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8773 ; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8774 ; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8775 ; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8776 ; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8777 ; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8778 ; X64-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8779 ; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8780 ; X64-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
8781 ; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8782 ; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8783 ; X64-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8784 ; X64-NEXT: retq # encoding: [0xc3]
8785 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
8786 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8787 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 %mask)
8788 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8789 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 %mask)
8790 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8791 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 %mask)
8792 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8793 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 %mask)
8794 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8795 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 %mask)
8796 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8797 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 %mask)
8798 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8799 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 %mask)
8800 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8801 ret <8 x i8> %vec7
8802 }
8804 declare i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32>, <4 x i32>, i32, i8) nounwind readnone
8806 define <8 x i8> @test_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
8807 ; CHECK-LABEL: test_cmp_q_128:
8809 ; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1]
8810 ; CHECK-NEXT: vpcmpgtq %xmm0, %xmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x08,0x37,0xc8]
8811 ; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd1,0x02]
8812 ; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd9,0x04]
8813 ; CHECK-NEXT: vpcmpnltq %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xe1,0x05]
8814 ; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k5 # encoding: [0x62,0xf2,0xfd,0x08,0x37,0xe9]
8815 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8816 ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8817 ; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8818 ; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8819 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8820 ; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8821 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8822 ; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8823 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8824 ; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8825 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8826 ; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8827 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8828 ; CHECK-NEXT: movl $3, %eax # encoding: [0xb8,0x03,0x00,0x00,0x00]
8829 ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8830 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
8831 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1)
8832 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8833 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 -1)
8834 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8835 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 -1)
8836 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8837 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 -1)
8838 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8839 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 -1)
8840 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8841 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 -1)
8842 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8843 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 -1)
8844 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8845 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 -1)
8846 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8847 ret <8 x i8> %vec7
8848 }
8850 define <8 x i8> @test_mask_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
8851 ; X86-LABEL: test_mask_cmp_q_128:
8853 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8854 ; X86-NEXT: kmovw %eax, %k2 # encoding: [0xc5,0xf8,0x92,0xd0]
8855 ; X86-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x0a,0x29,0xc1]
8856 ; X86-NEXT: vpcmpgtq %xmm0, %xmm1, %k1 {%k2} # encoding: [0x62,0xf2,0xf5,0x0a,0x37,0xc8]
8857 ; X86-NEXT: vpcmpleq %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xd9,0x02]
8858 ; X86-NEXT: vpcmpneqq %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xe1,0x04]
8859 ; X86-NEXT: vpcmpnltq %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xe9,0x05]
8860 ; X86-NEXT: vpcmpgtq %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf2,0xfd,0x0a,0x37,0xf1]
8861 ; X86-NEXT: kshiftlw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0e]
8862 ; X86-NEXT: kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
8863 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8864 ; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8865 ; X86-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8866 ; X86-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8867 ; X86-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8868 ; X86-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8869 ; X86-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8870 ; X86-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8871 ; X86-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8872 ; X86-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8873 ; X86-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8874 ; X86-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
8875 ; X86-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8876 ; X86-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8877 ; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8878 ; X86-NEXT: retl # encoding: [0xc3]
8880 ; X64-LABEL: test_mask_cmp_q_128:
8882 ; X64-NEXT: kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
8883 ; X64-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x0a,0x29,0xc1]
8884 ; X64-NEXT: vpcmpgtq %xmm0, %xmm1, %k1 {%k2} # encoding: [0x62,0xf2,0xf5,0x0a,0x37,0xc8]
8885 ; X64-NEXT: vpcmpleq %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xd9,0x02]
8886 ; X64-NEXT: vpcmpneqq %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xe1,0x04]
8887 ; X64-NEXT: vpcmpnltq %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xe9,0x05]
8888 ; X64-NEXT: vpcmpgtq %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf2,0xfd,0x0a,0x37,0xf1]
8889 ; X64-NEXT: kshiftlw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0e]
8890 ; X64-NEXT: kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
8891 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8892 ; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8893 ; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8894 ; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8895 ; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8896 ; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8897 ; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8898 ; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8899 ; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8900 ; X64-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8901 ; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8902 ; X64-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
8903 ; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8904 ; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8905 ; X64-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8906 ; X64-NEXT: retq # encoding: [0xc3]
8907 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)
8908 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8909 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 %mask)
8910 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8911 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 %mask)
8912 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8913 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 %mask)
8914 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8915 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 %mask)
8916 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8917 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 %mask)
8918 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8919 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 %mask)
8920 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8921 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 %mask)
8922 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8923 ret <8 x i8> %vec7
8924 }
8926 declare i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64>, <2 x i64>, i32, i8) nounwind readnone
8928 define <8 x i8> @test_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
8929 ; CHECK-LABEL: test_ucmp_q_128:
8931 ; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1]
8932 ; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc9,0x01]
8933 ; CHECK-NEXT: vpcmpleuq %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xd1,0x02]
8934 ; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd9,0x04]
8935 ; CHECK-NEXT: vpcmpnltuq %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe1,0x05]
8936 ; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k5 # encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe9,0x06]
8937 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8938 ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8939 ; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8940 ; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8941 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8942 ; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8943 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8944 ; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8945 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8946 ; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8947 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8948 ; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8949 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8950 ; CHECK-NEXT: movl $3, %eax # encoding: [0xb8,0x03,0x00,0x00,0x00]
8951 ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8952 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
8953 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1)
8954 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8955 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 -1)
8956 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8957 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 -1)
8958 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8959 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 -1)
8960 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8961 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 -1)
8962 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8963 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 -1)
8964 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8965 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 -1)
8966 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8967 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 -1)
8968 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8969 ret <8 x i8> %vec7
8970 }
8972 define <8 x i8> @test_mask_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
8973 ; X86-LABEL: test_mask_ucmp_q_128:
8975 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8976 ; X86-NEXT: kmovw %eax, %k2 # encoding: [0xc5,0xf8,0x92,0xd0]
8977 ; X86-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x0a,0x29,0xc1]
8978 ; X86-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xc9,0x01]
8979 ; X86-NEXT: vpcmpleuq %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xd9,0x02]
8980 ; X86-NEXT: vpcmpneqq %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xe1,0x04]
8981 ; X86-NEXT: vpcmpnltuq %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xe9,0x05]
8982 ; X86-NEXT: vpcmpnleuq %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xf1,0x06]
8983 ; X86-NEXT: kshiftlw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0e]
8984 ; X86-NEXT: kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
8985 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8986 ; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8987 ; X86-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8988 ; X86-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8989 ; X86-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8990 ; X86-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8991 ; X86-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8992 ; X86-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8993 ; X86-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8994 ; X86-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8995 ; X86-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8996 ; X86-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
8997 ; X86-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8998 ; X86-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8999 ; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
9000 ; X86-NEXT: retl # encoding: [0xc3]
9002 ; X64-LABEL: test_mask_ucmp_q_128:
9004 ; X64-NEXT: kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
9005 ; X64-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x0a,0x29,0xc1]
9006 ; X64-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xc9,0x01]
9007 ; X64-NEXT: vpcmpleuq %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xd9,0x02]
9008 ; X64-NEXT: vpcmpneqq %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xe1,0x04]
9009 ; X64-NEXT: vpcmpnltuq %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xe9,0x05]
9010 ; X64-NEXT: vpcmpnleuq %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xf1,0x06]
9011 ; X64-NEXT: kshiftlw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0e]
9012 ; X64-NEXT: kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
9013 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9014 ; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
9015 ; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
9016 ; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
9017 ; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
9018 ; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
9019 ; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
9020 ; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
9021 ; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
9022 ; X64-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
9023 ; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
9024 ; X64-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
9025 ; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
9026 ; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
9027 ; X64-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
9028 ; X64-NEXT: retq # encoding: [0xc3]
9029 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)
9030 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
9031 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 %mask)
9032 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
9033 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 %mask)
9034 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
9035 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 %mask)
9036 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
9037 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 %mask)
9038 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
9039 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 %mask)
9040 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
9041 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 %mask)
9042 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
9043 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 %mask)
9044 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
9045 ret <8 x i8> %vec7
9046 }
9048 declare i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64>, <2 x i64>, i32, i8) nounwind readnone
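; broadcastf32x4/broadcasti32x4 replicate the 128-bit source into both halves of a 256-bit destination;
; the masked forms blend with the passthrough operand, and the {z} forms zero the unselected elements.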
9050 declare <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float>, <8 x float>, i8)
9052 define <8 x float>@test_int_x86_avx512_mask_broadcastf32x4_256(<4 x float> %x0, <8 x float> %x2, i8 %mask) {
9053 ; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256:
9055 ; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
9056 ; X86-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd0,0x01]
9057 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9058 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9059 ; X86-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xc8,0x01]
9060 ; X86-NEXT: vaddps %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc9]
9061 ; X86-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc0,0x01]
9062 ; X86-NEXT: vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
9063 ; X86-NEXT: retl # encoding: [0xc3]
9065 ; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256:
9067 ; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
9068 ; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd0,0x01]
9069 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9070 ; X64-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xc8,0x01]
9071 ; X64-NEXT: vaddps %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc9]
9072 ; X64-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc0,0x01]
9073 ; X64-NEXT: vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
9074 ; X64-NEXT: retq # encoding: [0xc3]
9075 %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> %x2, i8 -1)
9076 %res2 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> %x2, i8 %mask)
9077 %res3 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> zeroinitializer, i8 %mask)
9078 %res4 = fadd <8 x float> %res1, %res2
9079 %res5 = fadd <8 x float> %res3, %res4
9080 ret <8 x float> %res5
9081 }
9083 define <8 x float>@test_int_x86_avx512_mask_broadcastf32x4_256_load(<4 x float>* %x0ptr, <8 x float> %x2, i8 %mask) {
9084 ; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256_load:
9086 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9087 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
9088 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
9089 ; X86-NEXT: vbroadcastf32x4 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1a,0x00]
9090 ; X86-NEXT: # ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
9091 ; X86-NEXT: retl # encoding: [0xc3]
9093 ; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256_load:
9095 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
9096 ; X64-NEXT: vbroadcastf32x4 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1a,0x07]
9097 ; X64-NEXT: # ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
9098 ; X64-NEXT: retq # encoding: [0xc3]
9099 %x0 = load <4 x float>, <4 x float>* %x0ptr
9100 %res = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> %x2, i8 %mask)
9101 ret <8 x float> %res
9102 }
9104 declare <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32>, <8 x i32>, i8)
9106 define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x4_256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask) {
9107 ; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256:
9109 ; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
9110 ; X86-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd0,0x01]
9111 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9112 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9113 ; X86-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x38,0xc8,0x01]
9114 ; X86-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc0,0x01]
9115 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
9116 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
9117 ; X86-NEXT: retl # encoding: [0xc3]
9119 ; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256:
9121 ; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
9122 ; X64-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd0,0x01]
9123 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9124 ; X64-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x38,0xc8,0x01]
9125 ; X64-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc0,0x01]
9126 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
9127 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
9128 ; X64-NEXT: retq # encoding: [0xc3]
9129 %res1 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> %x2, i8 -1)
9130 %res2 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask)
9131 %res3 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %mask)
9132 %res4 = add <8 x i32> %res1, %res2
9133 %res5 = add <8 x i32> %res3, %res4
9134 ret <8 x i32> %res5
9135 }
9137 define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x4_256_load(<4 x i32>* %x0ptr, <8 x i32> %x2, i8 %mask) {
9138 ; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256_load:
9140 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9141 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
9142 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
9143 ; X86-NEXT: vbroadcasti32x4 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x5a,0x00]
9144 ; X86-NEXT: # ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
9145 ; X86-NEXT: retl # encoding: [0xc3]
9147 ; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256_load:
9149 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
9150 ; X64-NEXT: vbroadcasti32x4 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x5a,0x07]
9151 ; X64-NEXT: # ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
9152 ; X64-NEXT: retq # encoding: [0xc3]
9153 %x0 = load <4 x i32>, <4 x i32>* %x0ptr
9154 %res = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask)
9155 ret <8 x i32> %res
9156 }
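; The pabs tests compute per-element absolute values; each test adds the masked result (blended with %x1 under %x2)
; to the unmasked result.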
9158 declare <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64>, <2 x i64>, i8)
9160 define <2 x i64>@test_int_x86_avx512_mask_pabs_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
9161 ; X86-LABEL: test_int_x86_avx512_mask_pabs_q_128:
9163 ; X86-NEXT: vpabsq %xmm0, %xmm2 # encoding: [0x62,0xf2,0xfd,0x08,0x1f,0xd0]
9164 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9165 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9166 ; X86-NEXT: vpabsq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x1f,0xc8]
9167 ; X86-NEXT: vpaddq %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc2]
9168 ; X86-NEXT: retl # encoding: [0xc3]
9170 ; X64-LABEL: test_int_x86_avx512_mask_pabs_q_128:
9172 ; X64-NEXT: vpabsq %xmm0, %xmm2 # encoding: [0x62,0xf2,0xfd,0x08,0x1f,0xd0]
9173 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9174 ; X64-NEXT: vpabsq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x1f,0xc8]
9175 ; X64-NEXT: vpaddq %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc2]
9176 ; X64-NEXT: retq # encoding: [0xc3]
9177 %res = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
9178 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
9179 %res2 = add <2 x i64> %res, %res1
9180 ret <2 x i64> %res2
9181 }
9183 declare <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64>, <4 x i64>, i8)
9185 define <4 x i64>@test_int_x86_avx512_mask_pabs_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
9186 ; X86-LABEL: test_int_x86_avx512_mask_pabs_q_256:
9188 ; X86-NEXT: vpabsq %ymm0, %ymm2 # encoding: [0x62,0xf2,0xfd,0x28,0x1f,0xd0]
9189 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9190 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9191 ; X86-NEXT: vpabsq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x1f,0xc8]
9192 ; X86-NEXT: vpaddq %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc2]
9193 ; X86-NEXT: retl # encoding: [0xc3]
9195 ; X64-LABEL: test_int_x86_avx512_mask_pabs_q_256:
9197 ; X64-NEXT: vpabsq %ymm0, %ymm2 # encoding: [0x62,0xf2,0xfd,0x28,0x1f,0xd0]
9198 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9199 ; X64-NEXT: vpabsq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x1f,0xc8]
9200 ; X64-NEXT: vpaddq %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc2]
9201 ; X64-NEXT: retq # encoding: [0xc3]
9202 %res = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
9203 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
9204 %res2 = add <4 x i64> %res, %res1
9205 ret <4 x i64> %res2
9206 }
9208 declare <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32>, <4 x i32>, i8)
9210 define <4 x i32>@test_int_x86_avx512_mask_pabs_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
9211 ; X86-LABEL: test_int_x86_avx512_mask_pabs_d_128:
9213 ; X86-NEXT: vpabsd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xd0]
9214 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9215 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9216 ; X86-NEXT: vpabsd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1e,0xc8]
9217 ; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
9218 ; X86-NEXT: retl # encoding: [0xc3]
9220 ; X64-LABEL: test_int_x86_avx512_mask_pabs_d_128:
9222 ; X64-NEXT: vpabsd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xd0]
9223 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9224 ; X64-NEXT: vpabsd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1e,0xc8]
9225 ; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
9226 ; X64-NEXT: retq # encoding: [0xc3]
9227 %res = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
9228 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
9229 %res2 = add <4 x i32> %res, %res1
9230 ret <4 x i32> %res2
9231 }
9233 declare <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32>, <8 x i32>, i8)
9235 define <8 x i32>@test_int_x86_avx512_mask_pabs_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
9236 ; X86-LABEL: test_int_x86_avx512_mask_pabs_d_256:
9238 ; X86-NEXT: vpabsd %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1e,0xd0]
9239 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9240 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9241 ; X86-NEXT: vpabsd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1e,0xc8]
9242 ; X86-NEXT: vpaddd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc2]
9243 ; X86-NEXT: retl # encoding: [0xc3]
9245 ; X64-LABEL: test_int_x86_avx512_mask_pabs_d_256:
9247 ; X64-NEXT: vpabsd %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1e,0xd0]
9248 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9249 ; X64-NEXT: vpabsd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1e,0xc8]
9250 ; X64-NEXT: vpaddd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc2]
9251 ; X64-NEXT: retq # encoding: [0xc3]
9252 %res = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
9253 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
9254 %res2 = add <8 x i32> %res, %res1
9255 ret <8 x i32> %res2
9256 }
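; ptestm sets a mask bit where the AND of the corresponding source elements is nonzero; ptestnm sets it where the
; AND is zero. Each test adds the masked and unmasked i8 results.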
9258 declare i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32>, <4 x i32>,i8)
9260 define i8@test_int_x86_avx512_ptestm_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
9261 ; X86-LABEL: test_int_x86_avx512_ptestm_d_128:
9263 ; X86-NEXT: vptestmd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc1]
9264 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9265 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9266 ; X86-NEXT: vptestmd %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x27,0xc9]
9267 ; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
9268 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9269 ; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
9270 ; X86-NEXT: # kill: def $al killed $al killed $eax
9271 ; X86-NEXT: retl # encoding: [0xc3]
9273 ; X64-LABEL: test_int_x86_avx512_ptestm_d_128:
9275 ; X64-NEXT: vptestmd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc1]
9276 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9277 ; X64-NEXT: vptestmd %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x27,0xc9]
9278 ; X64-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
9279 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9280 ; X64-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
9281 ; X64-NEXT: # kill: def $al killed $al killed $eax
9282 ; X64-NEXT: retq # encoding: [0xc3]
9283 %res = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
9284 %res1 = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8-1)
9285 %res2 = add i8 %res, %res1
9286 ret i8 %res2
9287 }
9289 declare i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32>, <8 x i32>, i8)
9291 define i8@test_int_x86_avx512_ptestm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
9292 ; X86-LABEL: test_int_x86_avx512_ptestm_d_256:
9294 ; X86-NEXT: vptestmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc1]
9295 ; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
9296 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
9297 ; X86-NEXT: andb %cl, %al # encoding: [0x20,0xc8]
9298 ; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
9299 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
9300 ; X86-NEXT: retl # encoding: [0xc3]
9302 ; X64-LABEL: test_int_x86_avx512_ptestm_d_256:
9304 ; X64-NEXT: vptestmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc1]
9305 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9306 ; X64-NEXT: andb %al, %dil # encoding: [0x40,0x20,0xc7]
9307 ; X64-NEXT: addb %dil, %al # encoding: [0x40,0x00,0xf8]
9308 ; X64-NEXT: # kill: def $al killed $al killed $eax
9309 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
9310 ; X64-NEXT: retq # encoding: [0xc3]
9311 %res = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
9312 %res1 = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8-1)
9313 %res2 = add i8 %res, %res1
9314 ret i8 %res2
9315 }
9317 declare i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64>, <2 x i64>, i8)
9319 define i8@test_int_x86_avx512_ptestm_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
9320 ; X86-LABEL: test_int_x86_avx512_ptestm_q_128:
9322 ; X86-NEXT: vptestmq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc1]
9323 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9324 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9325 ; X86-NEXT: vptestmq %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x27,0xc9]
9326 ; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
9327 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9328 ; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
9329 ; X86-NEXT: # kill: def $al killed $al killed $eax
9330 ; X86-NEXT: retl # encoding: [0xc3]
9332 ; X64-LABEL: test_int_x86_avx512_ptestm_q_128:
9334 ; X64-NEXT: vptestmq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc1]
9335 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9336 ; X64-NEXT: vptestmq %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x27,0xc9]
9337 ; X64-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
9338 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9339 ; X64-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
9340 ; X64-NEXT: # kill: def $al killed $al killed $eax
9341 ; X64-NEXT: retq # encoding: [0xc3]
9342 %res = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
9343 %res1 = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8-1)
9344 %res2 = add i8 %res, %res1
9345 ret i8 %res2
9346 }
9348 declare i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64>, <4 x i64>, i8)
9350 define i8@test_int_x86_avx512_ptestm_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
9351 ; X86-LABEL: test_int_x86_avx512_ptestm_q_256:
9353 ; X86-NEXT: vptestmq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc1]
9354 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9355 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9356 ; X86-NEXT: vptestmq %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x27,0xc9]
9357 ; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
9358 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9359 ; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
9360 ; X86-NEXT: # kill: def $al killed $al killed $eax
9361 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
9362 ; X86-NEXT: retl # encoding: [0xc3]
9364 ; X64-LABEL: test_int_x86_avx512_ptestm_q_256:
9366 ; X64-NEXT: vptestmq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc1]
9367 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9368 ; X64-NEXT: vptestmq %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x27,0xc9]
9369 ; X64-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
9370 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9371 ; X64-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
9372 ; X64-NEXT: # kill: def $al killed $al killed $eax
9373 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
9374 ; X64-NEXT: retq # encoding: [0xc3]
9375 %res = call i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
9376 %res1 = call i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8-1)
9377 %res2 = add i8 %res, %res1
9378 ret i8 %res2
9379 }
9381 declare i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32>, <4 x i32>, i8 %x2)
9383 define i8@test_int_x86_avx512_ptestnm_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
9384 ; X86-LABEL: test_int_x86_avx512_ptestnm_d_128:
9386 ; X86-NEXT: vptestnmd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x27,0xc1]
9387 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9388 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9389 ; X86-NEXT: vptestnmd %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x27,0xc9]
9390 ; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
9391 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9392 ; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
9393 ; X86-NEXT: # kill: def $al killed $al killed $eax
9394 ; X86-NEXT: retl # encoding: [0xc3]
9396 ; X64-LABEL: test_int_x86_avx512_ptestnm_d_128:
9398 ; X64-NEXT: vptestnmd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x27,0xc1]
9399 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9400 ; X64-NEXT: vptestnmd %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x27,0xc9]
9401 ; X64-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
9402 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9403 ; X64-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
9404 ; X64-NEXT: # kill: def $al killed $al killed $eax
9405 ; X64-NEXT: retq # encoding: [0xc3]
9406 %res = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
9407 %res1 = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8-1)
9408 %res2 = add i8 %res, %res1
9409 ret i8 %res2
9410 }
9412 declare i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32>, <8 x i32>, i8 %x2)
9414 define i8@test_int_x86_avx512_ptestnm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
9415 ; X86-LABEL: test_int_x86_avx512_ptestnm_d_256:
9417 ; X86-NEXT: vptestnmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc1]
9418 ; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
9419 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
9420 ; X86-NEXT: andb %cl, %al # encoding: [0x20,0xc8]
9421 ; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
9422 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
9423 ; X86-NEXT: retl # encoding: [0xc3]
9425 ; X64-LABEL: test_int_x86_avx512_ptestnm_d_256:
9427 ; X64-NEXT: vptestnmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc1]
9428 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9429 ; X64-NEXT: andb %al, %dil # encoding: [0x40,0x20,0xc7]
9430 ; X64-NEXT: addb %dil, %al # encoding: [0x40,0x00,0xf8]
9431 ; X64-NEXT: # kill: def $al killed $al killed $eax
9432 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
9433 ; X64-NEXT: retq # encoding: [0xc3]
9434 %res = call i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
9435 %res1 = call i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8-1)
9436 %res2 = add i8 %res, %res1
9437 ret i8 %res2
9438 }
9440 declare i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64>, <2 x i64>, i8 %x2)
9442 define i8@test_int_x86_avx512_ptestnm_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
9443 ; X86-LABEL: test_int_x86_avx512_ptestnm_q_128:
9445 ; X86-NEXT: vptestnmq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x27,0xc1]
9446 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9447 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9448 ; X86-NEXT: vptestnmq %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfe,0x09,0x27,0xc9]
9449 ; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
9450 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9451 ; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
9452 ; X86-NEXT: # kill: def $al killed $al killed $eax
9453 ; X86-NEXT: retl # encoding: [0xc3]
9455 ; X64-LABEL: test_int_x86_avx512_ptestnm_q_128:
9457 ; X64-NEXT: vptestnmq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x27,0xc1]
9458 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9459 ; X64-NEXT: vptestnmq %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfe,0x09,0x27,0xc9]
9460 ; X64-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
9461 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9462 ; X64-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
9463 ; X64-NEXT: # kill: def $al killed $al killed $eax
9464 ; X64-NEXT: retq # encoding: [0xc3]
9465 %res = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
9466 %res1 = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8-1)
9467 %res2 = add i8 %res, %res1
9468 ret i8 %res2
9469 }
9471 declare i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64>, <4 x i64>, i8 %x2)
9473 define i8@test_int_x86_avx512_ptestnm_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
9474 ; X86-LABEL: test_int_x86_avx512_ptestnm_q_256:
9476 ; X86-NEXT: vptestnmq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc1]
9477 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9478 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9479 ; X86-NEXT: vptestnmq %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfe,0x29,0x27,0xc9]
9480 ; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
9481 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9482 ; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
9483 ; X86-NEXT: # kill: def $al killed $al killed $eax
9484 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
9485 ; X86-NEXT: retl # encoding: [0xc3]
9487 ; X64-LABEL: test_int_x86_avx512_ptestnm_q_256:
9489 ; X64-NEXT: vptestnmq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc1]
9490 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9491 ; X64-NEXT: vptestnmq %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfe,0x29,0x27,0xc9]
9492 ; X64-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
9493 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9494 ; X64-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
9495 ; X64-NEXT: # kill: def $al killed $al killed $eax
9496 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
9497 ; X64-NEXT: retq # encoding: [0xc3]
9498 %res = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
9499 %res1 = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8-1)
9500 %res2 = add i8 %res, %res1
9501 ret i8 %res2
9502 }
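; The cmp.ps/cmp.pd tests use comparison predicate 2 (LE) and return the resulting vector mask as an i8.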
9504 define i8 @test_cmpps_256(<8 x float> %a, <8 x float> %b) {
9505 ; CHECK-LABEL: test_cmpps_256:
9507 ; CHECK-NEXT: vcmpleps %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc1,0x02]
9508 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9509 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
9510 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
9511 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
9512 %res = call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 2, i8 -1)
9513 ret i8 %res
9514 }
9515 declare i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> , <8 x float> , i32, i8)
9517 define i8 @test_cmpps_128(<4 x float> %a, <4 x float> %b) {
9518 ; CHECK-LABEL: test_cmpps_128:
9520 ; CHECK-NEXT: vcmpleps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x02]
9521 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9522 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
9523 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
9524 %res = call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 2, i8 -1)
9525 ret i8 %res
9526 }
9527 declare i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> , <4 x float> , i32, i8)
9529 define i8 @test_cmppd_256(<4 x double> %a, <4 x double> %b) {
9530 ; CHECK-LABEL: test_cmppd_256:
9532 ; CHECK-NEXT: vcmplepd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc1,0x02]
9533 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9534 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
9535 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
9536 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
9537 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %a, <4 x double> %b, i32 2, i8 -1)
9538 ret i8 %res
9539 }
9540 declare i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> , <4 x double> , i32, i8)
9542 define i8 @test_cmppd_128(<2 x double> %a, <2 x double> %b) {
9543 ; CHECK-LABEL: test_cmppd_128:
9545 ; CHECK-NEXT: vcmplepd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02]
9546 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9547 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
9548 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
9549 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 2, i8 -1)
9550 ret i8 %res
9551 }
9552 declare i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> , <2 x double> , i32, i8)
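; 128-bit vpmuldq tests: the low 32-bit element of each 64-bit lane is sign-extended and multiplied into a 64-bit product. The rr/rm/rmb suffixes cover register, memory and 64-bit broadcast operands; the k/kz variants add merge- and zero-masking.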
9554 define < 2 x i64> @test_mask_mul_epi32_rr_128(< 4 x i32> %a, < 4 x i32> %b) {
9555 ; CHECK-LABEL: test_mask_mul_epi32_rr_128:
9557 ; CHECK-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0xc1]
9558 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
9559 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
9560 ret < 2 x i64> %res
9561 }
9563 define < 2 x i64> @test_mask_mul_epi32_rrk_128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) {
9564 ; X86-LABEL: test_mask_mul_epi32_rrk_128:
9566 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9567 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9568 ; X86-NEXT: vpmuldq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x28,0xd1]
9569 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9570 ; X86-NEXT: retl # encoding: [0xc3]
9572 ; X64-LABEL: test_mask_mul_epi32_rrk_128:
9574 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9575 ; X64-NEXT: vpmuldq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x28,0xd1]
9576 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9577 ; X64-NEXT: retq # encoding: [0xc3]
9578 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
9579 ret < 2 x i64> %res
9580 }
9582 define < 2 x i64> @test_mask_mul_epi32_rrkz_128(< 4 x i32> %a, < 4 x i32> %b, i8 %mask) {
9583 ; X86-LABEL: test_mask_mul_epi32_rrkz_128:
9585 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9586 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9587 ; X86-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x28,0xc1]
9588 ; X86-NEXT: retl # encoding: [0xc3]
9590 ; X64-LABEL: test_mask_mul_epi32_rrkz_128:
9592 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9593 ; X64-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x28,0xc1]
9594 ; X64-NEXT: retq # encoding: [0xc3]
9595 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
9596 ret < 2 x i64> %res
9597 }
9599 define < 2 x i64> @test_mask_mul_epi32_rm_128(< 4 x i32> %a, < 4 x i32>* %ptr_b) {
9600 ; X86-LABEL: test_mask_mul_epi32_rm_128:
9602 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9603 ; X86-NEXT: vpmuldq (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0x00]
9604 ; X86-NEXT: retl # encoding: [0xc3]
9606 ; X64-LABEL: test_mask_mul_epi32_rm_128:
9608 ; X64-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0x07]
9609 ; X64-NEXT: retq # encoding: [0xc3]
9610 %b = load < 4 x i32>, < 4 x i32>* %ptr_b
9611 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
9612 ret < 2 x i64> %res
9613 }
9615 define < 2 x i64> @test_mask_mul_epi32_rmk_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, < 2 x i64> %passThru, i8 %mask) {
9616 ; X86-LABEL: test_mask_mul_epi32_rmk_128:
9618 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9619 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
9620 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
9621 ; X86-NEXT: vpmuldq (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x28,0x08]
9622 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
9623 ; X86-NEXT: retl # encoding: [0xc3]
9625 ; X64-LABEL: test_mask_mul_epi32_rmk_128:
9627 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
9628 ; X64-NEXT: vpmuldq (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x28,0x0f]
9629 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
9630 ; X64-NEXT: retq # encoding: [0xc3]
9631 %b = load < 4 x i32>, < 4 x i32>* %ptr_b
9632 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
9633 ret < 2 x i64> %res
9634 }
9636 define < 2 x i64> @test_mask_mul_epi32_rmkz_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, i8 %mask) {
9637 ; X86-LABEL: test_mask_mul_epi32_rmkz_128:
9639 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9640 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
9641 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
9642 ; X86-NEXT: vpmuldq (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x28,0x00]
9643 ; X86-NEXT: retl # encoding: [0xc3]
9645 ; X64-LABEL: test_mask_mul_epi32_rmkz_128:
9647 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
9648 ; X64-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x28,0x07]
9649 ; X64-NEXT: retq # encoding: [0xc3]
9650 %b = load < 4 x i32>, < 4 x i32>* %ptr_b
9651 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
9652 ret < 2 x i64> %res
9653 }
9655 define < 2 x i64> @test_mask_mul_epi32_rmb_128(< 4 x i32> %a, i64* %ptr_b) {
9656 ; X86-LABEL: test_mask_mul_epi32_rmb_128:
9658 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9659 ; X86-NEXT: vpbroadcastd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0x08]
9660 ; X86-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0xc1]
9661 ; X86-NEXT: retl # encoding: [0xc3]
9663 ; X64-LABEL: test_mask_mul_epi32_rmb_128:
9665 ; X64-NEXT: vpmuldq (%rdi){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x18,0x28,0x07]
9666 ; X64-NEXT: retq # encoding: [0xc3]
9667 %q = load i64, i64* %ptr_b
9668 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
9669 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer
9670 %b = bitcast < 2 x i64> %b64 to < 4 x i32>
9671 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
9672 ret < 2 x i64> %res
9673 }
9675 define < 2 x i64> @test_mask_mul_epi32_rmbk_128(< 4 x i32> %a, i64* %ptr_b, < 2 x i64> %passThru, i8 %mask) {
9676 ; X86-LABEL: test_mask_mul_epi32_rmbk_128:
9678 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9679 ; X86-NEXT: vpbroadcastd (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0x10]
9680 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
9681 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9682 ; X86-NEXT: vpmuldq %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x28,0xca]
9683 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
9684 ; X86-NEXT: retl # encoding: [0xc3]
9686 ; X64-LABEL: test_mask_mul_epi32_rmbk_128:
9688 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
9689 ; X64-NEXT: vpmuldq (%rdi){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0x28,0x0f]
9690 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
9691 ; X64-NEXT: retq # encoding: [0xc3]
9692 %q = load i64, i64* %ptr_b
9693 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
9694 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer
9695 %b = bitcast < 2 x i64> %b64 to < 4 x i32>
9696 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
9697 ret < 2 x i64> %res
9698 }
9700 define < 2 x i64> @test_mask_mul_epi32_rmbkz_128(< 4 x i32> %a, i64* %ptr_b, i8 %mask) {
9701 ; X86-LABEL: test_mask_mul_epi32_rmbkz_128:
9703 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9704 ; X86-NEXT: vpbroadcastd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0x08]
9705 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
9706 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9707 ; X86-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x28,0xc1]
9708 ; X86-NEXT: retl # encoding: [0xc3]
9710 ; X64-LABEL: test_mask_mul_epi32_rmbkz_128:
9712 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
9713 ; X64-NEXT: vpmuldq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0x28,0x07]
9714 ; X64-NEXT: retq # encoding: [0xc3]
9715 %q = load i64, i64* %ptr_b
9716 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
9717 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, < 2 x i32> zeroinitializer
9718 %b = bitcast < 2 x i64> %b64 to < 4 x i32>
9719 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
9720 ret < 2 x i64> %res
9721 }
9723 declare < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32>, < 4 x i32>, < 2 x i64>, i8)
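; 256-bit vpmuldq tests: the same rr/rm/rmb matrix with ymm operands and a {1to4} embedded broadcast for the rmb forms on x86-64.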
9725 define < 4 x i64> @test_mask_mul_epi32_rr_256(< 8 x i32> %a, < 8 x i32> %b) {
9726 ; CHECK-LABEL: test_mask_mul_epi32_rr_256:
9728 ; CHECK-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x28,0xc1]
9729 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
9730 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
9731 ret < 4 x i64> %res
9732 }
9734 define < 4 x i64> @test_mask_mul_epi32_rrk_256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) {
9735 ; X86-LABEL: test_mask_mul_epi32_rrk_256:
9737 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9738 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9739 ; X86-NEXT: vpmuldq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x28,0xd1]
9740 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9741 ; X86-NEXT: retl # encoding: [0xc3]
9743 ; X64-LABEL: test_mask_mul_epi32_rrk_256:
9745 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9746 ; X64-NEXT: vpmuldq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x28,0xd1]
9747 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9748 ; X64-NEXT: retq # encoding: [0xc3]
9749 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
9750 ret < 4 x i64> %res
9751 }
9753 define < 4 x i64> @test_mask_mul_epi32_rrkz_256(< 8 x i32> %a, < 8 x i32> %b, i8 %mask) {
9754 ; X86-LABEL: test_mask_mul_epi32_rrkz_256:
9756 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9757 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9758 ; X86-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x28,0xc1]
9759 ; X86-NEXT: retl # encoding: [0xc3]
9761 ; X64-LABEL: test_mask_mul_epi32_rrkz_256:
9763 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9764 ; X64-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x28,0xc1]
9765 ; X64-NEXT: retq # encoding: [0xc3]
9766 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
9767 ret < 4 x i64> %res
9768 }
9770 define < 4 x i64> @test_mask_mul_epi32_rm_256(< 8 x i32> %a, < 8 x i32>* %ptr_b) {
9771 ; X86-LABEL: test_mask_mul_epi32_rm_256:
9773 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9774 ; X86-NEXT: vpmuldq (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x28,0x00]
9775 ; X86-NEXT: retl # encoding: [0xc3]
9777 ; X64-LABEL: test_mask_mul_epi32_rm_256:
9779 ; X64-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x28,0x07]
9780 ; X64-NEXT: retq # encoding: [0xc3]
9781 %b = load < 8 x i32>, < 8 x i32>* %ptr_b
9782 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
9783 ret < 4 x i64> %res
9784 }
9786 define < 4 x i64> @test_mask_mul_epi32_rmk_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, < 4 x i64> %passThru, i8 %mask) {
9787 ; X86-LABEL: test_mask_mul_epi32_rmk_256:
9789 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9790 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
9791 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
9792 ; X86-NEXT: vpmuldq (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x28,0x08]
9793 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
9794 ; X86-NEXT: retl # encoding: [0xc3]
9796 ; X64-LABEL: test_mask_mul_epi32_rmk_256:
9798 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
9799 ; X64-NEXT: vpmuldq (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x28,0x0f]
9800 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
9801 ; X64-NEXT: retq # encoding: [0xc3]
9802 %b = load < 8 x i32>, < 8 x i32>* %ptr_b
9803 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
9804 ret < 4 x i64> %res
9805 }
9807 define < 4 x i64> @test_mask_mul_epi32_rmkz_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, i8 %mask) {
9808 ; X86-LABEL: test_mask_mul_epi32_rmkz_256:
9810 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9811 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
9812 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
9813 ; X86-NEXT: vpmuldq (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x28,0x00]
9814 ; X86-NEXT: retl # encoding: [0xc3]
9816 ; X64-LABEL: test_mask_mul_epi32_rmkz_256:
9818 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
9819 ; X64-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x28,0x07]
9820 ; X64-NEXT: retq # encoding: [0xc3]
9821 %b = load < 8 x i32>, < 8 x i32>* %ptr_b
9822 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
9823 ret < 4 x i64> %res
9824 }
9826 define < 4 x i64> @test_mask_mul_epi32_rmb_256(< 8 x i32> %a, i64* %ptr_b) {
9827 ; X86-LABEL: test_mask_mul_epi32_rmb_256:
9829 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9830 ; X86-NEXT: vpbroadcastd (%eax), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x58,0x08]
9831 ; X86-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x28,0xc1]
9832 ; X86-NEXT: retl # encoding: [0xc3]
9834 ; X64-LABEL: test_mask_mul_epi32_rmb_256:
9836 ; X64-NEXT: vpmuldq (%rdi){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x38,0x28,0x07]
9837 ; X64-NEXT: retq # encoding: [0xc3]
9838 %q = load i64, i64* %ptr_b
9839 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
9840 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
9841 %b = bitcast < 4 x i64> %b64 to < 8 x i32>
9842 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
9843 ret < 4 x i64> %res
9844 }
9846 define < 4 x i64> @test_mask_mul_epi32_rmbk_256(< 8 x i32> %a, i64* %ptr_b, < 4 x i64> %passThru, i8 %mask) {
9847 ; X86-LABEL: test_mask_mul_epi32_rmbk_256:
9849 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9850 ; X86-NEXT: vpbroadcastd (%eax), %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x58,0x10]
9851 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
9852 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9853 ; X86-NEXT: vpmuldq %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x28,0xca]
9854 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
9855 ; X86-NEXT: retl # encoding: [0xc3]
9857 ; X64-LABEL: test_mask_mul_epi32_rmbk_256:
9859 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
9860 ; X64-NEXT: vpmuldq (%rdi){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x39,0x28,0x0f]
9861 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
9862 ; X64-NEXT: retq # encoding: [0xc3]
9863 %q = load i64, i64* %ptr_b
9864 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
9865 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
9866 %b = bitcast < 4 x i64> %b64 to < 8 x i32>
9867 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
9868 ret < 4 x i64> %res
9869 }
9871 define < 4 x i64> @test_mask_mul_epi32_rmbkz_256(< 8 x i32> %a, i64* %ptr_b, i8 %mask) {
9872 ; X86-LABEL: test_mask_mul_epi32_rmbkz_256:
9874 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9875 ; X86-NEXT: vpbroadcastd (%eax), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x58,0x08]
9876 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
9877 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9878 ; X86-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x28,0xc1]
9879 ; X86-NEXT: retl # encoding: [0xc3]
9881 ; X64-LABEL: test_mask_mul_epi32_rmbkz_256:
9883 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
9884 ; X64-NEXT: vpmuldq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xb9,0x28,0x07]
9885 ; X64-NEXT: retq # encoding: [0xc3]
9886 %q = load i64, i64* %ptr_b
9887 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
9888 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
9889 %b = bitcast < 4 x i64> %b64 to < 8 x i32>
9890 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
9891 ret < 4 x i64> %res
9892 }
9894 declare < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32>, < 8 x i32>, < 4 x i64>, i8)
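; vpmuludq (unsigned multiply) 128-bit tests, mirroring the signed vpmuldq matrix above.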
9896 define < 2 x i64> @test_mask_mul_epu32_rr_128(< 4 x i32> %a, < 4 x i32> %b) {
9897 ; CHECK-LABEL: test_mask_mul_epu32_rr_128:
9899 ; CHECK-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0xc1]
9900 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
9901 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
9902 ret < 2 x i64> %res
9903 }
9905 define < 2 x i64> @test_mask_mul_epu32_rrk_128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) {
9906 ; X86-LABEL: test_mask_mul_epu32_rrk_128:
9908 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9909 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9910 ; X86-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xf4,0xd1]
9911 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9912 ; X86-NEXT: retl # encoding: [0xc3]
9914 ; X64-LABEL: test_mask_mul_epu32_rrk_128:
9916 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9917 ; X64-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xf4,0xd1]
9918 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9919 ; X64-NEXT: retq # encoding: [0xc3]
9920 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
9921 ret < 2 x i64> %res
9922 }
9924 define < 2 x i64> @test_mask_mul_epu32_rrkz_128(< 4 x i32> %a, < 4 x i32> %b, i8 %mask) {
9925 ; X86-LABEL: test_mask_mul_epu32_rrkz_128:
9927 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9928 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9929 ; X86-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xf4,0xc1]
9930 ; X86-NEXT: retl # encoding: [0xc3]
9932 ; X64-LABEL: test_mask_mul_epu32_rrkz_128:
9934 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9935 ; X64-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xf4,0xc1]
9936 ; X64-NEXT: retq # encoding: [0xc3]
9937 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
9938 ret < 2 x i64> %res
9939 }
9941 define < 2 x i64> @test_mask_mul_epu32_rm_128(< 4 x i32> %a, < 4 x i32>* %ptr_b) {
9942 ; X86-LABEL: test_mask_mul_epu32_rm_128:
9944 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9945 ; X86-NEXT: vpmuludq (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0x00]
9946 ; X86-NEXT: retl # encoding: [0xc3]
9948 ; X64-LABEL: test_mask_mul_epu32_rm_128:
9950 ; X64-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0x07]
9951 ; X64-NEXT: retq # encoding: [0xc3]
9952 %b = load < 4 x i32>, < 4 x i32>* %ptr_b
9953 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
9954 ret < 2 x i64> %res
9955 }
9957 define < 2 x i64> @test_mask_mul_epu32_rmk_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, < 2 x i64> %passThru, i8 %mask) {
9958 ; X86-LABEL: test_mask_mul_epu32_rmk_128:
9960 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9961 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
9962 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
9963 ; X86-NEXT: vpmuludq (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xf4,0x08]
9964 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
9965 ; X86-NEXT: retl # encoding: [0xc3]
9967 ; X64-LABEL: test_mask_mul_epu32_rmk_128:
9969 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
9970 ; X64-NEXT: vpmuludq (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xf4,0x0f]
9971 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
9972 ; X64-NEXT: retq # encoding: [0xc3]
9973 %b = load < 4 x i32>, < 4 x i32>* %ptr_b
9974 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
9975 ret < 2 x i64> %res
9976 }
9978 define < 2 x i64> @test_mask_mul_epu32_rmkz_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, i8 %mask) {
9979 ; X86-LABEL: test_mask_mul_epu32_rmkz_128:
9981 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9982 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
9983 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
9984 ; X86-NEXT: vpmuludq (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xf4,0x00]
9985 ; X86-NEXT: retl # encoding: [0xc3]
9987 ; X64-LABEL: test_mask_mul_epu32_rmkz_128:
9989 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
9990 ; X64-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xf4,0x07]
9991 ; X64-NEXT: retq # encoding: [0xc3]
9992 %b = load < 4 x i32>, < 4 x i32>* %ptr_b
9993 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
9994 ret < 2 x i64> %res
9995 }
9997 define < 2 x i64> @test_mask_mul_epu32_rmb_128(< 4 x i32> %a, i64* %ptr_b) {
9998 ; X86-LABEL: test_mask_mul_epu32_rmb_128:
10000 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
10001 ; X86-NEXT: vpbroadcastd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0x08]
10002 ; X86-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0xc1]
10003 ; X86-NEXT: retl # encoding: [0xc3]
10005 ; X64-LABEL: test_mask_mul_epu32_rmb_128:
10007 ; X64-NEXT: vpmuludq (%rdi){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x18,0xf4,0x07]
10008 ; X64-NEXT: retq # encoding: [0xc3]
10009 %q = load i64, i64* %ptr_b
10010 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
10011 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer
10012 %b = bitcast < 2 x i64> %b64 to < 4 x i32>
10013 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
10014 ret < 2 x i64> %res
10015 }
10017 define < 2 x i64> @test_mask_mul_epu32_rmbk_128(< 4 x i32> %a, i64* %ptr_b, < 2 x i64> %passThru, i8 %mask) {
10018 ; X86-LABEL: test_mask_mul_epu32_rmbk_128:
10020 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
10021 ; X86-NEXT: vpbroadcastd (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0x10]
10022 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
10023 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10024 ; X86-NEXT: vpmuludq %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xf4,0xca]
10025 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
10026 ; X86-NEXT: retl # encoding: [0xc3]
10028 ; X64-LABEL: test_mask_mul_epu32_rmbk_128:
10030 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
10031 ; X64-NEXT: vpmuludq (%rdi){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x19,0xf4,0x0f]
10032 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
10033 ; X64-NEXT: retq # encoding: [0xc3]
10034 %q = load i64, i64* %ptr_b
10035 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
10036 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer
10037 %b = bitcast < 2 x i64> %b64 to < 4 x i32>
10038 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
10039 ret < 2 x i64> %res
10040 }
10042 define < 2 x i64> @test_mask_mul_epu32_rmbkz_128(< 4 x i32> %a, i64* %ptr_b, i8 %mask) {
10043 ; X86-LABEL: test_mask_mul_epu32_rmbkz_128:
10045 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
10046 ; X86-NEXT: vpbroadcastd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0x08]
10047 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
10048 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10049 ; X86-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xf4,0xc1]
10050 ; X86-NEXT: retl # encoding: [0xc3]
10052 ; X64-LABEL: test_mask_mul_epu32_rmbkz_128:
10054 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
10055 ; X64-NEXT: vpmuludq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x99,0xf4,0x07]
10056 ; X64-NEXT: retq # encoding: [0xc3]
10057 %q = load i64, i64* %ptr_b
10058 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
10059 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, < 2 x i32> zeroinitializer
10060 %b = bitcast < 2 x i64> %b64 to < 4 x i32>
10061 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
10062 ret < 2 x i64> %res
10063 }
10065 declare < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32>, < 4 x i32>, < 2 x i64>, i8)
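; vpmuludq 256-bit tests, again mirroring the signed 256-bit cases.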
10067 define < 4 x i64> @test_mask_mul_epu32_rr_256(< 8 x i32> %a, < 8 x i32> %b) {
10068 ; CHECK-LABEL: test_mask_mul_epu32_rr_256:
10070 ; CHECK-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf4,0xc1]
10071 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
10072 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
10073 ret < 4 x i64> %res
10074 }
10076 define < 4 x i64> @test_mask_mul_epu32_rrk_256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) {
10077 ; X86-LABEL: test_mask_mul_epu32_rrk_256:
10079 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10080 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10081 ; X86-NEXT: vpmuludq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf4,0xd1]
10082 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
10083 ; X86-NEXT: retl # encoding: [0xc3]
10085 ; X64-LABEL: test_mask_mul_epu32_rrk_256:
10087 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10088 ; X64-NEXT: vpmuludq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf4,0xd1]
10089 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
10090 ; X64-NEXT: retq # encoding: [0xc3]
10091 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
10092 ret < 4 x i64> %res
10093 }
10095 define < 4 x i64> @test_mask_mul_epu32_rrkz_256(< 8 x i32> %a, < 8 x i32> %b, i8 %mask) {
10096 ; X86-LABEL: test_mask_mul_epu32_rrkz_256:
10098 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10099 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10100 ; X86-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0xc1]
10101 ; X86-NEXT: retl # encoding: [0xc3]
10103 ; X64-LABEL: test_mask_mul_epu32_rrkz_256:
10105 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10106 ; X64-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0xc1]
10107 ; X64-NEXT: retq # encoding: [0xc3]
10108 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
10109 ret < 4 x i64> %res
10110 }
10112 define < 4 x i64> @test_mask_mul_epu32_rm_256(< 8 x i32> %a, < 8 x i32>* %ptr_b) {
10113 ; X86-LABEL: test_mask_mul_epu32_rm_256:
10115 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
10116 ; X86-NEXT: vpmuludq (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf4,0x00]
10117 ; X86-NEXT: retl # encoding: [0xc3]
10119 ; X64-LABEL: test_mask_mul_epu32_rm_256:
10121 ; X64-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf4,0x07]
10122 ; X64-NEXT: retq # encoding: [0xc3]
10123 %b = load < 8 x i32>, < 8 x i32>* %ptr_b
10124 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
10125 ret < 4 x i64> %res
10126 }
10128 define < 4 x i64> @test_mask_mul_epu32_rmk_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, < 4 x i64> %passThru, i8 %mask) {
10129 ; X86-LABEL: test_mask_mul_epu32_rmk_256:
10131 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
10132 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
10133 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
10134 ; X86-NEXT: vpmuludq (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf4,0x08]
10135 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
10136 ; X86-NEXT: retl # encoding: [0xc3]
10138 ; X64-LABEL: test_mask_mul_epu32_rmk_256:
10140 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
10141 ; X64-NEXT: vpmuludq (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf4,0x0f]
10142 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
10143 ; X64-NEXT: retq # encoding: [0xc3]
10144 %b = load < 8 x i32>, < 8 x i32>* %ptr_b
10145 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
10146 ret < 4 x i64> %res
10147 }
10149 define < 4 x i64> @test_mask_mul_epu32_rmkz_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, i8 %mask) {
10150 ; X86-LABEL: test_mask_mul_epu32_rmkz_256:
10152 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
10153 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
10154 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
10155 ; X86-NEXT: vpmuludq (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0x00]
10156 ; X86-NEXT: retl # encoding: [0xc3]
10158 ; X64-LABEL: test_mask_mul_epu32_rmkz_256:
10160 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
10161 ; X64-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0x07]
10162 ; X64-NEXT: retq # encoding: [0xc3]
10163 %b = load < 8 x i32>, < 8 x i32>* %ptr_b
10164 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
10165 ret < 4 x i64> %res
10166 }
10168 define < 4 x i64> @test_mask_mul_epu32_rmb_256(< 8 x i32> %a, i64* %ptr_b) {
10169 ; X86-LABEL: test_mask_mul_epu32_rmb_256:
10171 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
10172 ; X86-NEXT: vpbroadcastd (%eax), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x58,0x08]
10173 ; X86-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf4,0xc1]
10174 ; X86-NEXT: retl # encoding: [0xc3]
10176 ; X64-LABEL: test_mask_mul_epu32_rmb_256:
10178 ; X64-NEXT: vpmuludq (%rdi){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x38,0xf4,0x07]
10179 ; X64-NEXT: retq # encoding: [0xc3]
10180 %q = load i64, i64* %ptr_b
10181 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
10182 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
10183 %b = bitcast < 4 x i64> %b64 to < 8 x i32>
10184 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
10185 ret < 4 x i64> %res
10186 }
10188 define < 4 x i64> @test_mask_mul_epu32_rmbk_256(< 8 x i32> %a, i64* %ptr_b, < 4 x i64> %passThru, i8 %mask) {
10189 ; X86-LABEL: test_mask_mul_epu32_rmbk_256:
10191 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
10192 ; X86-NEXT: vpbroadcastd (%eax), %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x58,0x10]
10193 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
10194 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10195 ; X86-NEXT: vpmuludq %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf4,0xca]
10196 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
10197 ; X86-NEXT: retl # encoding: [0xc3]
10199 ; X64-LABEL: test_mask_mul_epu32_rmbk_256:
10201 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
10202 ; X64-NEXT: vpmuludq (%rdi){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x39,0xf4,0x0f]
10203 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
10204 ; X64-NEXT: retq # encoding: [0xc3]
10205 %q = load i64, i64* %ptr_b
10206 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
10207 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
10208 %b = bitcast < 4 x i64> %b64 to < 8 x i32>
10209 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
10210 ret < 4 x i64> %res
10211 }
10213 define < 4 x i64> @test_mask_mul_epu32_rmbkz_256(< 8 x i32> %a, i64* %ptr_b, i8 %mask) {
10214 ; X86-LABEL: test_mask_mul_epu32_rmbkz_256:
10216 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
10217 ; X86-NEXT: vpbroadcastd (%eax), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x58,0x08]
10218 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
10219 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10220 ; X86-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0xc1]
10221 ; X86-NEXT: retl # encoding: [0xc3]
10223 ; X64-LABEL: test_mask_mul_epu32_rmbkz_256:
10225 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
10226 ; X64-NEXT: vpmuludq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xb9,0xf4,0x07]
10227 ; X64-NEXT: retq # encoding: [0xc3]
10228 %q = load i64, i64* %ptr_b
10229 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
10230 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
10231 %b = bitcast < 4 x i64> %b64 to < 8 x i32>
10232 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
10233 ret < 4 x i64> %res
10234 }
10236 declare < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32>, < 8 x i32>, < 4 x i64>, i8)
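; Masked conversion intrinsics: cvtdq2ps, cvtpd2dq, cvtpd2ps, cvtps2pd, cvttpd2dq and cvttps2dq. Each test adds (or fadds) the masked and unmasked results so both encodings are checked.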
10238 declare <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32>, <4 x float>, i8)
10240 define <4 x float>@test_int_x86_avx512_mask_cvt_dq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) {
10241 ; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_128:
10243 ; X86-NEXT: vcvtdq2ps %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5b,0xd0]
10244 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10245 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10246 ; X86-NEXT: vcvtdq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5b,0xc8]
10247 ; X86-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
10248 ; X86-NEXT: retl # encoding: [0xc3]
10250 ; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_128:
10252 ; X64-NEXT: vcvtdq2ps %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5b,0xd0]
10253 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10254 ; X64-NEXT: vcvtdq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5b,0xc8]
10255 ; X64-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
10256 ; X64-NEXT: retq # encoding: [0xc3]
10257 %res = call <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 %x2)
10258 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 -1)
10259 %res2 = fadd <4 x float> %res, %res1
10260 ret <4 x float> %res2
10261 }
10263 declare <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32>, <8 x float>, i8)
10265 define <8 x float>@test_int_x86_avx512_mask_cvt_dq2ps_256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) {
10266 ; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_256:
10268 ; X86-NEXT: vcvtdq2ps %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5b,0xd0]
10269 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10270 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10271 ; X86-NEXT: vcvtdq2ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5b,0xc8]
10272 ; X86-NEXT: vaddps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc2]
10273 ; X86-NEXT: retl # encoding: [0xc3]
10275 ; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_256:
10277 ; X64-NEXT: vcvtdq2ps %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5b,0xd0]
10278 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10279 ; X64-NEXT: vcvtdq2ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5b,0xc8]
10280 ; X64-NEXT: vaddps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc2]
10281 ; X64-NEXT: retq # encoding: [0xc3]
10282 %res = call <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 %x2)
10283 %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1)
10284 %res2 = fadd <8 x float> %res, %res1
10285 ret <8 x float> %res2
10286 }
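; The pd2dq and pd2ps conversions narrow a 256-bit source to a 128-bit result, so their 256-bit tests also expect vzeroupper before returning.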
10288 declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double>, <4 x i32>, i8)
10290 define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
10291 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_256:
10293 ; X86-NEXT: vcvtpd2dq %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xff,0xe6,0xd0]
10294 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10295 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10296 ; X86-NEXT: vcvtpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0xe6,0xc8]
10297 ; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
10298 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
10299 ; X86-NEXT: retl # encoding: [0xc3]
10301 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_256:
10303 ; X64-NEXT: vcvtpd2dq %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xff,0xe6,0xd0]
10304 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10305 ; X64-NEXT: vcvtpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0xe6,0xc8]
10306 ; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
10307 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
10308 ; X64-NEXT: retq # encoding: [0xc3]
10309 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
10310 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
10311 %res2 = add <4 x i32> %res, %res1
10312 ret <4 x i32> %res2
10313 }
10315 declare <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double>, <4 x float>, i8)
10317 define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps_256(<4 x double> %x0, <4 x float> %x1, i8 %x2) {
10318 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_256:
10320 ; X86-NEXT: vcvtpd2ps %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5a,0xd0]
10321 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10322 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10323 ; X86-NEXT: vcvtpd2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x5a,0xc8]
10324 ; X86-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
10325 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
10326 ; X86-NEXT: retl # encoding: [0xc3]
10328 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_256:
10330 ; X64-NEXT: vcvtpd2ps %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5a,0xd0]
10331 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10332 ; X64-NEXT: vcvtpd2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x5a,0xc8]
10333 ; X64-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
10334 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
10335 ; X64-NEXT: retq # encoding: [0xc3]
10336 %res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double> %x0, <4 x float> %x1, i8 %x2)
10337 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double> %x0, <4 x float> %x1, i8 -1)
10338 %res2 = fadd <4 x float> %res, %res1
10339 ret <4 x float> %res2
10340 }
10342 declare <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float>, <4 x double>, i8)
10344 define <4 x double>@test_int_x86_avx512_mask_cvt_ps2pd_256(<4 x float> %x0, <4 x double> %x1, i8 %x2) {
10345 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_256:
10347 ; X86-NEXT: vcvtps2pd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5a,0xd0]
10348 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10349 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10350 ; X86-NEXT: vcvtps2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5a,0xc8]
10351 ; X86-NEXT: vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
10352 ; X86-NEXT: retl # encoding: [0xc3]
10354 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_256:
10356 ; X64-NEXT: vcvtps2pd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5a,0xd0]
10357 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10358 ; X64-NEXT: vcvtps2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5a,0xc8]
10359 ; X64-NEXT: vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
10360 ; X64-NEXT: retq # encoding: [0xc3]
10361 %res = call <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float> %x0, <4 x double> %x1, i8 %x2)
10362 %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float> %x0, <4 x double> %x1, i8 -1)
10363 %res2 = fadd <4 x double> %res, %res1
10364 ret <4 x double> %res2
10365 }
10367 declare <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float>, <2 x double>, i8)
10369 define <2 x double>@test_int_x86_avx512_mask_cvt_ps2pd_128(<4 x float> %x0, <2 x double> %x1, i8 %x2) {
10370 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_128:
10372 ; X86-NEXT: vcvtps2pd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xd0]
10373 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10374 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10375 ; X86-NEXT: vcvtps2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5a,0xc8]
10376 ; X86-NEXT: vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
10377 ; X86-NEXT: retl # encoding: [0xc3]
10379 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_128:
10381 ; X64-NEXT: vcvtps2pd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xd0]
10382 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10383 ; X64-NEXT: vcvtps2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5a,0xc8]
10384 ; X64-NEXT: vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
10385 ; X64-NEXT: retq # encoding: [0xc3]
10386 %res = call <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float> %x0, <2 x double> %x1, i8 %x2)
10387 %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float> %x0, <2 x double> %x1, i8 -1)
10388 %res2 = fadd <2 x double> %res, %res1
10389 ret <2 x double> %res2
10390 }
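; The cvtt* forms below truncate toward zero instead of using the current MXCSR rounding mode.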
10392 declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double>, <4 x i32>, i8)
10394 define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
10395 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_256:
10397 ; X86-NEXT: vcvttpd2dq %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe6,0xd0]
10398 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10399 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10400 ; X86-NEXT: vcvttpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe6,0xc8]
10401 ; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
10402 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
10403 ; X86-NEXT: retl # encoding: [0xc3]
10405 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_256:
10407 ; X64-NEXT: vcvttpd2dq %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe6,0xd0]
10408 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10409 ; X64-NEXT: vcvttpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe6,0xc8]
10410 ; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
10411 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
10412 ; X64-NEXT: retq # encoding: [0xc3]
10413 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
10414 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
10415 %res2 = add <4 x i32> %res, %res1
10416 ret <4 x i32> %res2
10417 }
10419 declare <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float>, <4 x i32>, i8)
10421 define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
10422 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_128:
10424 ; X86-NEXT: vcvttps2dq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xd0]
10425 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10426 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10427 ; X86-NEXT: vcvttps2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x5b,0xc8]
10428 ; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
10429 ; X86-NEXT: retl # encoding: [0xc3]
10431 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_128:
10433 ; X64-NEXT: vcvttps2dq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xd0]
10434 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10435 ; X64-NEXT: vcvttps2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x5b,0xc8]
10436 ; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
10437 ; X64-NEXT: retq # encoding: [0xc3]
10438 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
10439 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
10440 %res2 = add <4 x i32> %res, %res1
10441 ret <4 x i32> %res2
10442 }
10444 declare <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float>, <8 x i32>, i8)
10446 define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
10447 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_256:
10449 ; X86-NEXT: vcvttps2dq %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x5b,0xd0]
10450 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10451 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10452 ; X86-NEXT: vcvttps2dq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x5b,0xc8]
10453 ; X86-NEXT: vpaddd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc2]
10454 ; X86-NEXT: retl # encoding: [0xc3]
10456 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_256:
10458 ; X64-NEXT: vcvttps2dq %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x5b,0xd0]
10459 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10460 ; X64-NEXT: vcvttps2dq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x5b,0xc8]
10461 ; X64-NEXT: vpaddd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc2]
10462 ; X64-NEXT: retq # encoding: [0xc3]
10463 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
10464 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
10465 %res2 = add <8 x i32> %res, %res1
10466 ret <8 x i32> %res2
10467 }
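; permvar tests: vpermps/vpermd/vpermpd/vpermq permute the elements of %x0 using the indices in %x1; each test sums the merge-masked, zero-masked and unmasked results.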
10469 declare <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float>, <8 x i32>, <8 x float>, i8)
10471 define <8 x float>@test_int_x86_avx512_mask_permvar_sf_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
10472 ; X86-LABEL: test_int_x86_avx512_mask_permvar_sf_256:
10474 ; X86-NEXT: vpermps %ymm0, %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xd8]
10475 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10476 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10477 ; X86-NEXT: vpermps %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x16,0xd0]
10478 ; X86-NEXT: vpermps %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x16,0xc0]
10479 ; X86-NEXT: vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
10480 ; X86-NEXT: vaddps %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
10481 ; X86-NEXT: retl # encoding: [0xc3]
10483 ; X64-LABEL: test_int_x86_avx512_mask_permvar_sf_256:
10485 ; X64-NEXT: vpermps %ymm0, %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xd8]
10486 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10487 ; X64-NEXT: vpermps %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x16,0xd0]
10488 ; X64-NEXT: vpermps %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x16,0xc0]
10489 ; X64-NEXT: vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
10490 ; X64-NEXT: vaddps %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
10491 ; X64-NEXT: retq # encoding: [0xc3]
10492 %res = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
10493 %res1 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3)
10494 %res2 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
10495 %res3 = fadd <8 x float> %res, %res1
10496 %res4 = fadd <8 x float> %res3, %res2
10497 ret <8 x float> %res4
10498 }
10500 declare <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
10502 define <8 x i32>@test_int_x86_avx512_mask_permvar_si_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
10503 ; X86-LABEL: test_int_x86_avx512_mask_permvar_si_256:
10505 ; X86-NEXT: vpermd %ymm0, %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x36,0xd8]
10506 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10507 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10508 ; X86-NEXT: vpermd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x36,0xd0]
10509 ; X86-NEXT: vpermd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x36,0xc0]
10510 ; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
10511 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
10512 ; X86-NEXT: retl # encoding: [0xc3]
10514 ; X64-LABEL: test_int_x86_avx512_mask_permvar_si_256:
10516 ; X64-NEXT: vpermd %ymm0, %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x36,0xd8]
10517 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10518 ; X64-NEXT: vpermd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x36,0xd0]
10519 ; X64-NEXT: vpermd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x36,0xc0]
10520 ; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
10521 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
10522 ; X64-NEXT: retq # encoding: [0xc3]
10523 %res = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
10524 %res1 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
10525 %res2 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
10526 %res3 = add <8 x i32> %res, %res1
10527 %res4 = add <8 x i32> %res3, %res2
10528 ret <8 x i32> %res4
10529 }
10531 declare <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double>, <4 x i64>, <4 x double>, i8)
10533 define <4 x double>@test_int_x86_avx512_mask_permvar_df_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
10534 ; X86-LABEL: test_int_x86_avx512_mask_permvar_df_256:
10536 ; X86-NEXT: vpermpd %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x16,0xd8]
10537 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10538 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10539 ; X86-NEXT: vpermpd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x16,0xd0]
10540 ; X86-NEXT: vpermpd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x16,0xc0]
10541 ; X86-NEXT: vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
10542 ; X86-NEXT: vaddpd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc3]
10543 ; X86-NEXT: retl # encoding: [0xc3]
10545 ; X64-LABEL: test_int_x86_avx512_mask_permvar_df_256:
10547 ; X64-NEXT: vpermpd %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x16,0xd8]
10548 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10549 ; X64-NEXT: vpermpd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x16,0xd0]
10550 ; X64-NEXT: vpermpd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x16,0xc0]
10551 ; X64-NEXT: vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
10552 ; X64-NEXT: vaddpd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc3]
10553 ; X64-NEXT: retq # encoding: [0xc3]
10554 %res = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
10555 %res1 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3)
10556 %res2 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
10557 %res3 = fadd <4 x double> %res, %res1
10558 %res4 = fadd <4 x double> %res3, %res2
10559 ret <4 x double> %res4
10560 }
10562 declare <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
10564 define <4 x i64>@test_int_x86_avx512_mask_permvar_di_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
10565 ; X86-LABEL: test_int_x86_avx512_mask_permvar_di_256:
10567 ; X86-NEXT: vpermq %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x36,0xd8]
10568 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10569 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10570 ; X86-NEXT: vpermq %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x36,0xd0]
10571 ; X86-NEXT: vpermq %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x36,0xc0]
10572 ; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
10573 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
10574 ; X86-NEXT: retl # encoding: [0xc3]
10576 ; X64-LABEL: test_int_x86_avx512_mask_permvar_di_256:
10578 ; X64-NEXT: vpermq %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x36,0xd8]
10579 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10580 ; X64-NEXT: vpermq %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x36,0xd0]
10581 ; X64-NEXT: vpermq %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x36,0xc0]
10582 ; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
10583 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
10584 ; X64-NEXT: retq # encoding: [0xc3]
10585 %res = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
10586 %res1 = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
10587 %res2 = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
10588 %res3 = add <4 x i64> %res, %res1
10589 %res4 = add <4 x i64> %res3, %res2
10590 ret <4 x i64> %res4
10593 declare <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)
10595 define <4 x i32>@test_int_x86_avx512_mask_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
10596 ; X86-LABEL: test_int_x86_avx512_mask_pternlog_d_128:
10598 ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
10599 ; X86-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
10600 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10601 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10602 ; X86-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x25,0xc2,0x21]
10603 ; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
10604 ; X86-NEXT: retl # encoding: [0xc3]
10606 ; X64-LABEL: test_int_x86_avx512_mask_pternlog_d_128:
10608 ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
10609 ; X64-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
10610 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10611 ; X64-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x25,0xc2,0x21]
10612 ; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
10613 ; X64-NEXT: retq # encoding: [0xc3]
10614 %res = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
10615 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
10616 %res2 = add <4 x i32> %res, %res1
10617 ret <4 x i32> %res2
10620 declare <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)
10622 define <4 x i32>@test_int_x86_avx512_maskz_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
10623 ; X86-LABEL: test_int_x86_avx512_maskz_pternlog_d_128:
10625 ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
10626 ; X86-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
10627 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10628 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10629 ; X86-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x25,0xc2,0x21]
10630 ; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
10631 ; X86-NEXT: retl # encoding: [0xc3]
10633 ; X64-LABEL: test_int_x86_avx512_maskz_pternlog_d_128:
10635 ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
10636 ; X64-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
10637 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10638 ; X64-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x25,0xc2,0x21]
10639 ; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
10640 ; X64-NEXT: retq # encoding: [0xc3]
10641 %res = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
10642 %res1 = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
10643 %res2 = add <4 x i32> %res, %res1
10644 ret <4 x i32> %res2
10647 declare <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)
10649 define <8 x i32>@test_int_x86_avx512_mask_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
10650 ; X86-LABEL: test_int_x86_avx512_mask_pternlog_d_256:
10652 ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
10653 ; X86-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
10654 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10655 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10656 ; X86-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x25,0xc2,0x21]
10657 ; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
10658 ; X86-NEXT: retl # encoding: [0xc3]
10660 ; X64-LABEL: test_int_x86_avx512_mask_pternlog_d_256:
10662 ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
10663 ; X64-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
10664 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10665 ; X64-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x25,0xc2,0x21]
10666 ; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
10667 ; X64-NEXT: retq # encoding: [0xc3]
10668 %res = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
10669 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
10670 %res2 = add <8 x i32> %res, %res1
10671 ret <8 x i32> %res2
10674 declare <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)
10676 define <8 x i32>@test_int_x86_avx512_maskz_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
10677 ; X86-LABEL: test_int_x86_avx512_maskz_pternlog_d_256:
10679 ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
10680 ; X86-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
10681 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10682 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10683 ; X86-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x25,0xc2,0x21]
10684 ; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
10685 ; X86-NEXT: retl # encoding: [0xc3]
10687 ; X64-LABEL: test_int_x86_avx512_maskz_pternlog_d_256:
10689 ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
10690 ; X64-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
10691 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10692 ; X64-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x25,0xc2,0x21]
10693 ; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
10694 ; X64-NEXT: retq # encoding: [0xc3]
10695 %res = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
10696 %res1 = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
10697 %res2 = add <8 x i32> %res, %res1
10698 ret <8 x i32> %res2
10701 declare <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)
10703 define <2 x i64>@test_int_x86_avx512_mask_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
10704 ; X86-LABEL: test_int_x86_avx512_mask_pternlog_q_128:
10706 ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
10707 ; X86-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
10708 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10709 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10710 ; X86-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x09,0x25,0xc2,0x21]
10711 ; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
10712 ; X86-NEXT: retl # encoding: [0xc3]
10714 ; X64-LABEL: test_int_x86_avx512_mask_pternlog_q_128:
10716 ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
10717 ; X64-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
10718 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10719 ; X64-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x09,0x25,0xc2,0x21]
10720 ; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
10721 ; X64-NEXT: retq # encoding: [0xc3]
10722 %res = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
10723 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
10724 %res2 = add <2 x i64> %res, %res1
10725 ret <2 x i64> %res2
10728 declare <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)
10730 define <2 x i64>@test_int_x86_avx512_maskz_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
10731 ; X86-LABEL: test_int_x86_avx512_maskz_pternlog_q_128:
10733 ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
10734 ; X86-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
10735 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10736 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10737 ; X86-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x25,0xc2,0x21]
10738 ; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
10739 ; X86-NEXT: retl # encoding: [0xc3]
10741 ; X64-LABEL: test_int_x86_avx512_maskz_pternlog_q_128:
10743 ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
10744 ; X64-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
10745 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10746 ; X64-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x25,0xc2,0x21]
10747 ; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
10748 ; X64-NEXT: retq # encoding: [0xc3]
10749 %res = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
10750 %res1 = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
10751 %res2 = add <2 x i64> %res, %res1
10752 ret <2 x i64> %res2
10755 declare <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)
10757 define <4 x i64>@test_int_x86_avx512_mask_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
10758 ; X86-LABEL: test_int_x86_avx512_mask_pternlog_q_256:
10760 ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
10761 ; X86-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
10762 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10763 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10764 ; X86-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x29,0x25,0xc2,0x21]
10765 ; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
10766 ; X86-NEXT: retl # encoding: [0xc3]
10768 ; X64-LABEL: test_int_x86_avx512_mask_pternlog_q_256:
10770 ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
10771 ; X64-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
10772 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10773 ; X64-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x29,0x25,0xc2,0x21]
10774 ; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
10775 ; X64-NEXT: retq # encoding: [0xc3]
10776 %res = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
10777 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
10778 %res2 = add <4 x i64> %res, %res1
10779 ret <4 x i64> %res2
10782 declare <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)
10784 define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
10785 ; X86-LABEL: test_int_x86_avx512_maskz_pternlog_q_256:
10787 ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
10788 ; X86-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
10789 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10790 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10791 ; X86-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xc2,0x21]
10792 ; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
10793 ; X86-NEXT: retl # encoding: [0xc3]
10795 ; X64-LABEL: test_int_x86_avx512_maskz_pternlog_q_256:
10797 ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
10798 ; X64-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
10799 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10800 ; X64-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xc2,0x21]
10801 ; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
10802 ; X64-NEXT: retq # encoding: [0xc3]
10803 %res = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
10804 %res1 = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
10805 %res2 = add <4 x i64> %res, %res1
10806 ret <4 x i64> %res2
10809 declare <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32>, <4 x float>, i8)
10811 define <4 x float>@test_int_x86_avx512_mask_cvt_udq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) {
10812 ; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_128:
10814 ; X86-NEXT: vcvtudq2ps %xmm0, %xmm2 # encoding: [0x62,0xf1,0x7f,0x08,0x7a,0xd0]
10815 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10816 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10817 ; X86-NEXT: vcvtudq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x7a,0xc8]
10818 ; X86-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
10819 ; X86-NEXT: retl # encoding: [0xc3]
10821 ; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_128:
10823 ; X64-NEXT: vcvtudq2ps %xmm0, %xmm2 # encoding: [0x62,0xf1,0x7f,0x08,0x7a,0xd0]
10824 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10825 ; X64-NEXT: vcvtudq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x7a,0xc8]
10826 ; X64-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
10827 ; X64-NEXT: retq # encoding: [0xc3]
10828 %res = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 %x2)
10829 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 -1)
10830 %res2 = fadd <4 x float> %res, %res1
10831 ret <4 x float> %res2
10834 declare <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32>, <8 x float>, i8)
10836 define <8 x float>@test_int_x86_avx512_mask_cvt_udq2ps_256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) {
10837 ; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_256:
10839 ; X86-NEXT: vcvtudq2ps %ymm0, %ymm2 # encoding: [0x62,0xf1,0x7f,0x28,0x7a,0xd0]
10840 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10841 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10842 ; X86-NEXT: vcvtudq2ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x7a,0xc8]
10843 ; X86-NEXT: vaddps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc2]
10844 ; X86-NEXT: retl # encoding: [0xc3]
10846 ; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_256:
10848 ; X64-NEXT: vcvtudq2ps %ymm0, %ymm2 # encoding: [0x62,0xf1,0x7f,0x28,0x7a,0xd0]
10849 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10850 ; X64-NEXT: vcvtudq2ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x7a,0xc8]
10851 ; X64-NEXT: vaddps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc2]
10852 ; X64-NEXT: retq # encoding: [0xc3]
10853 %res = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 %x2)
10854 %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1)
10855 %res2 = fadd <8 x float> %res, %res1
10856 ret <8 x float> %res2
10859 declare <4 x i32> @llvm.x86.avx512.mask.vpermi2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
10861 define <4 x i32>@test_int_x86_avx512_mask_vpermi2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
10862 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_d_128:
10864 ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
10865 ; X86-NEXT: vpermt2d %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x7e,0xda]
10866 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10867 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10868 ; X86-NEXT: vpermi2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x76,0xca]
10869 ; X86-NEXT: vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
10870 ; X86-NEXT: retl # encoding: [0xc3]
10872 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_d_128:
10874 ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
10875 ; X64-NEXT: vpermt2d %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x7e,0xda]
10876 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10877 ; X64-NEXT: vpermi2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x76,0xca]
10878 ; X64-NEXT: vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
10879 ; X64-NEXT: retq # encoding: [0xc3]
10880 %res = call <4 x i32> @llvm.x86.avx512.mask.vpermi2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
10881 %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpermi2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
10882 %res2 = add <4 x i32> %res, %res1
10883 ret <4 x i32> %res2
10886 declare <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
10888 define <4 x i32>@test_int_x86_avx512_mask_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
10889 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_d_128:
10891 ; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
10892 ; X86-NEXT: vpermt2d %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xda]
10893 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10894 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10895 ; X86-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7e,0xca]
10896 ; X86-NEXT: vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
10897 ; X86-NEXT: retl # encoding: [0xc3]
10899 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_d_128:
10901 ; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
10902 ; X64-NEXT: vpermt2d %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xda]
10903 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10904 ; X64-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7e,0xca]
10905 ; X64-NEXT: vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
10906 ; X64-NEXT: retq # encoding: [0xc3]
10907 %res = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
10908 %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
10909 %res2 = add <4 x i32> %res, %res1
10910 ret <4 x i32> %res2
10913 declare <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
10915 define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
10916 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128:
10918 ; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
10919 ; X86-NEXT: vpermt2d %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xda]
10920 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10921 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10922 ; X86-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7e,0xca]
10923 ; X86-NEXT: vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
10924 ; X86-NEXT: retl # encoding: [0xc3]
10926 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128:
10928 ; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
10929 ; X64-NEXT: vpermt2d %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xda]
10930 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10931 ; X64-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7e,0xca]
10932 ; X64-NEXT: vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
10933 ; X64-NEXT: retq # encoding: [0xc3]
10934 %res = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
10935 %res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
10936 %res2 = add <4 x i32> %res, %res1
10937 ret <4 x i32> %res2
10940 declare <8 x i32> @llvm.x86.avx512.mask.vpermi2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
10942 define <8 x i32>@test_int_x86_avx512_mask_vpermi2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
10943 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_d_256:
10945 ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
10946 ; X86-NEXT: vpermt2d %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x7e,0xda]
10947 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10948 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10949 ; X86-NEXT: vpermi2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x76,0xca]
10950 ; X86-NEXT: vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
10951 ; X86-NEXT: retl # encoding: [0xc3]
10953 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_d_256:
10955 ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
10956 ; X64-NEXT: vpermt2d %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x7e,0xda]
10957 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10958 ; X64-NEXT: vpermi2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x76,0xca]
10959 ; X64-NEXT: vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
10960 ; X64-NEXT: retq # encoding: [0xc3]
10961 %res = call <8 x i32> @llvm.x86.avx512.mask.vpermi2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
10962 %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpermi2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
10963 %res2 = add <8 x i32> %res, %res1
10964 ret <8 x i32> %res2
10967 declare <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
10969 define <8 x i32>@test_int_x86_avx512_mask_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
10970 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_d_256:
10972 ; X86-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
10973 ; X86-NEXT: vpermt2d %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xda]
10974 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10975 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10976 ; X86-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7e,0xca]
10977 ; X86-NEXT: vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
10978 ; X86-NEXT: retl # encoding: [0xc3]
10980 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_d_256:
10982 ; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
10983 ; X64-NEXT: vpermt2d %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xda]
10984 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10985 ; X64-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7e,0xca]
10986 ; X64-NEXT: vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
10987 ; X64-NEXT: retq # encoding: [0xc3]
10988 %res = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
10989 %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
10990 %res2 = add <8 x i32> %res, %res1
10991 ret <8 x i32> %res2
10994 declare <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
10996 define <8 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
10997 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_256:
10999 ; X86-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
11000 ; X86-NEXT: vpermt2d %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xda]
11001 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
11002 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
11003 ; X86-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7e,0xca]
11004 ; X86-NEXT: vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
11005 ; X86-NEXT: retl # encoding: [0xc3]
11007 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_256:
11009 ; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
11010 ; X64-NEXT: vpermt2d %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xda]
11011 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
11012 ; X64-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7e,0xca]
11013 ; X64-NEXT: vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
11014 ; X64-NEXT: retq # encoding: [0xc3]
11015 %res = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
11016 %res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
11017 %res2 = add <8 x i32> %res, %res1
11018 ret <8 x i32> %res2
11021 declare <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8)
11023 define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
11024 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128:
11026 ; X86-NEXT: vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
11027 ; X86-NEXT: vpermt2pd %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x7f,0xda]
11028 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
11029 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
11030 ; X86-NEXT: vpermi2pd %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x77,0xca]
11031 ; X86-NEXT: vaddpd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc3]
11032 ; X86-NEXT: retl # encoding: [0xc3]
11034 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128:
11036 ; X64-NEXT: vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
11037 ; X64-NEXT: vpermt2pd %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x7f,0xda]
11038 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
11039 ; X64-NEXT: vpermi2pd %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x77,0xca]
11040 ; X64-NEXT: vaddpd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc3]
11041 ; X64-NEXT: retq # encoding: [0xc3]
11042 %res = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3)
11043 %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)
11044 %res2 = fadd <2 x double> %res, %res1
11045 ret <2 x double> %res2
11048 declare <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)
11050 define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
11051 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256:
11053 ; X86-NEXT: vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
11054 ; X86-NEXT: vpermt2pd %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x7f,0xda]
11055 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
11056 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
11057 ; X86-NEXT: vpermi2pd %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x77,0xca]
11058 ; X86-NEXT: vaddpd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc3]
11059 ; X86-NEXT: retl # encoding: [0xc3]
11061 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256:
11063 ; X64-NEXT: vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
11064 ; X64-NEXT: vpermt2pd %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x7f,0xda]
11065 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
11066 ; X64-NEXT: vpermi2pd %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x77,0xca]
11067 ; X64-NEXT: vaddpd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc3]
11068 ; X64-NEXT: retq # encoding: [0xc3]
11069 %res = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
11070 %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
11071 %res2 = fadd <4 x double> %res, %res1
11072 ret <4 x double> %res2
11075 declare <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8)
11077 define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {
11078 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128:
11080 ; X86-NEXT: vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
11081 ; X86-NEXT: vpermt2ps %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x7f,0xda]
11082 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
11083 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
11084 ; X86-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
11085 ; X86-NEXT: vaddps %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc3]
11086 ; X86-NEXT: retl # encoding: [0xc3]
11088 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128:
11090 ; X64-NEXT: vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
11091 ; X64-NEXT: vpermt2ps %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x7f,0xda]
11092 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
11093 ; X64-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
11094 ; X64-NEXT: vaddps %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc3]
11095 ; X64-NEXT: retq # encoding: [0xc3]
11096 %res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3)
11097 %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)
11098 %res2 = fadd <4 x float> %res, %res1
11099 ret <4 x float> %res2
11102 define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128_cast(<4 x float> %x0, <2 x i64> %x1, <4 x float> %x2, i8 %x3) {
11103 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128_cast:
11105 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
11106 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
11107 ; X86-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
11108 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
11109 ; X86-NEXT: retl # encoding: [0xc3]
11111 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128_cast:
11113 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
11114 ; X64-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
11115 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
11116 ; X64-NEXT: retq # encoding: [0xc3]
11117 %x1cast = bitcast <2 x i64> %x1 to <4 x i32>
11118 %res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1cast, <4 x float> %x2, i8 %x3)
11119 ret <4 x float> %res
11122 declare <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8)
11124 define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
11125 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256:
11127 ; X86-NEXT: vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
11128 ; X86-NEXT: vpermt2ps %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x7f,0xda]
11129 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
11130 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
11131 ; X86-NEXT: vpermi2ps %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x77,0xca]
11132 ; X86-NEXT: vaddps %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc3]
11133 ; X86-NEXT: retl # encoding: [0xc3]
11135 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256:
11137 ; X64-NEXT: vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
11138 ; X64-NEXT: vpermt2ps %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x7f,0xda]
11139 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
11140 ; X64-NEXT: vpermi2ps %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x77,0xca]
11141 ; X64-NEXT: vaddps %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc3]
11142 ; X64-NEXT: retq # encoding: [0xc3]
11143 %res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
11144 %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
11145 %res2 = fadd <8 x float> %res, %res1
11146 ret <8 x float> %res2
11149 declare <2 x i64> @llvm.x86.avx512.mask.vpermi2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
11151 define <2 x i64>@test_int_x86_avx512_mask_vpermi2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
11152 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_q_128:
11154 ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
11155 ; X86-NEXT: vpermt2q %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x7e,0xda]
11156 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
11157 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
11158 ; X86-NEXT: vpermi2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x76,0xca]
11159 ; X86-NEXT: vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
11160 ; X86-NEXT: retl # encoding: [0xc3]
11162 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_q_128:
11164 ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
11165 ; X64-NEXT: vpermt2q %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x7e,0xda]
11166 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
11167 ; X64-NEXT: vpermi2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x76,0xca]
11168 ; X64-NEXT: vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
11169 ; X64-NEXT: retq # encoding: [0xc3]
11170 %res = call <2 x i64> @llvm.x86.avx512.mask.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
11171 %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
11172 %res2 = add <2 x i64> %res, %res1
11173 ret <2 x i64> %res2
11176 declare <2 x i64> @llvm.x86.avx512.mask.vpermt2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
11178 define <2 x i64>@test_int_x86_avx512_mask_vpermt2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
11179 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_q_128:
11181 ; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
11182 ; X86-NEXT: vpermt2q %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7e,0xda]
11183 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
11184 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
11185 ; X86-NEXT: vpermt2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7e,0xca]
11186 ; X86-NEXT: vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
11187 ; X86-NEXT: retl # encoding: [0xc3]
11189 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_q_128:
11191 ; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
11192 ; X64-NEXT: vpermt2q %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7e,0xda]
11193 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
11194 ; X64-NEXT: vpermt2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7e,0xca]
11195 ; X64-NEXT: vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
11196 ; X64-NEXT: retq # encoding: [0xc3]
11197 %res = call <2 x i64> @llvm.x86.avx512.mask.vpermt2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
11198 %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpermt2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
11199 %res2 = add <2 x i64> %res, %res1
11200 ret <2 x i64> %res2
11203 declare <2 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
11205 define <2 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
11206 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_128:
11208 ; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
11209 ; X86-NEXT: vpermt2q %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7e,0xda]
11210 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
11211 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
11212 ; X86-NEXT: vpermt2q %xmm2, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x7e,0xca]
11213 ; X86-NEXT: vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
11214 ; X86-NEXT: retl # encoding: [0xc3]
11216 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_128:
11218 ; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
11219 ; X64-NEXT: vpermt2q %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7e,0xda]
11220 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
11221 ; X64-NEXT: vpermt2q %xmm2, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x7e,0xca]
11222 ; X64-NEXT: vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
11223 ; X64-NEXT: retq # encoding: [0xc3]
11224 %res = call <2 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
11225 %res1 = call <2 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
11226 %res2 = add <2 x i64> %res, %res1
11227 ret <2 x i64> %res2
11230 declare <4 x i64> @llvm.x86.avx512.mask.vpermi2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
11232 define <4 x i64>@test_int_x86_avx512_mask_vpermi2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
11233 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_q_256:
11235 ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
11236 ; X86-NEXT: vpermt2q %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x7e,0xda]
11237 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
11238 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
11239 ; X86-NEXT: vpermi2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x76,0xca]
11240 ; X86-NEXT: vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
11241 ; X86-NEXT: retl # encoding: [0xc3]
11243 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_q_256:
11245 ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
11246 ; X64-NEXT: vpermt2q %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x7e,0xda]
11247 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
11248 ; X64-NEXT: vpermi2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x76,0xca]
11249 ; X64-NEXT: vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
11250 ; X64-NEXT: retq # encoding: [0xc3]
11251 %res = call <4 x i64> @llvm.x86.avx512.mask.vpermi2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
11252 %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpermi2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
11253 %res2 = add <4 x i64> %res, %res1
11254 ret <4 x i64> %res2
11257 declare <4 x i64> @llvm.x86.avx512.mask.vpermt2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
11259 define <4 x i64>@test_int_x86_avx512_mask_vpermt2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
11260 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_q_256:
11262 ; X86-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
11263 ; X86-NEXT: vpermt2q %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7e,0xda]
11264 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
11265 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
11266 ; X86-NEXT: vpermt2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7e,0xca]
11267 ; X86-NEXT: vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
11268 ; X86-NEXT: retl # encoding: [0xc3]
11270 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_q_256:
11272 ; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
11273 ; X64-NEXT: vpermt2q %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7e,0xda]
11274 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
11275 ; X64-NEXT: vpermt2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7e,0xca]
11276 ; X64-NEXT: vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
11277 ; X64-NEXT: retq # encoding: [0xc3]
11278 %res = call <4 x i64> @llvm.x86.avx512.mask.vpermt2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
11279 %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpermt2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
11280 %res2 = add <4 x i64> %res, %res1
11281 ret <4 x i64> %res2
11284 declare <4 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
11286 define <4 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
11287 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_256:
11289 ; X86-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
11290 ; X86-NEXT: vpermt2q %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7e,0xda]
11291 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
11292 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
11293 ; X86-NEXT: vpermt2q %ymm2, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x7e,0xca]
11294 ; X86-NEXT: vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
11295 ; X86-NEXT: retl # encoding: [0xc3]
11297 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_256:
11299 ; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
11300 ; X64-NEXT: vpermt2q %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7e,0xda]
11301 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
11302 ; X64-NEXT: vpermt2q %ymm2, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x7e,0xca]
11303 ; X64-NEXT: vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
11304 ; X64-NEXT: retq # encoding: [0xc3]
11305 %res = call <4 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
11306 %res1 = call <4 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
11307 %res2 = add <4 x i64> %res, %res1
11308 ret <4 x i64> %res2
11311 define void @test_mask_compress_store_pd_128(i8* %addr, <2 x double> %data, i8 %mask) {
11312 ; X86-LABEL: test_mask_compress_store_pd_128:
11314 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11315 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11316 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11317 ; X86-NEXT: vcompresspd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x00]
11318 ; X86-NEXT: retl # encoding: [0xc3]
11320 ; X64-LABEL: test_mask_compress_store_pd_128:
11322 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11323 ; X64-NEXT: vcompresspd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x07]
11324 ; X64-NEXT: retq # encoding: [0xc3]
11325 call void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
11329 declare void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
11331 define void @test_compress_store_pd_128(i8* %addr, <2 x double> %data) {
11332 ; X86-LABEL: test_compress_store_pd_128:
11334 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11335 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11336 ; X86-NEXT: vcompresspd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x00]
11337 ; X86-NEXT: retl # encoding: [0xc3]
11339 ; X64-LABEL: test_compress_store_pd_128:
11341 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11342 ; X64-NEXT: vcompresspd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x07]
11343 ; X64-NEXT: retq # encoding: [0xc3]
11344 call void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 -1)
11348 define void @test_mask_compress_store_ps_128(i8* %addr, <4 x float> %data, i8 %mask) {
11349 ; X86-LABEL: test_mask_compress_store_ps_128:
11351 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11352 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11353 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11354 ; X86-NEXT: vcompressps %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x00]
11355 ; X86-NEXT: retl # encoding: [0xc3]
11357 ; X64-LABEL: test_mask_compress_store_ps_128:
11359 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11360 ; X64-NEXT: vcompressps %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x07]
11361 ; X64-NEXT: retq # encoding: [0xc3]
11362 call void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
11366 declare void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
11368 define void @test_compress_store_ps_128(i8* %addr, <4 x float> %data) {
11369 ; X86-LABEL: test_compress_store_ps_128:
11371 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11372 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11373 ; X86-NEXT: vcompressps %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x00]
11374 ; X86-NEXT: retl # encoding: [0xc3]
11376 ; X64-LABEL: test_compress_store_ps_128:
11378 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11379 ; X64-NEXT: vcompressps %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x07]
11380 ; X64-NEXT: retq # encoding: [0xc3]
11381 call void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 -1)
11385 define void @test_mask_compress_store_q_128(i8* %addr, <2 x i64> %data, i8 %mask) {
11386 ; X86-LABEL: test_mask_compress_store_q_128:
11388 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11389 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11390 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11391 ; X86-NEXT: vpcompressq %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x00]
11392 ; X86-NEXT: retl # encoding: [0xc3]
11394 ; X64-LABEL: test_mask_compress_store_q_128:
11396 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11397 ; X64-NEXT: vpcompressq %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x07]
11398 ; X64-NEXT: retq # encoding: [0xc3]
11399 call void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
11403 declare void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
11405 define void @test_compress_store_q_128(i8* %addr, <2 x i64> %data) {
11406 ; X86-LABEL: test_compress_store_q_128:
11408 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11409 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11410 ; X86-NEXT: vpcompressq %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x00]
11411 ; X86-NEXT: retl # encoding: [0xc3]
11413 ; X64-LABEL: test_compress_store_q_128:
11415 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11416 ; X64-NEXT: vpcompressq %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x07]
11417 ; X64-NEXT: retq # encoding: [0xc3]
11418 call void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 -1)
11422 define void @test_mask_compress_store_d_128(i8* %addr, <4 x i32> %data, i8 %mask) {
11423 ; X86-LABEL: test_mask_compress_store_d_128:
11425 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11426 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11427 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11428 ; X86-NEXT: vpcompressd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x00]
11429 ; X86-NEXT: retl # encoding: [0xc3]
11431 ; X64-LABEL: test_mask_compress_store_d_128:
11433 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11434 ; X64-NEXT: vpcompressd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x07]
11435 ; X64-NEXT: retq # encoding: [0xc3]
11436 call void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
11440 declare void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
11442 define void @test_compress_store_d_128(i8* %addr, <4 x i32> %data) {
11443 ; X86-LABEL: test_compress_store_d_128:
11445 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11446 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11447 ; X86-NEXT: vpcompressd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x00]
11448 ; X86-NEXT: retl # encoding: [0xc3]
11450 ; X64-LABEL: test_compress_store_d_128:
11452 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11453 ; X64-NEXT: vpcompressd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x07]
11454 ; X64-NEXT: retq # encoding: [0xc3]
11455 call void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 -1)
11459 define <2 x double> @test_mask_expand_load_pd_128(i8* %addr, <2 x double> %data, i8 %mask) {
11460 ; X86-LABEL: test_mask_expand_load_pd_128:
11462 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11463 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11464 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11465 ; X86-NEXT: vexpandpd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x00]
11466 ; X86-NEXT: retl # encoding: [0xc3]
11468 ; X64-LABEL: test_mask_expand_load_pd_128:
11470 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11471 ; X64-NEXT: vexpandpd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x07]
11472 ; X64-NEXT: retq # encoding: [0xc3]
11473 %res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
11474 ret <2 x double> %res
11477 define <2 x double> @test_maskz_expand_load_pd_128(i8* %addr, i8 %mask) {
11478 ; X86-LABEL: test_maskz_expand_load_pd_128:
11480 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11481 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11482 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11483 ; X86-NEXT: vexpandpd (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0x00]
11484 ; X86-NEXT: retl # encoding: [0xc3]
11486 ; X64-LABEL: test_maskz_expand_load_pd_128:
11488 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11489 ; X64-NEXT: vexpandpd (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0x07]
11490 ; X64-NEXT: retq # encoding: [0xc3]
11491 %res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> zeroinitializer, i8 %mask)
11492 ret <2 x double> %res
11493 }
11495 declare <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
11497 define <2 x double> @test_expand_load_pd_128(i8* %addr, <2 x double> %data) {
11498 ; X86-LABEL: test_expand_load_pd_128:
11500 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11501 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11502 ; X86-NEXT: vexpandpd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x00]
11503 ; X86-NEXT: retl # encoding: [0xc3]
11505 ; X64-LABEL: test_expand_load_pd_128:
11507 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11508 ; X64-NEXT: vexpandpd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x07]
11509 ; X64-NEXT: retq # encoding: [0xc3]
11510 %res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 -1)
11511 ret <2 x double> %res
11512 }
11514 define <4 x float> @test_mask_expand_load_ps_128(i8* %addr, <4 x float> %data, i8 %mask) {
11515 ; X86-LABEL: test_mask_expand_load_ps_128:
11517 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11518 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11519 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11520 ; X86-NEXT: vexpandps (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x00]
11521 ; X86-NEXT: retl # encoding: [0xc3]
11523 ; X64-LABEL: test_mask_expand_load_ps_128:
11525 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11526 ; X64-NEXT: vexpandps (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x07]
11527 ; X64-NEXT: retq # encoding: [0xc3]
11528 %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
11529 ret <4 x float> %res
11530 }
11532 define <4 x float> @test_maskz_expand_load_ps_128(i8* %addr, i8 %mask) {
11533 ; X86-LABEL: test_maskz_expand_load_ps_128:
11535 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11536 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11537 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11538 ; X86-NEXT: vexpandps (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0x00]
11539 ; X86-NEXT: retl # encoding: [0xc3]
11541 ; X64-LABEL: test_maskz_expand_load_ps_128:
11543 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11544 ; X64-NEXT: vexpandps (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0x07]
11545 ; X64-NEXT: retq # encoding: [0xc3]
11546 %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> zeroinitializer, i8 %mask)
11547 ret <4 x float> %res
11548 }
11550 declare <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
11552 define <4 x float> @test_expand_load_ps_128(i8* %addr, <4 x float> %data) {
11553 ; X86-LABEL: test_expand_load_ps_128:
11555 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11556 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11557 ; X86-NEXT: vexpandps (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x00]
11558 ; X86-NEXT: retl # encoding: [0xc3]
11560 ; X64-LABEL: test_expand_load_ps_128:
11562 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11563 ; X64-NEXT: vexpandps (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x07]
11564 ; X64-NEXT: retq # encoding: [0xc3]
11565 %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 -1)
11566 ret <4 x float> %res
11567 }
11569 define <2 x i64> @test_mask_expand_load_q_128(i8* %addr, <2 x i64> %data, i8 %mask) {
11570 ; X86-LABEL: test_mask_expand_load_q_128:
11572 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11573 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11574 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11575 ; X86-NEXT: vpexpandq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x00]
11576 ; X86-NEXT: retl # encoding: [0xc3]
11578 ; X64-LABEL: test_mask_expand_load_q_128:
11580 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11581 ; X64-NEXT: vpexpandq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x07]
11582 ; X64-NEXT: retq # encoding: [0xc3]
11583 %res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
11584 ret <2 x i64> %res
11585 }
11587 define <2 x i64> @test_maskz_expand_load_q_128(i8* %addr, i8 %mask) {
11588 ; X86-LABEL: test_maskz_expand_load_q_128:
11590 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11591 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11592 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11593 ; X86-NEXT: vpexpandq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0x00]
11594 ; X86-NEXT: retl # encoding: [0xc3]
11596 ; X64-LABEL: test_maskz_expand_load_q_128:
11598 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11599 ; X64-NEXT: vpexpandq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0x07]
11600 ; X64-NEXT: retq # encoding: [0xc3]
11601 %res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> zeroinitializer, i8 %mask)
11602 ret <2 x i64> %res
11603 }
11605 declare <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
11607 define <2 x i64> @test_expand_load_q_128(i8* %addr, <2 x i64> %data) {
11608 ; X86-LABEL: test_expand_load_q_128:
11610 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11611 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11612 ; X86-NEXT: vpexpandq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x00]
11613 ; X86-NEXT: retl # encoding: [0xc3]
11615 ; X64-LABEL: test_expand_load_q_128:
11617 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11618 ; X64-NEXT: vpexpandq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x07]
11619 ; X64-NEXT: retq # encoding: [0xc3]
11620 %res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 -1)
11621 ret <2 x i64> %res
11622 }
11624 define <4 x i32> @test_mask_expand_load_d_128(i8* %addr, <4 x i32> %data, i8 %mask) {
11625 ; X86-LABEL: test_mask_expand_load_d_128:
11627 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11628 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11629 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11630 ; X86-NEXT: vpexpandd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x00]
11631 ; X86-NEXT: retl # encoding: [0xc3]
11633 ; X64-LABEL: test_mask_expand_load_d_128:
11635 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11636 ; X64-NEXT: vpexpandd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x07]
11637 ; X64-NEXT: retq # encoding: [0xc3]
11638 %res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
11639 ret <4 x i32> %res
11640 }
11642 define <4 x i32> @test_maskz_expand_load_d_128(i8* %addr, i8 %mask) {
11643 ; X86-LABEL: test_maskz_expand_load_d_128:
11645 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11646 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11647 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11648 ; X86-NEXT: vpexpandd (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0x00]
11649 ; X86-NEXT: retl # encoding: [0xc3]
11651 ; X64-LABEL: test_maskz_expand_load_d_128:
11653 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11654 ; X64-NEXT: vpexpandd (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0x07]
11655 ; X64-NEXT: retq # encoding: [0xc3]
11656 %res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> zeroinitializer, i8 %mask)
11657 ret <4 x i32> %res
11658 }
11660 declare <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
11662 define <4 x i32> @test_expand_load_d_128(i8* %addr, <4 x i32> %data) {
11663 ; X86-LABEL: test_expand_load_d_128:
11665 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11666 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11667 ; X86-NEXT: vpexpandd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x00]
11668 ; X86-NEXT: retl # encoding: [0xc3]
11670 ; X64-LABEL: test_expand_load_d_128:
11672 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11673 ; X64-NEXT: vpexpandd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x07]
11674 ; X64-NEXT: retq # encoding: [0xc3]
11675 %res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 -1)
11676 ret <4 x i32> %res
11677 }
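; 256-bit compress-store tests: vcompresspd/vcompressps/vpcompressq/vpcompressd from %ymm0;
; each test ends with vzeroupper before returning.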
11679 define void @test_mask_compress_store_pd_256(i8* %addr, <4 x double> %data, i8 %mask) {
11680 ; X86-LABEL: test_mask_compress_store_pd_256:
11682 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11683 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11684 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11685 ; X86-NEXT: vcompresspd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x00]
11686 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11687 ; X86-NEXT: retl # encoding: [0xc3]
11689 ; X64-LABEL: test_mask_compress_store_pd_256:
11691 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11692 ; X64-NEXT: vcompresspd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x07]
11693 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11694 ; X64-NEXT: retq # encoding: [0xc3]
11695 call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
11696 ret void
11697 }
11699 declare void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
11701 define void @test_compress_store_pd_256(i8* %addr, <4 x double> %data) {
11702 ; X86-LABEL: test_compress_store_pd_256:
11704 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11705 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11706 ; X86-NEXT: vcompresspd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x00]
11707 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11708 ; X86-NEXT: retl # encoding: [0xc3]
11710 ; X64-LABEL: test_compress_store_pd_256:
11712 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11713 ; X64-NEXT: vcompresspd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x07]
11714 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11715 ; X64-NEXT: retq # encoding: [0xc3]
11716 call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 -1)
11717 ret void
11718 }
11720 define void @test_mask_compress_store_ps_256(i8* %addr, <8 x float> %data, i8 %mask) {
11721 ; X86-LABEL: test_mask_compress_store_ps_256:
11723 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11724 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11725 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11726 ; X86-NEXT: vcompressps %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x00]
11727 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11728 ; X86-NEXT: retl # encoding: [0xc3]
11730 ; X64-LABEL: test_mask_compress_store_ps_256:
11732 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11733 ; X64-NEXT: vcompressps %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x07]
11734 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11735 ; X64-NEXT: retq # encoding: [0xc3]
11736 call void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
11737 ret void
11738 }
11740 declare void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
11742 define void @test_compress_store_ps_256(i8* %addr, <8 x float> %data) {
11743 ; X86-LABEL: test_compress_store_ps_256:
11745 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11746 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11747 ; X86-NEXT: vcompressps %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x00]
11748 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11749 ; X86-NEXT: retl # encoding: [0xc3]
11751 ; X64-LABEL: test_compress_store_ps_256:
11753 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11754 ; X64-NEXT: vcompressps %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x07]
11755 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11756 ; X64-NEXT: retq # encoding: [0xc3]
11757 call void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 -1)
11758 ret void
11759 }
11761 define void @test_mask_compress_store_q_256(i8* %addr, <4 x i64> %data, i8 %mask) {
11762 ; X86-LABEL: test_mask_compress_store_q_256:
11764 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11765 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11766 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11767 ; X86-NEXT: vpcompressq %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x00]
11768 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11769 ; X86-NEXT: retl # encoding: [0xc3]
11771 ; X64-LABEL: test_mask_compress_store_q_256:
11773 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11774 ; X64-NEXT: vpcompressq %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x07]
11775 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11776 ; X64-NEXT: retq # encoding: [0xc3]
11777 call void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
11778 ret void
11779 }
11781 declare void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
11783 define void @test_compress_store_q_256(i8* %addr, <4 x i64> %data) {
11784 ; X86-LABEL: test_compress_store_q_256:
11786 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11787 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11788 ; X86-NEXT: vpcompressq %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x00]
11789 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11790 ; X86-NEXT: retl # encoding: [0xc3]
11792 ; X64-LABEL: test_compress_store_q_256:
11794 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11795 ; X64-NEXT: vpcompressq %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x07]
11796 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11797 ; X64-NEXT: retq # encoding: [0xc3]
11798 call void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 -1)
11799 ret void
11800 }
11802 define void @test_mask_compress_store_d_256(i8* %addr, <8 x i32> %data, i8 %mask) {
11803 ; X86-LABEL: test_mask_compress_store_d_256:
11805 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11806 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11807 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11808 ; X86-NEXT: vpcompressd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x00]
11809 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11810 ; X86-NEXT: retl # encoding: [0xc3]
11812 ; X64-LABEL: test_mask_compress_store_d_256:
11814 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11815 ; X64-NEXT: vpcompressd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x07]
11816 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11817 ; X64-NEXT: retq # encoding: [0xc3]
11818 call void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
11819 ret void
11820 }
11822 declare void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
11824 define void @test_compress_store_d_256(i8* %addr, <8 x i32> %data) {
11825 ; X86-LABEL: test_compress_store_d_256:
11827 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11828 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11829 ; X86-NEXT: vpcompressd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x00]
11830 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11831 ; X86-NEXT: retl # encoding: [0xc3]
11833 ; X64-LABEL: test_compress_store_d_256:
11835 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11836 ; X64-NEXT: vpcompressd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x07]
11837 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11838 ; X64-NEXT: retq # encoding: [0xc3]
11839 call void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 -1)
11840 ret void
11841 }
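; 256-bit expand-load tests: vexpandpd/vexpandps/vpexpandq/vpexpandd loading into %ymm0 under
; merge, zero, and all-ones masks.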
11843 define <4 x double> @test_mask_expand_load_pd_256(i8* %addr, <4 x double> %data, i8 %mask) {
11844 ; X86-LABEL: test_mask_expand_load_pd_256:
11846 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11847 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11848 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11849 ; X86-NEXT: vexpandpd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x00]
11850 ; X86-NEXT: retl # encoding: [0xc3]
11852 ; X64-LABEL: test_mask_expand_load_pd_256:
11854 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11855 ; X64-NEXT: vexpandpd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x07]
11856 ; X64-NEXT: retq # encoding: [0xc3]
11857 %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
11858 ret <4 x double> %res
11859 }
11861 define <4 x double> @test_maskz_expand_load_pd_256(i8* %addr, i8 %mask) {
11862 ; X86-LABEL: test_maskz_expand_load_pd_256:
11864 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11865 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11866 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11867 ; X86-NEXT: vexpandpd (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0x00]
11868 ; X86-NEXT: retl # encoding: [0xc3]
11870 ; X64-LABEL: test_maskz_expand_load_pd_256:
11872 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11873 ; X64-NEXT: vexpandpd (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0x07]
11874 ; X64-NEXT: retq # encoding: [0xc3]
11875 %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> zeroinitializer, i8 %mask)
11876 ret <4 x double> %res
11877 }
11879 declare <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
11881 define <4 x double> @test_expand_load_pd_256(i8* %addr, <4 x double> %data) {
11882 ; X86-LABEL: test_expand_load_pd_256:
11884 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11885 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11886 ; X86-NEXT: vexpandpd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x00]
11887 ; X86-NEXT: retl # encoding: [0xc3]
11889 ; X64-LABEL: test_expand_load_pd_256:
11891 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11892 ; X64-NEXT: vexpandpd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x07]
11893 ; X64-NEXT: retq # encoding: [0xc3]
11894 %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 -1)
11895 ret <4 x double> %res
11896 }
11898 define <8 x float> @test_mask_expand_load_ps_256(i8* %addr, <8 x float> %data, i8 %mask) {
11899 ; X86-LABEL: test_mask_expand_load_ps_256:
11901 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11902 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11903 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11904 ; X86-NEXT: vexpandps (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x00]
11905 ; X86-NEXT: retl # encoding: [0xc3]
11907 ; X64-LABEL: test_mask_expand_load_ps_256:
11909 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11910 ; X64-NEXT: vexpandps (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x07]
11911 ; X64-NEXT: retq # encoding: [0xc3]
11912 %res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
11913 ret <8 x float> %res
11914 }
11916 define <8 x float> @test_maskz_expand_load_ps_256(i8* %addr, i8 %mask) {
11917 ; X86-LABEL: test_maskz_expand_load_ps_256:
11919 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11920 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11921 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11922 ; X86-NEXT: vexpandps (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0x00]
11923 ; X86-NEXT: retl # encoding: [0xc3]
11925 ; X64-LABEL: test_maskz_expand_load_ps_256:
11927 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11928 ; X64-NEXT: vexpandps (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0x07]
11929 ; X64-NEXT: retq # encoding: [0xc3]
11930 %res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> zeroinitializer, i8 %mask)
11931 ret <8 x float> %res
11932 }
11934 declare <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
11936 define <8 x float> @test_expand_load_ps_256(i8* %addr, <8 x float> %data) {
11937 ; X86-LABEL: test_expand_load_ps_256:
11939 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11940 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11941 ; X86-NEXT: vexpandps (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x00]
11942 ; X86-NEXT: retl # encoding: [0xc3]
11944 ; X64-LABEL: test_expand_load_ps_256:
11946 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11947 ; X64-NEXT: vexpandps (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x07]
11948 ; X64-NEXT: retq # encoding: [0xc3]
11949 %res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 -1)
11950 ret <8 x float> %res
11951 }
11953 define <4 x i64> @test_mask_expand_load_q_256(i8* %addr, <4 x i64> %data, i8 %mask) {
11954 ; X86-LABEL: test_mask_expand_load_q_256:
11956 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11957 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11958 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11959 ; X86-NEXT: vpexpandq (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x00]
11960 ; X86-NEXT: retl # encoding: [0xc3]
11962 ; X64-LABEL: test_mask_expand_load_q_256:
11964 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11965 ; X64-NEXT: vpexpandq (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x07]
11966 ; X64-NEXT: retq # encoding: [0xc3]
11967 %res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
11968 ret <4 x i64> %res
11969 }
11971 define <4 x i64> @test_maskz_expand_load_q_256(i8* %addr, i8 %mask) {
11972 ; X86-LABEL: test_maskz_expand_load_q_256:
11974 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11975 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11976 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11977 ; X86-NEXT: vpexpandq (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0x00]
11978 ; X86-NEXT: retl # encoding: [0xc3]
11980 ; X64-LABEL: test_maskz_expand_load_q_256:
11982 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11983 ; X64-NEXT: vpexpandq (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0x07]
11984 ; X64-NEXT: retq # encoding: [0xc3]
11985 %res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> zeroinitializer, i8 %mask)
11986 ret <4 x i64> %res
11987 }
11989 declare <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
11991 define <4 x i64> @test_expand_load_q_256(i8* %addr, <4 x i64> %data) {
11992 ; X86-LABEL: test_expand_load_q_256:
11994 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11995 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11996 ; X86-NEXT: vpexpandq (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x00]
11997 ; X86-NEXT: retl # encoding: [0xc3]
11999 ; X64-LABEL: test_expand_load_q_256:
12001 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
12002 ; X64-NEXT: vpexpandq (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x07]
12003 ; X64-NEXT: retq # encoding: [0xc3]
12004 %res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 -1)
12005 ret <4 x i64> %res
12006 }
12008 define <8 x i32> @test_mask_expand_load_d_256(i8* %addr, <8 x i32> %data, i8 %mask) {
12009 ; X86-LABEL: test_mask_expand_load_d_256:
12011 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
12012 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
12013 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
12014 ; X86-NEXT: vpexpandd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x00]
12015 ; X86-NEXT: retl # encoding: [0xc3]
12017 ; X64-LABEL: test_mask_expand_load_d_256:
12019 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
12020 ; X64-NEXT: vpexpandd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x07]
12021 ; X64-NEXT: retq # encoding: [0xc3]
12022 %res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
12023 ret <8 x i32> %res
12024 }
12026 define <8 x i32> @test_maskz_expand_load_d_256(i8* %addr, i8 %mask) {
12027 ; X86-LABEL: test_maskz_expand_load_d_256:
12029 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
12030 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
12031 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
12032 ; X86-NEXT: vpexpandd (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0x00]
12033 ; X86-NEXT: retl # encoding: [0xc3]
12035 ; X64-LABEL: test_maskz_expand_load_d_256:
12037 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
12038 ; X64-NEXT: vpexpandd (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0x07]
12039 ; X64-NEXT: retq # encoding: [0xc3]
12040 %res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> zeroinitializer, i8 %mask)
12041 ret <8 x i32> %res
12042 }
12044 declare <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
12046 define <8 x i32> @test_expand_load_d_256(i8* %addr, <8 x i32> %data) {
12047 ; X86-LABEL: test_expand_load_d_256:
12049 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
12050 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
12051 ; X86-NEXT: vpexpandd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x00]
12052 ; X86-NEXT: retl # encoding: [0xc3]
12054 ; X64-LABEL: test_expand_load_d_256:
12056 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
12057 ; X64-NEXT: vpexpandd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x07]
12058 ; X64-NEXT: retq # encoding: [0xc3]
12059 %res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 -1)
12060 ret <8 x i32> %res
12061 }
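; Masked sqrt tests: the passthrough operand is zeroinitializer, so the lowering uses the
; zero-masked ({%k1} {z}) form of vsqrtpd/vsqrtps.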
12063 define <4 x double> @test_sqrt_pd_256(<4 x double> %a0, i8 %mask) {
12064 ; X86-LABEL: test_sqrt_pd_256:
12066 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12067 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12068 ; X86-NEXT: vsqrtpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x51,0xc0]
12069 ; X86-NEXT: retl # encoding: [0xc3]
12071 ; X64-LABEL: test_sqrt_pd_256:
12073 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12074 ; X64-NEXT: vsqrtpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x51,0xc0]
12075 ; X64-NEXT: retq # encoding: [0xc3]
12076 %res = call <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
12077 ret <4 x double> %res
12078 }
12079 declare <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
12081 define <8 x float> @test_sqrt_ps_256(<8 x float> %a0, i8 %mask) {
12082 ; X86-LABEL: test_sqrt_ps_256:
12084 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12085 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12086 ; X86-NEXT: vsqrtps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x51,0xc0]
12087 ; X86-NEXT: retl # encoding: [0xc3]
12089 ; X64-LABEL: test_sqrt_ps_256:
12091 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12092 ; X64-NEXT: vsqrtps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x51,0xc0]
12093 ; X64-NEXT: retq # encoding: [0xc3]
12094 %res = call <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
12095 ret <8 x float> %res
12096 }
12098 declare <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
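; Rotate intrinsics. prorv.* rotates each element right by a variable, per-element amount
; (vprorvd/vprorvq); every test checks the unmasked, merge-masked, and zero-masked forms and
; sums the three results with vpaddd/vpaddq.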
12100 declare <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
12102 define <4 x i32>@test_int_x86_avx512_mask_prorv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
12103 ; X86-LABEL: test_int_x86_avx512_mask_prorv_d_128:
12105 ; X86-NEXT: vprorvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x14,0xd9]
12106 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12107 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12108 ; X86-NEXT: vprorvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x14,0xd1]
12109 ; X86-NEXT: vprorvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x14,0xc1]
12110 ; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
12111 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
12112 ; X86-NEXT: retl # encoding: [0xc3]
12114 ; X64-LABEL: test_int_x86_avx512_mask_prorv_d_128:
12116 ; X64-NEXT: vprorvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x14,0xd9]
12117 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12118 ; X64-NEXT: vprorvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x14,0xd1]
12119 ; X64-NEXT: vprorvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x14,0xc1]
12120 ; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
12121 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
12122 ; X64-NEXT: retq # encoding: [0xc3]
12123 %res = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
12124 %res1 = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
12125 %res2 = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
12126 %res3 = add <4 x i32> %res, %res1
12127 %res4 = add <4 x i32> %res3, %res2
12128 ret <4 x i32> %res4
12129 }
12131 declare <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
12133 define <8 x i32>@test_int_x86_avx512_mask_prorv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
12134 ; X86-LABEL: test_int_x86_avx512_mask_prorv_d_256:
12136 ; X86-NEXT: vprorvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x14,0xd9]
12137 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12138 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12139 ; X86-NEXT: vprorvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x14,0xd1]
12140 ; X86-NEXT: vprorvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xc1]
12141 ; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
12142 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
12143 ; X86-NEXT: retl # encoding: [0xc3]
12145 ; X64-LABEL: test_int_x86_avx512_mask_prorv_d_256:
12147 ; X64-NEXT: vprorvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x14,0xd9]
12148 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12149 ; X64-NEXT: vprorvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x14,0xd1]
12150 ; X64-NEXT: vprorvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xc1]
12151 ; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
12152 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
12153 ; X64-NEXT: retq # encoding: [0xc3]
12154 %res = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
12155 %res1 = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
12156 %res2 = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
12157 %res3 = add <8 x i32> %res, %res1
12158 %res4 = add <8 x i32> %res3, %res2
12159 ret <8 x i32> %res4
12160 }
12162 declare <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
12164 define <2 x i64>@test_int_x86_avx512_mask_prorv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
12165 ; X86-LABEL: test_int_x86_avx512_mask_prorv_q_128:
12167 ; X86-NEXT: vprorvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x14,0xd9]
12168 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12169 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12170 ; X86-NEXT: vprorvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x14,0xd1]
12171 ; X86-NEXT: vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x14,0xc1]
12172 ; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
12173 ; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
12174 ; X86-NEXT: retl # encoding: [0xc3]
12176 ; X64-LABEL: test_int_x86_avx512_mask_prorv_q_128:
12178 ; X64-NEXT: vprorvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x14,0xd9]
12179 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12180 ; X64-NEXT: vprorvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x14,0xd1]
12181 ; X64-NEXT: vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x14,0xc1]
12182 ; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
12183 ; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
12184 ; X64-NEXT: retq # encoding: [0xc3]
12185 %res = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
12186 %res1 = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
12187 %res2 = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
12188 %res3 = add <2 x i64> %res, %res1
12189 %res4 = add <2 x i64> %res3, %res2
12190 ret <2 x i64> %res4
12191 }
12193 declare <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
12195 define <4 x i64>@test_int_x86_avx512_mask_prorv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
12196 ; X86-LABEL: test_int_x86_avx512_mask_prorv_q_256:
12198 ; X86-NEXT: vprorvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x14,0xd9]
12199 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12200 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12201 ; X86-NEXT: vprorvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x14,0xd1]
12202 ; X86-NEXT: vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xc1]
12203 ; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
12204 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
12205 ; X86-NEXT: retl # encoding: [0xc3]
12207 ; X64-LABEL: test_int_x86_avx512_mask_prorv_q_256:
12209 ; X64-NEXT: vprorvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x14,0xd9]
12210 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12211 ; X64-NEXT: vprorvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x14,0xd1]
12212 ; X64-NEXT: vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xc1]
12213 ; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
12214 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
12215 ; X64-NEXT: retq # encoding: [0xc3]
12216 %res = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
12217 %res1 = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
12218 %res2 = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
12219 %res3 = add <4 x i64> %res, %res1
12220 %res4 = add <4 x i64> %res3, %res2
12221 ret <4 x i64> %res4
12222 }
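; prol.* rotates left by an immediate count (vprold/vprolq); these tests hard-code a count of 3.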
12224 declare <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32>, i32, <4 x i32>, i8)
12226 define <4 x i32>@test_int_x86_avx512_mask_prol_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
12227 ; X86-LABEL: test_int_x86_avx512_mask_prol_d_128:
12229 ; X86-NEXT: vprold $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc8,0x03]
12230 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
12231 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12232 ; X86-NEXT: vprold $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03]
12233 ; X86-NEXT: vprold $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc8,0x03]
12234 ; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
12235 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
12236 ; X86-NEXT: retl # encoding: [0xc3]
12238 ; X64-LABEL: test_int_x86_avx512_mask_prol_d_128:
12240 ; X64-NEXT: vprold $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc8,0x03]
12241 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
12242 ; X64-NEXT: vprold $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03]
12243 ; X64-NEXT: vprold $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc8,0x03]
12244 ; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
12245 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
12246 ; X64-NEXT: retq # encoding: [0xc3]
12247 %res = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
12248 %res1 = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3)
12249 %res2 = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1)
12250 %res3 = add <4 x i32> %res, %res1
12251 %res4 = add <4 x i32> %res3, %res2
12252 ret <4 x i32> %res4
12253 }
12255 declare <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32>, i32, <8 x i32>, i8)
12257 define <8 x i32>@test_int_x86_avx512_mask_prol_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
12258 ; X86-LABEL: test_int_x86_avx512_mask_prol_d_256:
12260 ; X86-NEXT: vprold $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc8,0x03]
12261 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
12262 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12263 ; X86-NEXT: vprold $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03]
12264 ; X86-NEXT: vprold $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc8,0x03]
12265 ; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
12266 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
12267 ; X86-NEXT: retl # encoding: [0xc3]
12269 ; X64-LABEL: test_int_x86_avx512_mask_prol_d_256:
12271 ; X64-NEXT: vprold $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc8,0x03]
12272 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
12273 ; X64-NEXT: vprold $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03]
12274 ; X64-NEXT: vprold $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc8,0x03]
12275 ; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
12276 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
12277 ; X64-NEXT: retq # encoding: [0xc3]
12278 %res = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
12279 %res1 = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3)
12280 %res2 = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1)
12281 %res3 = add <8 x i32> %res, %res1
12282 %res4 = add <8 x i32> %res3, %res2
12283 ret <8 x i32> %res4
12284 }
12286 declare <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64>, i32, <2 x i64>, i8)
12288 define <2 x i64>@test_int_x86_avx512_mask_prol_q_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
12289 ; X86-LABEL: test_int_x86_avx512_mask_prol_q_128:
12291 ; X86-NEXT: vprolq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc8,0x03]
12292 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
12293 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12294 ; X86-NEXT: vprolq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03]
12295 ; X86-NEXT: vprolq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc8,0x03]
12296 ; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
12297 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
12298 ; X86-NEXT: retl # encoding: [0xc3]
12300 ; X64-LABEL: test_int_x86_avx512_mask_prol_q_128:
12302 ; X64-NEXT: vprolq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc8,0x03]
12303 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
12304 ; X64-NEXT: vprolq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03]
12305 ; X64-NEXT: vprolq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc8,0x03]
12306 ; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
12307 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
12308 ; X64-NEXT: retq # encoding: [0xc3]
12309 %res = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3)
12310 %res1 = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i32 3, <2 x i64> zeroinitializer, i8 %x3)
12311 %res2 = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 -1)
12312 %res3 = add <2 x i64> %res, %res1
12313 %res4 = add <2 x i64> %res3, %res2
12314 ret <2 x i64> %res4
12315 }
12317 declare <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64>, i32, <4 x i64>, i8)
12319 define <4 x i64>@test_int_x86_avx512_mask_prol_q_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
12320 ; X86-LABEL: test_int_x86_avx512_mask_prol_q_256:
12322 ; X86-NEXT: vprolq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc8,0x03]
12323 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
12324 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12325 ; X86-NEXT: vprolq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03]
12326 ; X86-NEXT: vprolq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc8,0x03]
12327 ; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
12328 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
12329 ; X86-NEXT: retl # encoding: [0xc3]
12331 ; X64-LABEL: test_int_x86_avx512_mask_prol_q_256:
12333 ; X64-NEXT: vprolq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc8,0x03]
12334 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
12335 ; X64-NEXT: vprolq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03]
12336 ; X64-NEXT: vprolq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc8,0x03]
12337 ; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
12338 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
12339 ; X64-NEXT: retq # encoding: [0xc3]
12340 %res = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
12341 %res1 = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
12342 %res2 = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
12343 %res3 = add <4 x i64> %res, %res1
12344 %res4 = add <4 x i64> %res3, %res2
12345 ret <4 x i64> %res4
12346 }
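; prolv.* rotates left by a variable, per-element amount (vprolvd/vprolvq).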
12348 declare <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
12350 define <4 x i32>@test_int_x86_avx512_mask_prolv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
12351 ; X86-LABEL: test_int_x86_avx512_mask_prolv_d_128:
12353 ; X86-NEXT: vprolvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x15,0xd9]
12354 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12355 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12356 ; X86-NEXT: vprolvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x15,0xd1]
12357 ; X86-NEXT: vprolvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x15,0xc1]
12358 ; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
12359 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
12360 ; X86-NEXT: retl # encoding: [0xc3]
12362 ; X64-LABEL: test_int_x86_avx512_mask_prolv_d_128:
12364 ; X64-NEXT: vprolvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x15,0xd9]
12365 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12366 ; X64-NEXT: vprolvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x15,0xd1]
12367 ; X64-NEXT: vprolvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x15,0xc1]
12368 ; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
12369 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
12370 ; X64-NEXT: retq # encoding: [0xc3]
12371 %res = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
12372 %res1 = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
12373 %res2 = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
12374 %res3 = add <4 x i32> %res, %res1
12375 %res4 = add <4 x i32> %res3, %res2
12376 ret <4 x i32> %res4
12377 }
12379 declare <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
12381 define <8 x i32>@test_int_x86_avx512_mask_prolv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
12382 ; X86-LABEL: test_int_x86_avx512_mask_prolv_d_256:
12384 ; X86-NEXT: vprolvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x15,0xd9]
12385 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12386 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12387 ; X86-NEXT: vprolvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x15,0xd1]
12388 ; X86-NEXT: vprolvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xc1]
12389 ; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
12390 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
12391 ; X86-NEXT: retl # encoding: [0xc3]
12393 ; X64-LABEL: test_int_x86_avx512_mask_prolv_d_256:
12395 ; X64-NEXT: vprolvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x15,0xd9]
12396 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12397 ; X64-NEXT: vprolvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x15,0xd1]
12398 ; X64-NEXT: vprolvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xc1]
12399 ; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
12400 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
12401 ; X64-NEXT: retq # encoding: [0xc3]
12402 %res = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
12403 %res1 = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
12404 %res2 = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
12405 %res3 = add <8 x i32> %res, %res1
12406 %res4 = add <8 x i32> %res3, %res2
12407 ret <8 x i32> %res4
12408 }
12410 declare <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
12412 define <2 x i64>@test_int_x86_avx512_mask_prolv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
12413 ; X86-LABEL: test_int_x86_avx512_mask_prolv_q_128:
12415 ; X86-NEXT: vprolvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x15,0xd9]
12416 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12417 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12418 ; X86-NEXT: vprolvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x15,0xd1]
12419 ; X86-NEXT: vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x15,0xc1]
12420 ; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
12421 ; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
12422 ; X86-NEXT: retl # encoding: [0xc3]
12424 ; X64-LABEL: test_int_x86_avx512_mask_prolv_q_128:
12426 ; X64-NEXT: vprolvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x15,0xd9]
12427 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12428 ; X64-NEXT: vprolvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x15,0xd1]
12429 ; X64-NEXT: vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x15,0xc1]
12430 ; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
12431 ; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
12432 ; X64-NEXT: retq # encoding: [0xc3]
12433 %res = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
12434 %res1 = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
12435 %res2 = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
12436 %res3 = add <2 x i64> %res, %res1
12437 %res4 = add <2 x i64> %res3, %res2
12438 ret <2 x i64> %res4
12439 }
12441 declare <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
12443 define <4 x i64>@test_int_x86_avx512_mask_prolv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
12444 ; X86-LABEL: test_int_x86_avx512_mask_prolv_q_256:
12446 ; X86-NEXT: vprolvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x15,0xd9]
12447 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12448 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12449 ; X86-NEXT: vprolvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x15,0xd1]
12450 ; X86-NEXT: vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xc1]
12451 ; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
12452 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
12453 ; X86-NEXT: retl # encoding: [0xc3]
12455 ; X64-LABEL: test_int_x86_avx512_mask_prolv_q_256:
12457 ; X64-NEXT: vprolvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x15,0xd9]
12458 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12459 ; X64-NEXT: vprolvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x15,0xd1]
12460 ; X64-NEXT: vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xc1]
12461 ; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
12462 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
12463 ; X64-NEXT: retq # encoding: [0xc3]
12464 %res = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
12465 %res1 = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
12466 %res2 = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
12467 %res3 = add <4 x i64> %res, %res1
12468 %res4 = add <4 x i64> %res3, %res2
12469 ret <4 x i64> %res4
12470 }
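; pror.* rotates right by an immediate count (vprord/vprorq), again with a count of 3.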
12472 declare <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32>, i32, <4 x i32>, i8)
12474 define <4 x i32>@test_int_x86_avx512_mask_pror_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
12475 ; X86-LABEL: test_int_x86_avx512_mask_pror_d_128:
12477 ; X86-NEXT: vprord $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc0,0x03]
12478 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
12479 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12480 ; X86-NEXT: vprord $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03]
12481 ; X86-NEXT: vprord $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc0,0x03]
12482 ; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
12483 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
12484 ; X86-NEXT: retl # encoding: [0xc3]
12486 ; X64-LABEL: test_int_x86_avx512_mask_pror_d_128:
12488 ; X64-NEXT: vprord $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc0,0x03]
12489 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
12490 ; X64-NEXT: vprord $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03]
12491 ; X64-NEXT: vprord $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc0,0x03]
12492 ; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
12493 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
12494 ; X64-NEXT: retq # encoding: [0xc3]
12495 %res = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
12496 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3)
12497 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1)
12498 %res3 = add <4 x i32> %res, %res1
12499 %res4 = add <4 x i32> %res3, %res2
12500 ret <4 x i32> %res4
12501 }
12503 declare <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32>, i32, <8 x i32>, i8)
12505 define <8 x i32>@test_int_x86_avx512_mask_pror_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
12506 ; X86-LABEL: test_int_x86_avx512_mask_pror_d_256:
12508 ; X86-NEXT: vprord $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc0,0x03]
12509 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
12510 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12511 ; X86-NEXT: vprord $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03]
12512 ; X86-NEXT: vprord $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc0,0x03]
12513 ; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
12514 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
12515 ; X86-NEXT: retl # encoding: [0xc3]
12517 ; X64-LABEL: test_int_x86_avx512_mask_pror_d_256:
12519 ; X64-NEXT: vprord $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc0,0x03]
12520 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
12521 ; X64-NEXT: vprord $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03]
12522 ; X64-NEXT: vprord $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc0,0x03]
12523 ; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
12524 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
12525 ; X64-NEXT: retq # encoding: [0xc3]
12526 %res = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
12527 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3)
12528 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1)
12529 %res3 = add <8 x i32> %res, %res1
12530 %res4 = add <8 x i32> %res3, %res2
12531 ret <8 x i32> %res4
12532 }
12534 declare <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64>, i32, <2 x i64>, i8)
12536 define <2 x i64>@test_int_x86_avx512_mask_pror_q_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
12537 ; X86-LABEL: test_int_x86_avx512_mask_pror_q_128:
12539 ; X86-NEXT: vprorq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc0,0x03]
12540 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
12541 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12542 ; X86-NEXT: vprorq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03]
12543 ; X86-NEXT: vprorq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc0,0x03]
12544 ; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
12545 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
12546 ; X86-NEXT: retl # encoding: [0xc3]
12548 ; X64-LABEL: test_int_x86_avx512_mask_pror_q_128:
12550 ; X64-NEXT: vprorq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc0,0x03]
12551 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
12552 ; X64-NEXT: vprorq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03]
12553 ; X64-NEXT: vprorq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc0,0x03]
12554 ; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
12555 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
12556 ; X64-NEXT: retq # encoding: [0xc3]
12557 %res = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3)
12558 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i32 3, <2 x i64> zeroinitializer, i8 %x3)
12559 %res2 = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 -1)
12560 %res3 = add <2 x i64> %res, %res1
12561 %res4 = add <2 x i64> %res3, %res2
12562 ret <2 x i64> %res4
12563 }
12565 declare <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64>, i32, <4 x i64>, i8)
12567 define <4 x i64>@test_int_x86_avx512_mask_pror_q_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
12568 ; X86-LABEL: test_int_x86_avx512_mask_pror_q_256:
12570 ; X86-NEXT: vprorq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc0,0x03]
12571 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
12572 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12573 ; X86-NEXT: vprorq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03]
12574 ; X86-NEXT: vprorq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc0,0x03]
12575 ; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
12576 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
12577 ; X86-NEXT: retl # encoding: [0xc3]
12579 ; X64-LABEL: test_int_x86_avx512_mask_pror_q_256:
12581 ; X64-NEXT: vprorq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc0,0x03]
12582 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
12583 ; X64-NEXT: vprorq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03]
12584 ; X64-NEXT: vprorq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc0,0x03]
12585 ; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
12586 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
12587 ; X64-NEXT: retq # encoding: [0xc3]
12588 %res = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
12589 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
12590 %res2 = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
12591 %res3 = add <4 x i64> %res, %res1
12592 %res4 = add <4 x i64> %res3, %res2
12593 ret <4 x i64> %res4
12594 }
12596 declare <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32>, <4 x i32>)
12598 define <4 x i32>@test_int_x86_avx512_prorv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
12599 ; X86-LABEL: test_int_x86_avx512_prorv_d_128:
12601 ; X86-NEXT: vprorvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x14,0xd9]
12602 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12603 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12604 ; X86-NEXT: vprorvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x14,0xd1]
12605 ; X86-NEXT: vprorvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x14,0xc1]
12606 ; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
12607 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
12608 ; X86-NEXT: retl # encoding: [0xc3]
12610 ; X64-LABEL: test_int_x86_avx512_prorv_d_128:
12612 ; X64-NEXT: vprorvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x14,0xd9]
12613 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12614 ; X64-NEXT: vprorvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x14,0xd1]
12615 ; X64-NEXT: vprorvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x14,0xc1]
12616 ; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
12617 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
12618 ; X64-NEXT: retq # encoding: [0xc3]
12619 %1 = call <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1)
12620 %2 = bitcast i8 %x3 to <8 x i1>
12621 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12622 %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x2
12623 %4 = call <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1)
12624 %5 = bitcast i8 %x3 to <8 x i1>
12625 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12626 %6 = select <4 x i1> %extract, <4 x i32> %4, <4 x i32> zeroinitializer
12627 %7 = call <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1)
12628 %res3 = add <4 x i32> %3, %6
12629 %res4 = add <4 x i32> %res3, %7
12630 ret <4 x i32> %res4
12631 }
12633 declare <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32>, <8 x i32>)
12635 define <8 x i32>@test_int_x86_avx512_prorv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
12636 ; X86-LABEL: test_int_x86_avx512_prorv_d_256:
12638 ; X86-NEXT: vprorvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x14,0xd9]
12639 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12640 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12641 ; X86-NEXT: vprorvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x14,0xd1]
12642 ; X86-NEXT: vprorvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xc1]
12643 ; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
12644 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
12645 ; X86-NEXT: retl # encoding: [0xc3]
12647 ; X64-LABEL: test_int_x86_avx512_prorv_d_256:
12649 ; X64-NEXT: vprorvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x14,0xd9]
12650 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12651 ; X64-NEXT: vprorvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x14,0xd1]
12652 ; X64-NEXT: vprorvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xc1]
12653 ; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
12654 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
12655 ; X64-NEXT: retq # encoding: [0xc3]
12656 %1 = call <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1)
12657 %2 = bitcast i8 %x3 to <8 x i1>
12658 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2
12659 %4 = call <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1)
12660 %5 = bitcast i8 %x3 to <8 x i1>
12661 %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer
12662 %7 = call <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1)
12663 %res3 = add <8 x i32> %3, %6
12664 %res4 = add <8 x i32> %res3, %7
12665 ret <8 x i32> %res4
12666 }
12668 declare <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64>, <2 x i64>)
12670 define <2 x i64>@test_int_x86_avx512_prorv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
12671 ; X86-LABEL: test_int_x86_avx512_prorv_q_128:
12673 ; X86-NEXT: vprorvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x14,0xd9]
12674 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12675 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12676 ; X86-NEXT: vprorvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x14,0xd1]
12677 ; X86-NEXT: vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x14,0xc1]
12678 ; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
12679 ; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
12680 ; X86-NEXT: retl # encoding: [0xc3]
12682 ; X64-LABEL: test_int_x86_avx512_prorv_q_128:
12684 ; X64-NEXT: vprorvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x14,0xd9]
12685 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12686 ; X64-NEXT: vprorvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x14,0xd1]
12687 ; X64-NEXT: vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x14,0xc1]
12688 ; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
12689 ; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
12690 ; X64-NEXT: retq # encoding: [0xc3]
12691 %1 = call <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1)
12692 %2 = bitcast i8 %x3 to <8 x i1>
12693 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
12694 %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> %x2
12695 %4 = call <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1)
12696 %5 = bitcast i8 %x3 to <8 x i1>
12697 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
12698 %6 = select <2 x i1> %extract, <2 x i64> %4, <2 x i64> zeroinitializer
12699 %7 = call <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1)
12700 %res3 = add <2 x i64> %3, %6
12701 %res4 = add <2 x i64> %res3, %7
12702 ret <2 x i64> %res4
12703 }
12705 declare <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64>, <4 x i64>)
12707 define <4 x i64>@test_int_x86_avx512_prorv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
12708 ; X86-LABEL: test_int_x86_avx512_prorv_q_256:
12710 ; X86-NEXT: vprorvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x14,0xd9]
12711 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12712 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12713 ; X86-NEXT: vprorvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x14,0xd1]
12714 ; X86-NEXT: vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xc1]
12715 ; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
12716 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
12717 ; X86-NEXT: retl # encoding: [0xc3]
12719 ; X64-LABEL: test_int_x86_avx512_prorv_q_256:
12721 ; X64-NEXT: vprorvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x14,0xd9]
12722 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12723 ; X64-NEXT: vprorvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x14,0xd1]
12724 ; X64-NEXT: vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xc1]
12725 ; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
12726 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
12727 ; X64-NEXT: retq # encoding: [0xc3]
12728 %1 = call <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1)
12729 %2 = bitcast i8 %x3 to <8 x i1>
12730 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12731 %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x2
12732 %4 = call <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1)
12733 %5 = bitcast i8 %x3 to <8 x i1>
12734 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12735 %6 = select <4 x i1> %extract, <4 x i64> %4, <4 x i64> zeroinitializer
12736 %7 = call <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1)
12737 %res3 = add <4 x i64> %3, %6
12738 %res4 = add <4 x i64> %res3, %7
12739 ret <4 x i64> %res4
12740 }
12742 declare <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32>, i32)
12744 define <4 x i32>@test_int_x86_avx512_prol_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
12745 ; X86-LABEL: test_int_x86_avx512_prol_d_128:
12747 ; X86-NEXT: vprold $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc8,0x03]
12748 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
12749 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12750 ; X86-NEXT: vprold $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03]
12751 ; X86-NEXT: vprold $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc8,0x03]
12752 ; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
12753 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
12754 ; X86-NEXT: retl # encoding: [0xc3]
12756 ; X64-LABEL: test_int_x86_avx512_prol_d_128:
12758 ; X64-NEXT: vprold $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc8,0x03]
12759 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
12760 ; X64-NEXT: vprold $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03]
12761 ; X64-NEXT: vprold $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc8,0x03]
12762 ; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
12763 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
12764 ; X64-NEXT: retq # encoding: [0xc3]
12765 %1 = call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %x0, i32 3)
12766 %2 = bitcast i8 %x3 to <8 x i1>
12767 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12768 %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x2
12769 %4 = call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %x0, i32 3)
12770 %5 = bitcast i8 %x3 to <8 x i1>
12771 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12772 %6 = select <4 x i1> %extract, <4 x i32> %4, <4 x i32> zeroinitializer
12773 %7 = call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %x0, i32 3)
12774 %res3 = add <4 x i32> %3, %6
12775 %res4 = add <4 x i32> %res3, %7
12776 ret <4 x i32> %res4
12777 }
12779 declare <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32>, i32)
12781 define <8 x i32>@test_int_x86_avx512_prol_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
12782 ; X86-LABEL: test_int_x86_avx512_prol_d_256:
12784 ; X86-NEXT: vprold $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc8,0x03]
12785 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
12786 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12787 ; X86-NEXT: vprold $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03]
12788 ; X86-NEXT: vprold $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc8,0x03]
12789 ; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
12790 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
12791 ; X86-NEXT: retl # encoding: [0xc3]
12793 ; X64-LABEL: test_int_x86_avx512_prol_d_256:
12795 ; X64-NEXT: vprold $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc8,0x03]
12796 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
12797 ; X64-NEXT: vprold $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03]
12798 ; X64-NEXT: vprold $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc8,0x03]
12799 ; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
12800 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
12801 ; X64-NEXT: retq # encoding: [0xc3]
12802 %1 = call <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32> %x0, i32 3)
12803 %2 = bitcast i8 %x3 to <8 x i1>
12804 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2
12805 %4 = call <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32> %x0, i32 3)
12806 %5 = bitcast i8 %x3 to <8 x i1>
12807 %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer
12808 %7 = call <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32> %x0, i32 3)
12809 %res3 = add <8 x i32> %3, %6
12810 %res4 = add <8 x i32> %res3, %7
12811 ret <8 x i32> %res4
12812 }
12814 declare <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64>, i32)
12816 define <2 x i64>@test_int_x86_avx512_prol_q_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
12817 ; X86-LABEL: test_int_x86_avx512_prol_q_128:
12819 ; X86-NEXT: vprolq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc8,0x03]
12820 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
12821 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12822 ; X86-NEXT: vprolq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03]
12823 ; X86-NEXT: vprolq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc8,0x03]
12824 ; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
12825 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
12826 ; X86-NEXT: retl # encoding: [0xc3]
12828 ; X64-LABEL: test_int_x86_avx512_prol_q_128:
12830 ; X64-NEXT: vprolq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc8,0x03]
12831 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
12832 ; X64-NEXT: vprolq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03]
12833 ; X64-NEXT: vprolq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc8,0x03]
12834 ; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
12835 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
12836 ; X64-NEXT: retq # encoding: [0xc3]
12837 %1 = call <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64> %x0, i32 3)
12838 %2 = bitcast i8 %x3 to <8 x i1>
12839 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
12840 %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> %x2
12841 %4 = call <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64> %x0, i32 3)
12842 %5 = bitcast i8 %x3 to <8 x i1>
12843 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
12844 %6 = select <2 x i1> %extract, <2 x i64> %4, <2 x i64> zeroinitializer
12845 %7 = call <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64> %x0, i32 3)
12846 %res3 = add <2 x i64> %3, %6
12847 %res4 = add <2 x i64> %res3, %7
12848 ret <2 x i64> %res4
12849 }
12851 declare <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64>, i32)
12853 define <4 x i64>@test_int_x86_avx512_prol_q_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
12854 ; X86-LABEL: test_int_x86_avx512_prol_q_256:
12856 ; X86-NEXT: vprolq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc8,0x03]
12857 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
12858 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12859 ; X86-NEXT: vprolq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03]
12860 ; X86-NEXT: vprolq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc8,0x03]
12861 ; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
12862 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
12863 ; X86-NEXT: retl # encoding: [0xc3]
12865 ; X64-LABEL: test_int_x86_avx512_prol_q_256:
12867 ; X64-NEXT: vprolq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc8,0x03]
12868 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
12869 ; X64-NEXT: vprolq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03]
12870 ; X64-NEXT: vprolq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc8,0x03]
12871 ; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
12872 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
12873 ; X64-NEXT: retq # encoding: [0xc3]
12874 %1 = call <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64> %x0, i32 3)
12875 %2 = bitcast i8 %x3 to <8 x i1>
12876 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12877 %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x2
12878 %4 = call <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64> %x0, i32 3)
12879 %5 = bitcast i8 %x3 to <8 x i1>
12880 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12881 %6 = select <4 x i1> %extract, <4 x i64> %4, <4 x i64> zeroinitializer
12882 %7 = call <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64> %x0, i32 3)
12883 %res3 = add <4 x i64> %3, %6
12884 %res4 = add <4 x i64> %res3, %7
12885 ret <4 x i64> %res4
12886 }
12888 declare <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32>, <4 x i32>)
12890 define <4 x i32>@test_int_x86_avx512_prolv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
12891 ; X86-LABEL: test_int_x86_avx512_prolv_d_128:
12893 ; X86-NEXT: vprolvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x15,0xd9]
12894 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12895 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12896 ; X86-NEXT: vprolvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x15,0xd1]
12897 ; X86-NEXT: vprolvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x15,0xc1]
12898 ; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
12899 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
12900 ; X86-NEXT: retl # encoding: [0xc3]
12902 ; X64-LABEL: test_int_x86_avx512_prolv_d_128:
12904 ; X64-NEXT: vprolvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x15,0xd9]
12905 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12906 ; X64-NEXT: vprolvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x15,0xd1]
12907 ; X64-NEXT: vprolvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x15,0xc1]
12908 ; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
12909 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
12910 ; X64-NEXT: retq # encoding: [0xc3]
12911 %1 = call <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1)
12912 %2 = bitcast i8 %x3 to <8 x i1>
12913 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12914 %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x2
12915 %4 = call <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1)
12916 %5 = bitcast i8 %x3 to <8 x i1>
12917 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12918 %6 = select <4 x i1> %extract, <4 x i32> %4, <4 x i32> zeroinitializer
12919 %7 = call <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1)
12920 %res3 = add <4 x i32> %3, %6
12921 %res4 = add <4 x i32> %res3, %7
12922 ret <4 x i32> %res4
12923 }
12925 declare <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32>, <8 x i32>)
12927 define <8 x i32>@test_int_x86_avx512_prolv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
12928 ; X86-LABEL: test_int_x86_avx512_prolv_d_256:
12930 ; X86-NEXT: vprolvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x15,0xd9]
12931 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12932 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12933 ; X86-NEXT: vprolvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x15,0xd1]
12934 ; X86-NEXT: vprolvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xc1]
12935 ; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
12936 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
12937 ; X86-NEXT: retl # encoding: [0xc3]
12939 ; X64-LABEL: test_int_x86_avx512_prolv_d_256:
12941 ; X64-NEXT: vprolvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x15,0xd9]
12942 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12943 ; X64-NEXT: vprolvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x15,0xd1]
12944 ; X64-NEXT: vprolvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xc1]
12945 ; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
12946 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
12947 ; X64-NEXT: retq # encoding: [0xc3]
12948 %1 = call <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1)
12949 %2 = bitcast i8 %x3 to <8 x i1>
12950 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2
12951 %4 = call <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1)
12952 %5 = bitcast i8 %x3 to <8 x i1>
12953 %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer
12954 %7 = call <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1)
12955 %res3 = add <8 x i32> %3, %6
12956 %res4 = add <8 x i32> %res3, %7
12957 ret <8 x i32> %res4
12958 }
12960 declare <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64>, <2 x i64>)
12962 define <2 x i64>@test_int_x86_avx512_prolv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
12963 ; X86-LABEL: test_int_x86_avx512_prolv_q_128:
12965 ; X86-NEXT: vprolvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x15,0xd9]
12966 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12967 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12968 ; X86-NEXT: vprolvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x15,0xd1]
12969 ; X86-NEXT: vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x15,0xc1]
12970 ; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
12971 ; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
12972 ; X86-NEXT: retl # encoding: [0xc3]
12974 ; X64-LABEL: test_int_x86_avx512_prolv_q_128:
12976 ; X64-NEXT: vprolvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x15,0xd9]
12977 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12978 ; X64-NEXT: vprolvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x15,0xd1]
12979 ; X64-NEXT: vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x15,0xc1]
12980 ; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
12981 ; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
12982 ; X64-NEXT: retq # encoding: [0xc3]
12983 %1 = call <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1)
12984 %2 = bitcast i8 %x3 to <8 x i1>
12985 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
12986 %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> %x2
12987 %4 = call <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1)
12988 %5 = bitcast i8 %x3 to <8 x i1>
12989 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
12990 %6 = select <2 x i1> %extract, <2 x i64> %4, <2 x i64> zeroinitializer
12991 %7 = call <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1)
12992 %res3 = add <2 x i64> %3, %6
12993 %res4 = add <2 x i64> %res3, %7
12994 ret <2 x i64> %res4
12995 }
12997 declare <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64>, <4 x i64>)
12999 define <4 x i64>@test_int_x86_avx512_prolv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
13000 ; X86-LABEL: test_int_x86_avx512_prolv_q_256:
13002 ; X86-NEXT: vprolvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x15,0xd9]
13003 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13004 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13005 ; X86-NEXT: vprolvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x15,0xd1]
13006 ; X86-NEXT: vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xc1]
13007 ; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
13008 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
13009 ; X86-NEXT: retl # encoding: [0xc3]
13011 ; X64-LABEL: test_int_x86_avx512_prolv_q_256:
13013 ; X64-NEXT: vprolvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x15,0xd9]
13014 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13015 ; X64-NEXT: vprolvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x15,0xd1]
13016 ; X64-NEXT: vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xc1]
13017 ; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
13018 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
13019 ; X64-NEXT: retq # encoding: [0xc3]
13020 %1 = call <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1)
13021 %2 = bitcast i8 %x3 to <8 x i1>
13022 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13023 %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x2
13024 %4 = call <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1)
13025 %5 = bitcast i8 %x3 to <8 x i1>
13026 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13027 %6 = select <4 x i1> %extract, <4 x i64> %4, <4 x i64> zeroinitializer
13028 %7 = call <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1)
13029 %res3 = add <4 x i64> %3, %6
13030 %res4 = add <4 x i64> %res3, %7
13031 ret <4 x i64> %res4
13032 }
13034 declare <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32>, i32)
13036 define <4 x i32>@test_int_x86_avx512_pror_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
13037 ; X86-LABEL: test_int_x86_avx512_pror_d_128:
13039 ; X86-NEXT: vprord $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc0,0x03]
13040 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
13041 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13042 ; X86-NEXT: vprord $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03]
13043 ; X86-NEXT: vprord $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc0,0x03]
13044 ; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
13045 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
13046 ; X86-NEXT: retl # encoding: [0xc3]
13048 ; X64-LABEL: test_int_x86_avx512_pror_d_128:
13050 ; X64-NEXT: vprord $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc0,0x03]
13051 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
13052 ; X64-NEXT: vprord $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03]
13053 ; X64-NEXT: vprord $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc0,0x03]
13054 ; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
13055 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
13056 ; X64-NEXT: retq # encoding: [0xc3]
13057 %1 = call <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32> %x0, i32 3)
13058 %2 = bitcast i8 %x3 to <8 x i1>
13059 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13060 %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x2
13061 %4 = call <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32> %x0, i32 3)
13062 %5 = bitcast i8 %x3 to <8 x i1>
13063 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13064 %6 = select <4 x i1> %extract, <4 x i32> %4, <4 x i32> zeroinitializer
13065 %7 = call <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32> %x0, i32 3)
13066 %res3 = add <4 x i32> %3, %6
13067 %res4 = add <4 x i32> %res3, %7
13068 ret <4 x i32> %res4
13069 }
13071 declare <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32>, i32)
13073 define <8 x i32>@test_int_x86_avx512_pror_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
13074 ; X86-LABEL: test_int_x86_avx512_pror_d_256:
13076 ; X86-NEXT: vprord $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc0,0x03]
13077 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
13078 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13079 ; X86-NEXT: vprord $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03]
13080 ; X86-NEXT: vprord $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc0,0x03]
13081 ; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
13082 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
13083 ; X86-NEXT: retl # encoding: [0xc3]
13085 ; X64-LABEL: test_int_x86_avx512_pror_d_256:
13087 ; X64-NEXT: vprord $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc0,0x03]
13088 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
13089 ; X64-NEXT: vprord $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03]
13090 ; X64-NEXT: vprord $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc0,0x03]
13091 ; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
13092 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
13093 ; X64-NEXT: retq # encoding: [0xc3]
13094 %1 = call <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32> %x0, i32 3)
13095 %2 = bitcast i8 %x3 to <8 x i1>
13096 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2
13097 %4 = call <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32> %x0, i32 3)
13098 %5 = bitcast i8 %x3 to <8 x i1>
13099 %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer
13100 %7 = call <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32> %x0, i32 3)
13101 %res3 = add <8 x i32> %3, %6
13102 %res4 = add <8 x i32> %res3, %7
13103 ret <8 x i32> %res4
13104 }
13106 declare <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64>, i32)
13108 define <2 x i64>@test_int_x86_avx512_pror_q_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
13109 ; X86-LABEL: test_int_x86_avx512_pror_q_128:
13111 ; X86-NEXT: vprorq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc0,0x03]
13112 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
13113 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13114 ; X86-NEXT: vprorq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03]
13115 ; X86-NEXT: vprorq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc0,0x03]
13116 ; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
13117 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
13118 ; X86-NEXT: retl # encoding: [0xc3]
13120 ; X64-LABEL: test_int_x86_avx512_pror_q_128:
13122 ; X64-NEXT: vprorq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc0,0x03]
13123 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
13124 ; X64-NEXT: vprorq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03]
13125 ; X64-NEXT: vprorq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc0,0x03]
13126 ; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
13127 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
13128 ; X64-NEXT: retq # encoding: [0xc3]
13129 %1 = call <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64> %x0, i32 3)
13130 %2 = bitcast i8 %x3 to <8 x i1>
13131 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
13132 %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> %x2
13133 %4 = call <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64> %x0, i32 3)
13134 %5 = bitcast i8 %x3 to <8 x i1>
13135 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
13136 %6 = select <2 x i1> %extract, <2 x i64> %4, <2 x i64> zeroinitializer
13137 %7 = call <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64> %x0, i32 3)
13138 %res3 = add <2 x i64> %3, %6
13139 %res4 = add <2 x i64> %res3, %7
13140 ret <2 x i64> %res4
13141 }
13143 declare <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64>, i32)
13145 define <4 x i64>@test_int_x86_avx512_pror_q_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
13146 ; X86-LABEL: test_int_x86_avx512_pror_q_256:
13148 ; X86-NEXT: vprorq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc0,0x03]
13149 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
13150 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13151 ; X86-NEXT: vprorq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03]
13152 ; X86-NEXT: vprorq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc0,0x03]
13153 ; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
13154 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
13155 ; X86-NEXT: retl # encoding: [0xc3]
13157 ; X64-LABEL: test_int_x86_avx512_pror_q_256:
13159 ; X64-NEXT: vprorq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc0,0x03]
13160 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
13161 ; X64-NEXT: vprorq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03]
13162 ; X64-NEXT: vprorq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc0,0x03]
13163 ; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
13164 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
13165 ; X64-NEXT: retq # encoding: [0xc3]
13166 %1 = call <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64> %x0, i32 3)
13167 %2 = bitcast i8 %x3 to <8 x i1>
13168 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13169 %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x2
13170 %4 = call <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64> %x0, i32 3)
13171 %5 = bitcast i8 %x3 to <8 x i1>
13172 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13173 %6 = select <4 x i1> %extract, <4 x i64> %4, <4 x i64> zeroinitializer
13174 %7 = call <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64> %x0, i32 3)
13175 %res3 = add <4 x i64> %3, %6
13176 %res4 = add <4 x i64> %res3, %7
13177 ret <4 x i64> %res4
13178 }
13180 declare <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
13182 define <8 x float> @test_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
13183 ; CHECK-LABEL: test_vfmadd256_ps:
13185 ; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
13186 ; CHECK-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
13187 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13188 %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 -1) nounwind
13189 ret <8 x float> %res
13190 }
13192 define <8 x float> @test_mask_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
13193 ; X86-LABEL: test_mask_vfmadd256_ps:
13195 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13196 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13197 ; X86-NEXT: vfmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x98,0xc1]
13198 ; X86-NEXT: # ymm0 = (ymm0 * ymm1) + ymm2
13199 ; X86-NEXT: retl # encoding: [0xc3]
13201 ; X64-LABEL: test_mask_vfmadd256_ps:
13203 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13204 ; X64-NEXT: vfmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x98,0xc1]
13205 ; X64-NEXT: # ymm0 = (ymm0 * ymm1) + ymm2
13206 ; X64-NEXT: retq # encoding: [0xc3]
13207 %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
13208 ret <8 x float> %res
13209 }
13211 declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
13213 define <4 x float> @test_vfmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
13214 ; CHECK-LABEL: test_vfmadd128_ps:
13216 ; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
13217 ; CHECK-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
13218 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13219 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
13220 ret <4 x float> %res
13221 }
13223 define <4 x float> @test_mask_vfmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
13224 ; X86-LABEL: test_mask_vfmadd128_ps:
13226 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13227 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13228 ; X86-NEXT: vfmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x98,0xc1]
13229 ; X86-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
13230 ; X86-NEXT: retl # encoding: [0xc3]
13232 ; X64-LABEL: test_mask_vfmadd128_ps:
13234 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13235 ; X64-NEXT: vfmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x98,0xc1]
13236 ; X64-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
13237 ; X64-NEXT: retq # encoding: [0xc3]
13238 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
13239 ret <4 x float> %res
13240 }
13242 declare <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
13244 define <4 x double> @test_fmadd256_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
13245 ; CHECK-LABEL: test_fmadd256_pd:
13247 ; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
13248 ; CHECK-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
13249 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13250 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 -1)
13251 ret <4 x double> %res
13252 }
13254 define <4 x double> @test_mask_fmadd256_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask) {
13255 ; X86-LABEL: test_mask_fmadd256_pd:
13257 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13258 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13259 ; X86-NEXT: vfmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x98,0xc1]
13260 ; X86-NEXT: # ymm0 = (ymm0 * ymm1) + ymm2
13261 ; X86-NEXT: retl # encoding: [0xc3]
13263 ; X64-LABEL: test_mask_fmadd256_pd:
13265 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13266 ; X64-NEXT: vfmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x98,0xc1]
13267 ; X64-NEXT: # ymm0 = (ymm0 * ymm1) + ymm2
13268 ; X64-NEXT: retq # encoding: [0xc3]
13269 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask)
13270 ret <4 x double> %res
13271 }
13273 declare <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
13275 define <2 x double> @test_fmadd128_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
13276 ; CHECK-LABEL: test_fmadd128_pd:
13278 ; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
13279 ; CHECK-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
13280 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13281 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 -1)
13282 ret <2 x double> %res
13283 }
13285 define <2 x double> @test_mask_fmadd128_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
13286 ; X86-LABEL: test_mask_fmadd128_pd:
13288 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13289 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13290 ; X86-NEXT: vfmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x98,0xc1]
13291 ; X86-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
13292 ; X86-NEXT: retl # encoding: [0xc3]
13294 ; X64-LABEL: test_mask_fmadd128_pd:
13296 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13297 ; X64-NEXT: vfmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x98,0xc1]
13298 ; X64-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
13299 ; X64-NEXT: retq # encoding: [0xc3]
13300 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask)
13301 ret <2 x double> %res
13302 }
13304 declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
13306 define <2 x double>@test_int_x86_avx512_mask3_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
13307 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_128:
13309 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13310 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13311 ; X86-NEXT: vfmadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb8,0xd1]
13312 ; X86-NEXT: # xmm2 = (xmm0 * xmm1) + xmm2
13313 ; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
13314 ; X86-NEXT: retl # encoding: [0xc3]
13316 ; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_128:
13318 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13319 ; X64-NEXT: vfmadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb8,0xd1]
13320 ; X64-NEXT: # xmm2 = (xmm0 * xmm1) + xmm2
13321 ; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
13322 ; X64-NEXT: retq # encoding: [0xc3]
13323 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
13324 ret <2 x double> %res
13325 }
13327 declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
13329 define <2 x double>@test_int_x86_avx512_maskz_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
13330 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_128:
13332 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13333 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13334 ; X86-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa8,0xc2]
13335 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
13336 ; X86-NEXT: retl # encoding: [0xc3]
13338 ; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_128:
13340 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13341 ; X64-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa8,0xc2]
13342 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
13343 ; X64-NEXT: retq # encoding: [0xc3]
13344 %res = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
13345 ret <2 x double> %res
13346 }
13348 declare <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
13350 define <4 x double>@test_int_x86_avx512_mask3_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
13351 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_256:
13353 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13354 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13355 ; X86-NEXT: vfmadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb8,0xd1]
13356 ; X86-NEXT: # ymm2 = (ymm0 * ymm1) + ymm2
13357 ; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
13358 ; X86-NEXT: retl # encoding: [0xc3]
13360 ; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_256:
13362 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13363 ; X64-NEXT: vfmadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb8,0xd1]
13364 ; X64-NEXT: # ymm2 = (ymm0 * ymm1) + ymm2
13365 ; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
13366 ; X64-NEXT: retq # encoding: [0xc3]
13367 %res = call <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
13368 ret <4 x double> %res
13369 }
13371 declare <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
13373 define <4 x double>@test_int_x86_avx512_maskz_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
13374 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_256:
13376 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13377 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13378 ; X86-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa8,0xc2]
13379 ; X86-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
13380 ; X86-NEXT: retl # encoding: [0xc3]
13382 ; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_256:
13384 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13385 ; X64-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa8,0xc2]
13386 ; X64-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
13387 ; X64-NEXT: retq # encoding: [0xc3]
13388 %res = call <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
13389 ret <4 x double> %res
13390 }
13392 declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
13394 define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
13395 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_128:
13397 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13398 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13399 ; X86-NEXT: vfmadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb8,0xd1]
13400 ; X86-NEXT: # xmm2 = (xmm0 * xmm1) + xmm2
13401 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
13402 ; X86-NEXT: retl # encoding: [0xc3]
13404 ; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_128:
13406 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13407 ; X64-NEXT: vfmadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb8,0xd1]
13408 ; X64-NEXT: # xmm2 = (xmm0 * xmm1) + xmm2
13409 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
13410 ; X64-NEXT: retq # encoding: [0xc3]
13411 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
13412 ret <4 x float> %res
13413 }
13415 declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
13417 define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
13418 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_128:
13420 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13421 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13422 ; X86-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa8,0xc2]
13423 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
13424 ; X86-NEXT: retl # encoding: [0xc3]
13426 ; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_128:
13428 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13429 ; X64-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa8,0xc2]
13430 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
13431 ; X64-NEXT: retq # encoding: [0xc3]
13432 %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
13433 ret <4 x float> %res
13434 }
13436 declare <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
13438 define <8 x float>@test_int_x86_avx512_mask3_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
13439 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_256:
13441 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13442 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13443 ; X86-NEXT: vfmadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb8,0xd1]
13444 ; X86-NEXT: # ymm2 = (ymm0 * ymm1) + ymm2
13445 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
13446 ; X86-NEXT: retl # encoding: [0xc3]
13448 ; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_256:
13450 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13451 ; X64-NEXT: vfmadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb8,0xd1]
13452 ; X64-NEXT: # ymm2 = (ymm0 * ymm1) + ymm2
13453 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
13454 ; X64-NEXT: retq # encoding: [0xc3]
13455 %res = call <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
13456 ret <8 x float> %res
13457 }
13459 declare <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
13461 define <8 x float>@test_int_x86_avx512_maskz_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
13462 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_256:
13464 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13465 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13466 ; X86-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa8,0xc2]
13467 ; X86-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
13468 ; X86-NEXT: retl # encoding: [0xc3]
13470 ; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_256:
13472 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13473 ; X64-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa8,0xc2]
13474 ; X64-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
13475 ; X64-NEXT: retq # encoding: [0xc3]
13476 %res = call <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
13477 ret <8 x float> %res
13481 declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
13483 define <2 x double> @test_int_x86_avx512_mask3_vfmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
13484 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_128:
13486 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13487 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13488 ; X86-NEXT: vfmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xba,0xd1]
13489 ; X86-NEXT: # xmm2 = (xmm0 * xmm1) - xmm2
13490 ; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
13491 ; X86-NEXT: retl # encoding: [0xc3]
13493 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_128:
13495 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13496 ; X64-NEXT: vfmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xba,0xd1]
13497 ; X64-NEXT: # xmm2 = (xmm0 * xmm1) - xmm2
13498 ; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
13499 ; X64-NEXT: retq # encoding: [0xc3]
13500 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
13501 ret <2 x double> %res
13505 declare <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
13507 define <4 x double> @test_int_x86_avx512_mask3_vfmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
13508 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_256:
13510 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13511 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13512 ; X86-NEXT: vfmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xba,0xd1]
13513 ; X86-NEXT: # ymm2 = (ymm0 * ymm1) - ymm2
13514 ; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
13515 ; X86-NEXT: retl # encoding: [0xc3]
13517 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_256:
13519 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13520 ; X64-NEXT: vfmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xba,0xd1]
13521 ; X64-NEXT: # ymm2 = (ymm0 * ymm1) - ymm2
13522 ; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
13523 ; X64-NEXT: retq # encoding: [0xc3]
13524 %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
13525 ret <4 x double> %res
13528 declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
13530 define <4 x float> @test_int_x86_avx512_mask3_vfmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
13531 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_128:
13533 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13534 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13535 ; X86-NEXT: vfmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xba,0xd1]
13536 ; X86-NEXT: # xmm2 = (xmm0 * xmm1) - xmm2
13537 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
13538 ; X86-NEXT: retl # encoding: [0xc3]
13540 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_128:
13542 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13543 ; X64-NEXT: vfmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xba,0xd1]
13544 ; X64-NEXT: # xmm2 = (xmm0 * xmm1) - xmm2
13545 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
13546 ; X64-NEXT: retq # encoding: [0xc3]
13547 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
13548 ret <4 x float> %res
13551 declare <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
13553 define <8 x float> @test_int_x86_avx512_mask3_vfmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
13554 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_256:
13556 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13557 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13558 ; X86-NEXT: vfmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xba,0xd1]
13559 ; X86-NEXT: # ymm2 = (ymm0 * ymm1) - ymm2
13560 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
13561 ; X86-NEXT: retl # encoding: [0xc3]
13563 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_256:
13565 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13566 ; X64-NEXT: vfmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xba,0xd1]
13567 ; X64-NEXT: # ymm2 = (ymm0 * ymm1) - ymm2
13568 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
13569 ; X64-NEXT: retq # encoding: [0xc3]
13570 %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
13571 ret <8 x float> %res
13574 declare <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
13576 define <8 x float> @test_vfnmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
13577 ; CHECK-LABEL: test_vfnmadd256_ps:
13579 ; CHECK-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xac,0xc2]
13580 ; CHECK-NEXT: # ymm0 = -(ymm1 * ymm0) + ymm2
13581 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13582 %res = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 -1) nounwind
13583 ret <8 x float> %res
13586 define <8 x float> @test_mask_vfnmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
13587 ; X86-LABEL: test_mask_vfnmadd256_ps:
13589 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13590 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13591 ; X86-NEXT: vfnmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9c,0xc1]
13592 ; X86-NEXT: # ymm0 = -(ymm0 * ymm1) + ymm2
13593 ; X86-NEXT: retl # encoding: [0xc3]
13595 ; X64-LABEL: test_mask_vfnmadd256_ps:
13597 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13598 ; X64-NEXT: vfnmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9c,0xc1]
13599 ; X64-NEXT: # ymm0 = -(ymm0 * ymm1) + ymm2
13600 ; X64-NEXT: retq # encoding: [0xc3]
13601 %res = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
13602 ret <8 x float> %res
13605 declare <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
13607 define <4 x float> @test_vfnmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
13608 ; CHECK-LABEL: test_vfnmadd128_ps:
13610 ; CHECK-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xac,0xc2]
13611 ; CHECK-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
13612 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13613 %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
13614 ret <4 x float> %res
13617 define <4 x float> @test_mask_vfnmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
13618 ; X86-LABEL: test_mask_vfnmadd128_ps:
13620 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13621 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13622 ; X86-NEXT: vfnmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9c,0xc1]
13623 ; X86-NEXT: # xmm0 = -(xmm0 * xmm1) + xmm2
13624 ; X86-NEXT: retl # encoding: [0xc3]
13626 ; X64-LABEL: test_mask_vfnmadd128_ps:
13628 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13629 ; X64-NEXT: vfnmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9c,0xc1]
13630 ; X64-NEXT: # xmm0 = -(xmm0 * xmm1) + xmm2
13631 ; X64-NEXT: retq # encoding: [0xc3]
13632 %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
13633 ret <4 x float> %res
13636 declare <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
13638 define <4 x double> @test_vfnmadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
13639 ; CHECK-LABEL: test_vfnmadd256_pd:
13641 ; CHECK-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xac,0xc2]
13642 ; CHECK-NEXT: # ymm0 = -(ymm1 * ymm0) + ymm2
13643 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13644 %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
13645 ret <4 x double> %res
13648 define <4 x double> @test_mask_vfnmadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
13649 ; X86-LABEL: test_mask_vfnmadd256_pd:
13651 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13652 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13653 ; X86-NEXT: vfnmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9c,0xc1]
13654 ; X86-NEXT: # ymm0 = -(ymm0 * ymm1) + ymm2
13655 ; X86-NEXT: retl # encoding: [0xc3]
13657 ; X64-LABEL: test_mask_vfnmadd256_pd:
13659 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13660 ; X64-NEXT: vfnmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9c,0xc1]
13661 ; X64-NEXT: # ymm0 = -(ymm0 * ymm1) + ymm2
13662 ; X64-NEXT: retq # encoding: [0xc3]
13663 %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
13664 ret <4 x double> %res
13667 declare <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
13669 define <2 x double> @test_vfnmadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
13670 ; CHECK-LABEL: test_vfnmadd128_pd:
13672 ; CHECK-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xac,0xc2]
13673 ; CHECK-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
13674 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13675 %res = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
13676 ret <2 x double> %res
13679 define <2 x double> @test_mask_vfnmadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
13680 ; X86-LABEL: test_mask_vfnmadd128_pd:
13682 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13683 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13684 ; X86-NEXT: vfnmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9c,0xc1]
13685 ; X86-NEXT: # xmm0 = -(xmm0 * xmm1) + xmm2
13686 ; X86-NEXT: retl # encoding: [0xc3]
13688 ; X64-LABEL: test_mask_vfnmadd128_pd:
13690 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13691 ; X64-NEXT: vfnmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9c,0xc1]
13692 ; X64-NEXT: # xmm0 = -(xmm0 * xmm1) + xmm2
13693 ; X64-NEXT: retq # encoding: [0xc3]
13694 %res = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
13695 ret <2 x double> %res
13698 declare <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
13700 define <8 x float> @test_vfnmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
13701 ; CHECK-LABEL: test_vfnmsub256_ps:
13703 ; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xae,0xc2]
13704 ; CHECK-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2
13705 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13706 %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 -1) nounwind
13707 ret <8 x float> %res
13710 define <8 x float> @test_mask_vfnmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
13711 ; X86-LABEL: test_mask_vfnmsub256_ps:
13713 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13714 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13715 ; X86-NEXT: vfnmsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9e,0xc1]
13716 ; X86-NEXT: # ymm0 = -(ymm0 * ymm1) - ymm2
13717 ; X86-NEXT: retl # encoding: [0xc3]
13719 ; X64-LABEL: test_mask_vfnmsub256_ps:
13721 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13722 ; X64-NEXT: vfnmsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9e,0xc1]
13723 ; X64-NEXT: # ymm0 = -(ymm0 * ymm1) - ymm2
13724 ; X64-NEXT: retq # encoding: [0xc3]
13725 %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
13726 ret <8 x float> %res
13729 declare <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
13731 define <4 x float> @test_vfnmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
13732 ; CHECK-LABEL: test_vfnmsub128_ps:
13734 ; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xae,0xc2]
13735 ; CHECK-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
13736 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13737 %res = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
13738 ret <4 x float> %res
13741 define <4 x float> @test_mask_vfnmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
13742 ; X86-LABEL: test_mask_vfnmsub128_ps:
13744 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13745 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13746 ; X86-NEXT: vfnmsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9e,0xc1]
13747 ; X86-NEXT: # xmm0 = -(xmm0 * xmm1) - xmm2
13748 ; X86-NEXT: retl # encoding: [0xc3]
13750 ; X64-LABEL: test_mask_vfnmsub128_ps:
13752 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13753 ; X64-NEXT: vfnmsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9e,0xc1]
13754 ; X64-NEXT: # xmm0 = -(xmm0 * xmm1) - xmm2
13755 ; X64-NEXT: retq # encoding: [0xc3]
13756 %res = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
13757 ret <4 x float> %res
13760 declare <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
13762 define <4 x double> @test_vfnmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
13763 ; CHECK-LABEL: test_vfnmsub256_pd:
13765 ; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xae,0xc2]
13766 ; CHECK-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2
13767 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13768 %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
13769 ret <4 x double> %res
13772 define <4 x double> @test_mask_vfnmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
13773 ; X86-LABEL: test_mask_vfnmsub256_pd:
13775 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13776 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13777 ; X86-NEXT: vfnmsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9e,0xc1]
13778 ; X86-NEXT: # ymm0 = -(ymm0 * ymm1) - ymm2
13779 ; X86-NEXT: retl # encoding: [0xc3]
13781 ; X64-LABEL: test_mask_vfnmsub256_pd:
13783 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13784 ; X64-NEXT: vfnmsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9e,0xc1]
13785 ; X64-NEXT: # ymm0 = -(ymm0 * ymm1) - ymm2
13786 ; X64-NEXT: retq # encoding: [0xc3]
13787 %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
13788 ret <4 x double> %res
13791 declare <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
13793 define <2 x double> @test_vfnmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
13794 ; CHECK-LABEL: test_vfnmsub128_pd:
13796 ; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xae,0xc2]
13797 ; CHECK-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
13798 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13799 %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
13800 ret <2 x double> %res
13803 define <2 x double> @test_mask_vfnmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
13804 ; X86-LABEL: test_mask_vfnmsub128_pd:
13806 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13807 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13808 ; X86-NEXT: vfnmsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9e,0xc1]
13809 ; X86-NEXT: # xmm0 = -(xmm0 * xmm1) - xmm2
13810 ; X86-NEXT: retl # encoding: [0xc3]
13812 ; X64-LABEL: test_mask_vfnmsub128_pd:
13814 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13815 ; X64-NEXT: vfnmsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9e,0xc1]
13816 ; X64-NEXT: # xmm0 = -(xmm0 * xmm1) - xmm2
13817 ; X64-NEXT: retq # encoding: [0xc3]
13818 %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
13819 ret <2 x double> %res
13822 declare <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
13824 define <2 x double> @test_int_x86_avx512_mask3_vfnmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
13825 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_128:
13827 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13828 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13829 ; X86-NEXT: vfnmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xbe,0xd1]
13830 ; X86-NEXT: # xmm2 = -(xmm0 * xmm1) - xmm2
13831 ; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
13832 ; X86-NEXT: retl # encoding: [0xc3]
13834 ; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_128:
13836 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13837 ; X64-NEXT: vfnmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xbe,0xd1]
13838 ; X64-NEXT: # xmm2 = -(xmm0 * xmm1) - xmm2
13839 ; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
13840 ; X64-NEXT: retq # encoding: [0xc3]
13841 %res = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
13842 ret <2 x double> %res
13845 declare <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
13847 define <4 x double> @test_int_x86_avx512_mask3_vfnmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
13848 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_256:
13850 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13851 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13852 ; X86-NEXT: vfnmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xbe,0xd1]
13853 ; X86-NEXT: # ymm2 = -(ymm0 * ymm1) - ymm2
13854 ; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
13855 ; X86-NEXT: retl # encoding: [0xc3]
13857 ; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_256:
13859 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13860 ; X64-NEXT: vfnmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xbe,0xd1]
13861 ; X64-NEXT: # ymm2 = -(ymm0 * ymm1) - ymm2
13862 ; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
13863 ; X64-NEXT: retq # encoding: [0xc3]
13864 %res = call <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
13865 ret <4 x double> %res
13868 declare <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
13870 define <4 x float> @test_int_x86_avx512_mask3_vfnmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
13871 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_128:
13873 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13874 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13875 ; X86-NEXT: vfnmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xbe,0xd1]
13876 ; X86-NEXT: # xmm2 = -(xmm0 * xmm1) - xmm2
13877 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
13878 ; X86-NEXT: retl # encoding: [0xc3]
13880 ; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_128:
13882 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13883 ; X64-NEXT: vfnmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xbe,0xd1]
13884 ; X64-NEXT: # xmm2 = -(xmm0 * xmm1) - xmm2
13885 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
13886 ; X64-NEXT: retq # encoding: [0xc3]
13887 %res = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
13888 ret <4 x float> %res
13891 declare <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
13893 define <8 x float> @test_int_x86_avx512_mask3_vfnmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
13894 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_256:
13896 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13897 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13898 ; X86-NEXT: vfnmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xbe,0xd1]
13899 ; X86-NEXT: # ymm2 = -(ymm0 * ymm1) - ymm2
13900 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
13901 ; X86-NEXT: retl # encoding: [0xc3]
13903 ; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_256:
13905 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13906 ; X64-NEXT: vfnmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xbe,0xd1]
13907 ; X64-NEXT: # ymm2 = -(ymm0 * ymm1) - ymm2
13908 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
13909 ; X64-NEXT: retq # encoding: [0xc3]
13910 %res = call <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
13911 ret <8 x float> %res
13914 declare <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
13916 define <8 x float> @test_fmaddsub256_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
13917 ; CHECK-LABEL: test_fmaddsub256_ps:
13919 ; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa6,0xc2]
13920 ; CHECK-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
13921 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13922 %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 -1)
13923 ret <8 x float> %res
13926 define <8 x float> @test_mask_fmaddsub256_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask) {
13927 ; X86-LABEL: test_mask_fmaddsub256_ps:
13929 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13930 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13931 ; X86-NEXT: vfmaddsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x96,0xc1]
13932 ; X86-NEXT: # ymm0 = (ymm0 * ymm1) +/- ymm2
13933 ; X86-NEXT: retl # encoding: [0xc3]
13935 ; X64-LABEL: test_mask_fmaddsub256_ps:
13937 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13938 ; X64-NEXT: vfmaddsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x96,0xc1]
13939 ; X64-NEXT: # ymm0 = (ymm0 * ymm1) +/- ymm2
13940 ; X64-NEXT: retq # encoding: [0xc3]
13941 %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask)
13942 ret <8 x float> %res
13945 declare <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
13947 define <4 x float> @test_fmaddsub128_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
13948 ; CHECK-LABEL: test_fmaddsub128_ps:
13950 ; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa6,0xc2]
13951 ; CHECK-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
13952 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13953 %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 -1)
13954 ret <4 x float> %res
13957 define <4 x float> @test_mask_fmaddsub128_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
13958 ; X86-LABEL: test_mask_fmaddsub128_ps:
13960 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13961 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13962 ; X86-NEXT: vfmaddsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x96,0xc1]
13963 ; X86-NEXT: # xmm0 = (xmm0 * xmm1) +/- xmm2
13964 ; X86-NEXT: retl # encoding: [0xc3]
13966 ; X64-LABEL: test_mask_fmaddsub128_ps:
13968 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13969 ; X64-NEXT: vfmaddsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x96,0xc1]
13970 ; X64-NEXT: # xmm0 = (xmm0 * xmm1) +/- xmm2
13971 ; X64-NEXT: retq # encoding: [0xc3]
13972 %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask)
13973 ret <4 x float> %res
13976 declare <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
13978 define <4 x double> @test_vfmaddsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
13979 ; CHECK-LABEL: test_vfmaddsub256_pd:
13981 ; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa6,0xc2]
13982 ; CHECK-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
13983 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13984 %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
13985 ret <4 x double> %res
13988 define <4 x double> @test_mask_vfmaddsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
13989 ; X86-LABEL: test_mask_vfmaddsub256_pd:
13991 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13992 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13993 ; X86-NEXT: vfmaddsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x96,0xc1]
13994 ; X86-NEXT: # ymm0 = (ymm0 * ymm1) +/- ymm2
13995 ; X86-NEXT: retl # encoding: [0xc3]
13997 ; X64-LABEL: test_mask_vfmaddsub256_pd:
13999 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14000 ; X64-NEXT: vfmaddsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x96,0xc1]
14001 ; X64-NEXT: # ymm0 = (ymm0 * ymm1) +/- ymm2
14002 ; X64-NEXT: retq # encoding: [0xc3]
14003 %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
14004 ret <4 x double> %res
14007 declare <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
14009 define <2 x double> @test_vfmaddsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
14010 ; CHECK-LABEL: test_vfmaddsub128_pd:
14012 ; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa6,0xc2]
14013 ; CHECK-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
14014 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
14015 %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
14016 ret <2 x double> %res
14019 define <2 x double> @test_mask_vfmaddsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
14020 ; X86-LABEL: test_mask_vfmaddsub128_pd:
14022 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14023 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14024 ; X86-NEXT: vfmaddsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x96,0xc1]
14025 ; X86-NEXT: # xmm0 = (xmm0 * xmm1) +/- xmm2
14026 ; X86-NEXT: retl # encoding: [0xc3]
14028 ; X64-LABEL: test_mask_vfmaddsub128_pd:
14030 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14031 ; X64-NEXT: vfmaddsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x96,0xc1]
14032 ; X64-NEXT: # xmm0 = (xmm0 * xmm1) +/- xmm2
14033 ; X64-NEXT: retq # encoding: [0xc3]
14034 %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
14035 ret <2 x double> %res
14038 declare <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
14040 define <2 x double> @test_int_x86_avx512_mask3_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
14041 ; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_128:
14043 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14044 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14045 ; X86-NEXT: vfmaddsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb6,0xd1]
14046 ; X86-NEXT: # xmm2 = (xmm0 * xmm1) +/- xmm2
14047 ; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
14048 ; X86-NEXT: retl # encoding: [0xc3]
14050 ; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_128:
14052 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14053 ; X64-NEXT: vfmaddsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb6,0xd1]
14054 ; X64-NEXT: # xmm2 = (xmm0 * xmm1) +/- xmm2
14055 ; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
14056 ; X64-NEXT: retq # encoding: [0xc3]
14057 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
14058 ret <2 x double> %res
14061 declare <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
14063 define <2 x double> @test_int_x86_avx512_maskz_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
14064 ; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_128:
14066 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14067 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14068 ; X86-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa6,0xc2]
14069 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
14070 ; X86-NEXT: retl # encoding: [0xc3]
14072 ; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_128:
14074 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14075 ; X64-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa6,0xc2]
14076 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
14077 ; X64-NEXT: retq # encoding: [0xc3]
14078 %res = call <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
14079 ret <2 x double> %res
14082 declare <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
14084 define <4 x double> @test_int_x86_avx512_mask3_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
14085 ; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_256:
14087 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14088 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14089 ; X86-NEXT: vfmaddsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb6,0xd1]
14090 ; X86-NEXT: # ymm2 = (ymm0 * ymm1) +/- ymm2
14091 ; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
14092 ; X86-NEXT: retl # encoding: [0xc3]
14094 ; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_256:
14096 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14097 ; X64-NEXT: vfmaddsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb6,0xd1]
14098 ; X64-NEXT: # ymm2 = (ymm0 * ymm1) +/- ymm2
14099 ; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
14100 ; X64-NEXT: retq # encoding: [0xc3]
14101 %res = call <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
14102 ret <4 x double> %res
14105 declare <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
14107 define <4 x double> @test_int_x86_avx512_maskz_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
14108 ; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_256:
14110 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14111 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14112 ; X86-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa6,0xc2]
14113 ; X86-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
14114 ; X86-NEXT: retl # encoding: [0xc3]
14116 ; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_256:
14118 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14119 ; X64-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa6,0xc2]
14120 ; X64-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
14121 ; X64-NEXT: retq # encoding: [0xc3]
14122 %res = call <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
14123 ret <4 x double> %res
14126 declare <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
14128 define <4 x float> @test_int_x86_avx512_mask3_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
14129 ; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_128:
14131 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14132 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14133 ; X86-NEXT: vfmaddsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb6,0xd1]
14134 ; X86-NEXT: # xmm2 = (xmm0 * xmm1) +/- xmm2
14135 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
14136 ; X86-NEXT: retl # encoding: [0xc3]
14138 ; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_128:
14140 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14141 ; X64-NEXT: vfmaddsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb6,0xd1]
14142 ; X64-NEXT: # xmm2 = (xmm0 * xmm1) +/- xmm2
14143 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
14144 ; X64-NEXT: retq # encoding: [0xc3]
14145 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
14146 ret <4 x float> %res
14149 declare <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
14151 define <4 x float> @test_int_x86_avx512_maskz_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
14152 ; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_128:
14154 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14155 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14156 ; X86-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa6,0xc2]
14157 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
14158 ; X86-NEXT: retl # encoding: [0xc3]
14160 ; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_128:
14162 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14163 ; X64-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa6,0xc2]
14164 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
14165 ; X64-NEXT: retq # encoding: [0xc3]
14166 %res = call <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
14167 ret <4 x float> %res
14170 declare <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
14172 define <8 x float> @test_int_x86_avx512_mask3_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
14173 ; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_256:
14175 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14176 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14177 ; X86-NEXT: vfmaddsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb6,0xd1]
14178 ; X86-NEXT: # ymm2 = (ymm0 * ymm1) +/- ymm2
14179 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
14180 ; X86-NEXT: retl # encoding: [0xc3]
14182 ; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_256:
14184 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14185 ; X64-NEXT: vfmaddsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb6,0xd1]
14186 ; X64-NEXT: # ymm2 = (ymm0 * ymm1) +/- ymm2
14187 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
14188 ; X64-NEXT: retq # encoding: [0xc3]
14189 %res = call <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
14190 ret <8 x float> %res
14193 declare <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
14195 define <8 x float> @test_int_x86_avx512_maskz_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
14196 ; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_256:
14198 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14199 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14200 ; X86-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa6,0xc2]
14201 ; X86-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
14202 ; X86-NEXT: retl # encoding: [0xc3]
14204 ; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_256:
14206 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14207 ; X64-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa6,0xc2]
14208 ; X64-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
14209 ; X64-NEXT: retq # encoding: [0xc3]
14210 %res = call <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
14211 ret <8 x float> %res
14214 declare <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
14216 define <2 x double> @test_int_x86_avx512_mask3_vfmsubadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
14217 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_128:
14219 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14220 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14221 ; X86-NEXT: vfmsubadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb7,0xd1]
14222 ; X86-NEXT: # xmm2 = (xmm0 * xmm1) -/+ xmm2
14223 ; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
14224 ; X86-NEXT: retl # encoding: [0xc3]
14226 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_128:
14228 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14229 ; X64-NEXT: vfmsubadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb7,0xd1]
14230 ; X64-NEXT: # xmm2 = (xmm0 * xmm1) -/+ xmm2
14231 ; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
14232 ; X64-NEXT: retq # encoding: [0xc3]
14233 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
14234 ret <2 x double> %res
14237 declare <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
14239 define <4 x double> @test_int_x86_avx512_mask3_vfmsubadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
14240 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_256:
14242 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14243 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14244 ; X86-NEXT: vfmsubadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb7,0xd1]
14245 ; X86-NEXT: # ymm2 = (ymm0 * ymm1) -/+ ymm2
14246 ; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
14247 ; X86-NEXT: retl # encoding: [0xc3]
14249 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_256:
14251 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14252 ; X64-NEXT: vfmsubadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb7,0xd1]
14253 ; X64-NEXT: # ymm2 = (ymm0 * ymm1) -/+ ymm2
14254 ; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
14255 ; X64-NEXT: retq # encoding: [0xc3]
14256 %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
14257 ret <4 x double> %res
14260 declare <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
14262 define <4 x float> @test_int_x86_avx512_mask3_vfmsubadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
14263 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_128:
14265 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14266 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14267 ; X86-NEXT: vfmsubadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb7,0xd1]
14268 ; X86-NEXT: # xmm2 = (xmm0 * xmm1) -/+ xmm2
14269 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
14270 ; X86-NEXT: retl # encoding: [0xc3]
14272 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_128:
14274 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14275 ; X64-NEXT: vfmsubadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb7,0xd1]
14276 ; X64-NEXT: # xmm2 = (xmm0 * xmm1) -/+ xmm2
14277 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
14278 ; X64-NEXT: retq # encoding: [0xc3]
14279 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
14280 ret <4 x float> %res
14283 declare <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
14285 define <8 x float> @test_int_x86_avx512_mask3_vfmsubadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
14286 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_256:
14288 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14289 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14290 ; X86-NEXT: vfmsubadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb7,0xd1]
14291 ; X86-NEXT: # ymm2 = (ymm0 * ymm1) -/+ ymm2
14292 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
14293 ; X86-NEXT: retl # encoding: [0xc3]
14295 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_256:
14297 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14298 ; X64-NEXT: vfmsubadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb7,0xd1]
14299 ; X64-NEXT: # ymm2 = (ymm0 * ymm1) -/+ ymm2
14300 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
14301 ; X64-NEXT: retq # encoding: [0xc3]
14302 %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
14303 ret <8 x float> %res
14307 define <4 x float> @test_mask_vfmadd128_ps_rmk(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) {
14308 ; X86-LABEL: test_mask_vfmadd128_ps_rmk:
14310 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
14311 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
14312 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
14313 ; X86-NEXT: vfmadd213ps (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x00]
14314 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14315 ; X86-NEXT: retl # encoding: [0xc3]
14317 ; X64-LABEL: test_mask_vfmadd128_ps_rmk:
14319 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
14320 ; X64-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
14321 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14322 ; X64-NEXT: retq # encoding: [0xc3]
14323 %a2 = load <4 x float>, <4 x float>* %ptr_a2
14324 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
14325 ret <4 x float> %res
14328 define <4 x float> @test_mask_vfmadd128_ps_rmka(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) {
14329 ; X86-LABEL: test_mask_vfmadd128_ps_rmka:
14331 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
14332 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
14333 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
14334 ; X86-NEXT: vfmadd213ps (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x00]
14335 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14336 ; X86-NEXT: retl # encoding: [0xc3]
14338 ; X64-LABEL: test_mask_vfmadd128_ps_rmka:
14340 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
14341 ; X64-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
14342 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14343 ; X64-NEXT: retq # encoding: [0xc3]
14344 %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 8
14345 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
14346 ret <4 x float> %res
14349 define <4 x float> @test_mask_vfmadd128_ps_rmkz(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
14350 ; X86-LABEL: test_mask_vfmadd128_ps_rmkz:
14352 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
14353 ; X86-NEXT: vfmadd213ps (%eax), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x00]
14354 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14355 ; X86-NEXT: retl # encoding: [0xc3]
14357 ; X64-LABEL: test_mask_vfmadd128_ps_rmkz:
14359 ; X64-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x07]
14360 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14361 ; X64-NEXT: retq # encoding: [0xc3]
14362 %a2 = load <4 x float>, <4 x float>* %ptr_a2
14363 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
14364 ret <4 x float> %res
14367 define <4 x float> @test_mask_vfmadd128_ps_rmkza(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
14368 ; X86-LABEL: test_mask_vfmadd128_ps_rmkza:
14370 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
14371 ; X86-NEXT: vfmadd213ps (%eax), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x00]
14372 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14373 ; X86-NEXT: retl # encoding: [0xc3]
14375 ; X64-LABEL: test_mask_vfmadd128_ps_rmkza:
14377 ; X64-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x07]
14378 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14379 ; X64-NEXT: retq # encoding: [0xc3]
14380 %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 4
14381 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
14382 ret <4 x float> %res
14385 define <4 x float> @test_mask_vfmadd128_ps_rmb(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) {
14386 ; X86-LABEL: test_mask_vfmadd128_ps_rmb:
14388 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
14389 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
14390 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
14391 ; X86-NEXT: vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x00]
14392 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14393 ; X86-NEXT: retl # encoding: [0xc3]
14395 ; X64-LABEL: test_mask_vfmadd128_ps_rmb:
14397 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
14398 ; X64-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
14399 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14400 ; X64-NEXT: retq # encoding: [0xc3]
14401 %q = load float, float* %ptr_a2
14402 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
14403 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
14404 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
14405 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
14406 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind
14407 ret <4 x float> %res
14410 define <4 x float> @test_mask_vfmadd128_ps_rmba(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) {
14411 ; X86-LABEL: test_mask_vfmadd128_ps_rmba:
14413 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
14414 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
14415 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
14416 ; X86-NEXT: vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x00]
14417 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14418 ; X86-NEXT: retl # encoding: [0xc3]
14420 ; X64-LABEL: test_mask_vfmadd128_ps_rmba:
14422 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
14423 ; X64-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
14424 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14425 ; X64-NEXT: retq # encoding: [0xc3]
14426 %q = load float, float* %ptr_a2, align 4
14427 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
14428 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
14429 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
14430 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
14431 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind
14432 ret <4 x float> %res
14435 define <4 x float> @test_mask_vfmadd128_ps_rmbz(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) {
14436 ; X86-LABEL: test_mask_vfmadd128_ps_rmbz:
14438 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
14439 ; X86-NEXT: vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x00]
14440 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14441 ; X86-NEXT: retl # encoding: [0xc3]
14443 ; X64-LABEL: test_mask_vfmadd128_ps_rmbz:
14445 ; X64-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
14446 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14447 ; X64-NEXT: retq # encoding: [0xc3]
14448 %q = load float, float* %ptr_a2
14449 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
14450 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
14451 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
14452 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
14453 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
14454 ret <4 x float> %res
14457 define <4 x float> @test_mask_vfmadd128_ps_rmbza(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) {
14458 ; X86-LABEL: test_mask_vfmadd128_ps_rmbza:
14460 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
14461 ; X86-NEXT: vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x00]
14462 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14463 ; X86-NEXT: retl # encoding: [0xc3]
14465 ; X64-LABEL: test_mask_vfmadd128_ps_rmbza:
14467 ; X64-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
14468 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14469 ; X64-NEXT: retq # encoding: [0xc3]
14470 %q = load float, float* %ptr_a2, align 4
14471 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
14472 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
14473 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
14474 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
14475 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
14476 ret <4 x float> %res
14479 define <2 x double> @test_mask_vfmadd128_pd_rmk(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2, i8 %mask) {
14480 ; X86-LABEL: test_mask_vfmadd128_pd_rmk:
14482 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
14483 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
14484 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
14485 ; X86-NEXT: vfmadd213pd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x00]
14486 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14487 ; X86-NEXT: retl # encoding: [0xc3]
14489 ; X64-LABEL: test_mask_vfmadd128_pd_rmk:
14491 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
14492 ; X64-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x07]
14493 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14494 ; X64-NEXT: retq # encoding: [0xc3]
14495 %a2 = load <2 x double>, <2 x double>* %ptr_a2
14496 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
14497 ret <2 x double> %res
14500 define <2 x double> @test_mask_vfmadd128_pd_rmkz(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2) {
14501 ; X86-LABEL: test_mask_vfmadd128_pd_rmkz:
14503 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
14504 ; X86-NEXT: vfmadd213pd (%eax), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0x00]
14505 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14506 ; X86-NEXT: retl # encoding: [0xc3]
14508 ; X64-LABEL: test_mask_vfmadd128_pd_rmkz:
14510 ; X64-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0x07]
14511 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14512 ; X64-NEXT: retq # encoding: [0xc3]
14513 %a2 = load <2 x double>, <2 x double>* %ptr_a2
14514 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
14515 ret <2 x double> %res
14518 define <4 x double> @test_mask_vfmadd256_pd_rmk(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2, i8 %mask) {
14519 ; X86-LABEL: test_mask_vfmadd256_pd_rmk:
14521 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
14522 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
14523 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
14524 ; X86-NEXT: vfmadd213pd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x00]
14525 ; X86-NEXT: # ymm0 = (ymm1 * ymm0) + mem
14526 ; X86-NEXT: retl # encoding: [0xc3]
14528 ; X64-LABEL: test_mask_vfmadd256_pd_rmk:
14530 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
14531 ; X64-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x07]
14532 ; X64-NEXT: # ymm0 = (ymm1 * ymm0) + mem
14533 ; X64-NEXT: retq # encoding: [0xc3]
14534 %a2 = load <4 x double>, <4 x double>* %ptr_a2
14535 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
14536 ret <4 x double> %res
14539 define <4 x double> @test_mask_vfmadd256_pd_rmkz(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2) {
14540 ; X86-LABEL: test_mask_vfmadd256_pd_rmkz:
14542 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
14543 ; X86-NEXT: vfmadd213pd (%eax), %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0x00]
14544 ; X86-NEXT: # ymm0 = (ymm1 * ymm0) + mem
14545 ; X86-NEXT: retl # encoding: [0xc3]
14547 ; X64-LABEL: test_mask_vfmadd256_pd_rmkz:
14549 ; X64-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0x07]
14550 ; X64-NEXT: # ymm0 = (ymm1 * ymm0) + mem
14551 ; X64-NEXT: retq # encoding: [0xc3]
14552 %a2 = load <4 x double>, <4 x double>* %ptr_a2
14553 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
14554 ret <4 x double> %res
14557 declare <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64>, <4 x i32>, i8)
14559 define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
14560 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qd_256:
14562 ; X86-NEXT: vpmovqd %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x35,0xc2]
14563 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14564 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14565 ; X86-NEXT: vpmovqd %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x35,0xc1]
14566 ; X86-NEXT: vpmovqd %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x35,0xc0]
14567 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
14568 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
14569 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
14570 ; X86-NEXT: retl # encoding: [0xc3]
14572 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qd_256:
14574 ; X64-NEXT: vpmovqd %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x35,0xc2]
14575 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14576 ; X64-NEXT: vpmovqd %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x35,0xc1]
14577 ; X64-NEXT: vpmovqd %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x35,0xc0]
14578 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
14579 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
14580 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
14581 ; X64-NEXT: retq # encoding: [0xc3]
14582 %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1)
14583 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2)
14584 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
14585 %res3 = add <4 x i32> %res0, %res1
14586 %res4 = add <4 x i32> %res3, %res2
14587 ret <4 x i32> %res4
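; The tests below exercise the masked compress/expand intrinsics
; (@llvm.x86.avx512.mask.compress.* and @llvm.x86.avx512.mask.expand.*) at 128-bit
; width. Each element type is checked in three forms: merge-masking into a passthru
; operand, zero-masking, and an all-ones mask with an undef passthru, which the
; CHECK lines expect to fold away to a bare return.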
14590 define <2 x double> @test_mask_compress_pd_128(<2 x double> %data, <2 x double> %passthru, i8 %mask) {
14591 ; X86-LABEL: test_mask_compress_pd_128:
14593 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14594 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14595 ; X86-NEXT: vcompresspd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0xc1]
14596 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14597 ; X86-NEXT: retl # encoding: [0xc3]
14599 ; X64-LABEL: test_mask_compress_pd_128:
14601 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14602 ; X64-NEXT: vcompresspd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0xc1]
14603 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14604 ; X64-NEXT: retq # encoding: [0xc3]
14605 %res = call <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> %passthru, i8 %mask)
14606 ret <2 x double> %res
14609 define <2 x double> @test_maskz_compress_pd_128(<2 x double> %data, i8 %mask) {
14610 ; X86-LABEL: test_maskz_compress_pd_128:
14612 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14613 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14614 ; X86-NEXT: vcompresspd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8a,0xc0]
14615 ; X86-NEXT: retl # encoding: [0xc3]
14617 ; X64-LABEL: test_maskz_compress_pd_128:
14619 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14620 ; X64-NEXT: vcompresspd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8a,0xc0]
14621 ; X64-NEXT: retq # encoding: [0xc3]
14622 %res = call <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> zeroinitializer, i8 %mask)
14623 ret <2 x double> %res
14626 define <2 x double> @test_compress_pd_128(<2 x double> %data) {
14627 ; CHECK-LABEL: test_compress_pd_128:
14629 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
14630 %res = call <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> undef, i8 -1)
14631 ret <2 x double> %res
14634 declare <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> %src0, i8 %mask)
14636 define <4 x float> @test_mask_compress_ps_128(<4 x float> %data, <4 x float> %passthru, i8 %mask) {
14637 ; X86-LABEL: test_mask_compress_ps_128:
14639 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14640 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14641 ; X86-NEXT: vcompressps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0xc1]
14642 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14643 ; X86-NEXT: retl # encoding: [0xc3]
14645 ; X64-LABEL: test_mask_compress_ps_128:
14647 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14648 ; X64-NEXT: vcompressps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0xc1]
14649 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14650 ; X64-NEXT: retq # encoding: [0xc3]
14651 %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> %passthru, i8 %mask)
14652 ret <4 x float> %res
14655 define <4 x float> @test_maskz_compress_ps_128(<4 x float> %data, i8 %mask) {
14656 ; X86-LABEL: test_maskz_compress_ps_128:
14658 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14659 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14660 ; X86-NEXT: vcompressps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8a,0xc0]
14661 ; X86-NEXT: retl # encoding: [0xc3]
14663 ; X64-LABEL: test_maskz_compress_ps_128:
14665 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14666 ; X64-NEXT: vcompressps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8a,0xc0]
14667 ; X64-NEXT: retq # encoding: [0xc3]
14668 %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> zeroinitializer, i8 %mask)
14669 ret <4 x float> %res
14672 define <4 x float> @test_compress_ps_128(<4 x float> %data) {
14673 ; CHECK-LABEL: test_compress_ps_128:
14675 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
14676 %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> undef, i8 -1)
14677 ret <4 x float> %res
14680 declare <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask)
14682 define <2 x i64> @test_mask_compress_q_128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) {
14683 ; X86-LABEL: test_mask_compress_q_128:
14685 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14686 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14687 ; X86-NEXT: vpcompressq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0xc1]
14688 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14689 ; X86-NEXT: retl # encoding: [0xc3]
14691 ; X64-LABEL: test_mask_compress_q_128:
14693 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14694 ; X64-NEXT: vpcompressq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0xc1]
14695 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14696 ; X64-NEXT: retq # encoding: [0xc3]
14697 %res = call <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask)
14698 ret <2 x i64> %res
14701 define <2 x i64> @test_maskz_compress_q_128(<2 x i64> %data, i8 %mask) {
14702 ; X86-LABEL: test_maskz_compress_q_128:
14704 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14705 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14706 ; X86-NEXT: vpcompressq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8b,0xc0]
14707 ; X86-NEXT: retl # encoding: [0xc3]
14709 ; X64-LABEL: test_maskz_compress_q_128:
14711 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14712 ; X64-NEXT: vpcompressq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8b,0xc0]
14713 ; X64-NEXT: retq # encoding: [0xc3]
14714 %res = call <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> zeroinitializer, i8 %mask)
14715 ret <2 x i64> %res
14718 define <2 x i64> @test_compress_q_128(<2 x i64> %data) {
14719 ; CHECK-LABEL: test_compress_q_128:
14721 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
14722 %res = call <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> undef, i8 -1)
14723 ret <2 x i64> %res
14726 declare <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> %src0, i8 %mask)
14728 define <4 x i32> @test_mask_compress_d_128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) {
14729 ; X86-LABEL: test_mask_compress_d_128:
14731 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14732 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14733 ; X86-NEXT: vpcompressd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0xc1]
14734 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14735 ; X86-NEXT: retl # encoding: [0xc3]
14737 ; X64-LABEL: test_mask_compress_d_128:
14739 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14740 ; X64-NEXT: vpcompressd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0xc1]
14741 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14742 ; X64-NEXT: retq # encoding: [0xc3]
14743 %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask)
14744 ret <4 x i32> %res
14747 define <4 x i32> @test_maskz_compress_d_128(<4 x i32> %data, i8 %mask) {
14748 ; X86-LABEL: test_maskz_compress_d_128:
14750 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14751 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14752 ; X86-NEXT: vpcompressd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0]
14753 ; X86-NEXT: retl # encoding: [0xc3]
14755 ; X64-LABEL: test_maskz_compress_d_128:
14757 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14758 ; X64-NEXT: vpcompressd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0]
14759 ; X64-NEXT: retq # encoding: [0xc3]
14760 %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> zeroinitializer, i8 %mask)
14761 ret <4 x i32> %res
14764 define <4 x i32> @test_compress_d_128(<4 x i32> %data) {
14765 ; CHECK-LABEL: test_compress_d_128:
14767 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
14768 %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> undef, i8 -1)
14769 ret <4 x i32> %res
14772 declare <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)
14774 define <2 x double> @test_expand_pd_128(<2 x double> %data) {
14775 ; CHECK-LABEL: test_expand_pd_128:
14777 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
14778 %res = call <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> undef, i8 -1)
14779 ret <2 x double> %res
14782 define <2 x double> @test_mask_expand_pd_128(<2 x double> %data, <2 x double> %passthru, i8 %mask) {
14783 ; X86-LABEL: test_mask_expand_pd_128:
14785 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14786 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14787 ; X86-NEXT: vexpandpd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0xc8]
14788 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14789 ; X86-NEXT: retl # encoding: [0xc3]
14791 ; X64-LABEL: test_mask_expand_pd_128:
14793 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14794 ; X64-NEXT: vexpandpd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0xc8]
14795 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14796 ; X64-NEXT: retq # encoding: [0xc3]
14797 %res = call <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> %passthru, i8 %mask)
14798 ret <2 x double> %res
14801 define <2 x double> @test_maskz_expand_pd_128(<2 x double> %data, i8 %mask) {
14802 ; X86-LABEL: test_maskz_expand_pd_128:
14804 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14805 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14806 ; X86-NEXT: vexpandpd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0xc0]
14807 ; X86-NEXT: retl # encoding: [0xc3]
14809 ; X64-LABEL: test_maskz_expand_pd_128:
14811 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14812 ; X64-NEXT: vexpandpd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0xc0]
14813 ; X64-NEXT: retq # encoding: [0xc3]
14814 %res = call <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> zeroinitializer, i8 %mask)
14815 ret <2 x double> %res
14818 declare <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> %src0, i8 %mask)
14820 define <4 x float> @test_expand_ps_128(<4 x float> %data) {
14821 ; CHECK-LABEL: test_expand_ps_128:
14823 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
14824 %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> undef, i8 -1)
14825 ret <4 x float> %res
14828 define <4 x float> @test_mask_expand_ps_128(<4 x float> %data, <4 x float> %passthru, i8 %mask) {
14829 ; X86-LABEL: test_mask_expand_ps_128:
14831 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14832 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14833 ; X86-NEXT: vexpandps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0xc8]
14834 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14835 ; X86-NEXT: retl # encoding: [0xc3]
14837 ; X64-LABEL: test_mask_expand_ps_128:
14839 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14840 ; X64-NEXT: vexpandps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0xc8]
14841 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14842 ; X64-NEXT: retq # encoding: [0xc3]
14843 %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> %passthru, i8 %mask)
14844 ret <4 x float> %res
14847 define <4 x float> @test_maskz_expand_ps_128(<4 x float> %data, i8 %mask) {
14848 ; X86-LABEL: test_maskz_expand_ps_128:
14850 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14851 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14852 ; X86-NEXT: vexpandps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0xc0]
14853 ; X86-NEXT: retl # encoding: [0xc3]
14855 ; X64-LABEL: test_maskz_expand_ps_128:
14857 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14858 ; X64-NEXT: vexpandps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0xc0]
14859 ; X64-NEXT: retq # encoding: [0xc3]
14860 %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> zeroinitializer, i8 %mask)
14861 ret <4 x float> %res
14864 declare <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask)
14866 define <2 x i64> @test_expand_q_128(<2 x i64> %data) {
14867 ; CHECK-LABEL: test_expand_q_128:
14869 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
14870 %res = call <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> undef, i8 -1)
14871 ret <2 x i64> %res
14874 define <2 x i64> @test_mask_expand_q_128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) {
14875 ; X86-LABEL: test_mask_expand_q_128:
14877 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14878 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14879 ; X86-NEXT: vpexpandq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0xc8]
14880 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14881 ; X86-NEXT: retl # encoding: [0xc3]
14883 ; X64-LABEL: test_mask_expand_q_128:
14885 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14886 ; X64-NEXT: vpexpandq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0xc8]
14887 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14888 ; X64-NEXT: retq # encoding: [0xc3]
14889 %res = call <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask)
14890 ret <2 x i64> %res
14893 define <2 x i64> @test_maskz_expand_q_128(<2 x i64> %data, i8 %mask) {
14894 ; X86-LABEL: test_maskz_expand_q_128:
14896 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14897 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14898 ; X86-NEXT: vpexpandq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0xc0]
14899 ; X86-NEXT: retl # encoding: [0xc3]
14901 ; X64-LABEL: test_maskz_expand_q_128:
14903 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14904 ; X64-NEXT: vpexpandq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0xc0]
14905 ; X64-NEXT: retq # encoding: [0xc3]
14906 %res = call <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> zeroinitializer, i8 %mask)
14907 ret <2 x i64> %res
14910 declare <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> %src0, i8 %mask)
14912 define <4 x i32> @test_expand_d_128(<4 x i32> %data) {
14913 ; CHECK-LABEL: test_expand_d_128:
14915 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
14916 %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> undef, i8 -1)
14917 ret <4 x i32> %res
14920 define <4 x i32> @test_mask_expand_d_128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) {
14921 ; X86-LABEL: test_mask_expand_d_128:
14923 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14924 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14925 ; X86-NEXT: vpexpandd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0xc8]
14926 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14927 ; X86-NEXT: retl # encoding: [0xc3]
14929 ; X64-LABEL: test_mask_expand_d_128:
14931 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14932 ; X64-NEXT: vpexpandd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0xc8]
14933 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14934 ; X64-NEXT: retq # encoding: [0xc3]
14935 %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask)
14936 ret <4 x i32> %res
14939 define <4 x i32> @test_maskz_expand_d_128(<4 x i32> %data, i8 %mask) {
14940 ; X86-LABEL: test_maskz_expand_d_128:
14942 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14943 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14944 ; X86-NEXT: vpexpandd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0]
14945 ; X86-NEXT: retl # encoding: [0xc3]
14947 ; X64-LABEL: test_maskz_expand_d_128:
14949 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14950 ; X64-NEXT: vpexpandd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0]
14951 ; X64-NEXT: retq # encoding: [0xc3]
14952 %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> zeroinitializer, i8 %mask)
14953 ret <4 x i32> %res
14956 declare <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)
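; The 256-bit compress/expand variants follow, using the same three-way masking
; pattern (merge-masking, zero-masking, all-ones mask) as the 128-bit tests above.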
14958 define <4 x double> @test_mask_compress_pd_256(<4 x double> %data, <4 x double> %passthru, i8 %mask) {
14959 ; X86-LABEL: test_mask_compress_pd_256:
14961 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14962 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14963 ; X86-NEXT: vcompresspd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1]
14964 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
14965 ; X86-NEXT: retl # encoding: [0xc3]
14967 ; X64-LABEL: test_mask_compress_pd_256:
14969 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14970 ; X64-NEXT: vcompresspd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1]
14971 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
14972 ; X64-NEXT: retq # encoding: [0xc3]
14973 %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> %passthru, i8 %mask)
14974 ret <4 x double> %res
14977 define <4 x double> @test_maskz_compress_pd_256(<4 x double> %data, i8 %mask) {
14978 ; X86-LABEL: test_maskz_compress_pd_256:
14980 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14981 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14982 ; X86-NEXT: vcompresspd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8a,0xc0]
14983 ; X86-NEXT: retl # encoding: [0xc3]
14985 ; X64-LABEL: test_maskz_compress_pd_256:
14987 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14988 ; X64-NEXT: vcompresspd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8a,0xc0]
14989 ; X64-NEXT: retq # encoding: [0xc3]
14990 %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> zeroinitializer, i8 %mask)
14991 ret <4 x double> %res
14994 define <4 x double> @test_compress_pd_256(<4 x double> %data) {
14995 ; CHECK-LABEL: test_compress_pd_256:
14997 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
14998 %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> undef, i8 -1)
14999 ret <4 x double> %res
15002 declare <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)
15004 define <8 x float> @test_mask_compress_ps_256(<8 x float> %data, <8 x float> %passthru, i8 %mask) {
15005 ; X86-LABEL: test_mask_compress_ps_256:
15007 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
15008 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
15009 ; X86-NEXT: vcompressps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0xc1]
15010 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
15011 ; X86-NEXT: retl # encoding: [0xc3]
15013 ; X64-LABEL: test_mask_compress_ps_256:
15015 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
15016 ; X64-NEXT: vcompressps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0xc1]
15017 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
15018 ; X64-NEXT: retq # encoding: [0xc3]
15019 %res = call <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> %passthru, i8 %mask)
15020 ret <8 x float> %res
15023 define <8 x float> @test_maskz_compress_ps_256(<8 x float> %data, i8 %mask) {
15024 ; X86-LABEL: test_maskz_compress_ps_256:
15026 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
15027 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
15028 ; X86-NEXT: vcompressps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8a,0xc0]
15029 ; X86-NEXT: retl # encoding: [0xc3]
15031 ; X64-LABEL: test_maskz_compress_ps_256:
15033 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
15034 ; X64-NEXT: vcompressps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8a,0xc0]
15035 ; X64-NEXT: retq # encoding: [0xc3]
15036 %res = call <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> zeroinitializer, i8 %mask)
15037 ret <8 x float> %res
15040 define <8 x float> @test_compress_ps_256(<8 x float> %data) {
15041 ; CHECK-LABEL: test_compress_ps_256:
15043 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
15044 %res = call <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> undef, i8 -1)
15045 ret <8 x float> %res
15048 declare <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> %src0, i8 %mask)
15050 define <4 x i64> @test_mask_compress_q_256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask) {
15051 ; X86-LABEL: test_mask_compress_q_256:
15053 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
15054 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
15055 ; X86-NEXT: vpcompressq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0xc1]
15056 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
15057 ; X86-NEXT: retl # encoding: [0xc3]
15059 ; X64-LABEL: test_mask_compress_q_256:
15061 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
15062 ; X64-NEXT: vpcompressq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0xc1]
15063 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
15064 ; X64-NEXT: retq # encoding: [0xc3]
15065 %res = call <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask)
15066 ret <4 x i64> %res
15069 define <4 x i64> @test_maskz_compress_q_256(<4 x i64> %data, i8 %mask) {
15070 ; X86-LABEL: test_maskz_compress_q_256:
15072 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
15073 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
15074 ; X86-NEXT: vpcompressq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8b,0xc0]
15075 ; X86-NEXT: retl # encoding: [0xc3]
15077 ; X64-LABEL: test_maskz_compress_q_256:
15079 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
15080 ; X64-NEXT: vpcompressq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8b,0xc0]
15081 ; X64-NEXT: retq # encoding: [0xc3]
15082 %res = call <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> zeroinitializer, i8 %mask)
15083 ret <4 x i64> %res
15086 define <4 x i64> @test_compress_q_256(<4 x i64> %data) {
15087 ; CHECK-LABEL: test_compress_q_256:
15089 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
15090 %res = call <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> undef, i8 -1)
15091 ret <4 x i64> %res
15094 declare <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> %src0, i8 %mask)
15096 define <8 x i32> @test_mask_compress_d_256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask) {
15097 ; X86-LABEL: test_mask_compress_d_256:
15099 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
15100 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
15101 ; X86-NEXT: vpcompressd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0xc1]
15102 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
15103 ; X86-NEXT: retl # encoding: [0xc3]
15105 ; X64-LABEL: test_mask_compress_d_256:
15107 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
15108 ; X64-NEXT: vpcompressd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0xc1]
15109 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
15110 ; X64-NEXT: retq # encoding: [0xc3]
15111 %res = call <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask)
15112 ret <8 x i32> %res
15115 define <8 x i32> @test_maskz_compress_d_256(<8 x i32> %data, i8 %mask) {
15116 ; X86-LABEL: test_maskz_compress_d_256:
15118 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
15119 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
15120 ; X86-NEXT: vpcompressd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8b,0xc0]
15121 ; X86-NEXT: retl # encoding: [0xc3]
15123 ; X64-LABEL: test_maskz_compress_d_256:
15125 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
15126 ; X64-NEXT: vpcompressd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8b,0xc0]
15127 ; X64-NEXT: retq # encoding: [0xc3]
15128 %res = call <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> zeroinitializer, i8 %mask)
15129 ret <8 x i32> %res
15132 define <8 x i32> @test_compress_d_256(<8 x i32> %data) {
15133 ; CHECK-LABEL: test_compress_d_256:
15135 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
15136 %res = call <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> undef, i8 -1)
15137 ret <8 x i32> %res
15140 declare <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> %src0, i8 %mask)
15142 define <4 x double> @test_expand_pd_256(<4 x double> %data) {
15143 ; CHECK-LABEL: test_expand_pd_256:
15145 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
15146 %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> undef, i8 -1)
15147 ret <4 x double> %res
15150 define <4 x double> @test_mask_expand_pd_256(<4 x double> %data, <4 x double> %passthru, i8 %mask) {
15151 ; X86-LABEL: test_mask_expand_pd_256:
15153 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
15154 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
15155 ; X86-NEXT: vexpandpd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8]
15156 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
15157 ; X86-NEXT: retl # encoding: [0xc3]
15159 ; X64-LABEL: test_mask_expand_pd_256:
15161 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
15162 ; X64-NEXT: vexpandpd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8]
15163 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
15164 ; X64-NEXT: retq # encoding: [0xc3]
15165 %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> %passthru, i8 %mask)
15166 ret <4 x double> %res
15169 define <4 x double> @test_maskz_expand_pd_256(<4 x double> %data, i8 %mask) {
15170 ; X86-LABEL: test_maskz_expand_pd_256:
15172 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
15173 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
15174 ; X86-NEXT: vexpandpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0xc0]
15175 ; X86-NEXT: retl # encoding: [0xc3]
15177 ; X64-LABEL: test_maskz_expand_pd_256:
15179 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
15180 ; X64-NEXT: vexpandpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0xc0]
15181 ; X64-NEXT: retq # encoding: [0xc3]
15182 %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> zeroinitializer, i8 %mask)
15183 ret <4 x double> %res
15186 declare <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)
15188 define <8 x float> @test_expand_ps_256(<8 x float> %data) {
15189 ; CHECK-LABEL: test_expand_ps_256:
15191 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
15192 %res = call <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> undef, i8 -1)
15193 ret <8 x float> %res
15196 define <8 x float> @test_mask_expand_ps_256(<8 x float> %data, <8 x float> %passthru, i8 %mask) {
15197 ; X86-LABEL: test_mask_expand_ps_256:
15199 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
15200 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
15201 ; X86-NEXT: vexpandps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0xc8]
15202 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
15203 ; X86-NEXT: retl # encoding: [0xc3]
15205 ; X64-LABEL: test_mask_expand_ps_256:
15207 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
15208 ; X64-NEXT: vexpandps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0xc8]
15209 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
15210 ; X64-NEXT: retq # encoding: [0xc3]
15211 %res = call <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> %passthru, i8 %mask)
15212 ret <8 x float> %res
15215 define <8 x float> @test_maskz_expand_ps_256(<8 x float> %data, i8 %mask) {
15216 ; X86-LABEL: test_maskz_expand_ps_256:
15218 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
15219 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
15220 ; X86-NEXT: vexpandps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0xc0]
15221 ; X86-NEXT: retl # encoding: [0xc3]
15223 ; X64-LABEL: test_maskz_expand_ps_256:
15225 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
15226 ; X64-NEXT: vexpandps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0xc0]
15227 ; X64-NEXT: retq # encoding: [0xc3]
15228 %res = call <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> zeroinitializer, i8 %mask)
15229 ret <8 x float> %res
15232 declare <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> %src0, i8 %mask)
15234 define <4 x i64> @test_expand_q_256(<4 x i64> %data) {
15235 ; CHECK-LABEL: test_expand_q_256:
15237 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
15238 %res = call <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> undef, i8 -1)
15239 ret <4 x i64> %res
15242 define <4 x i64> @test_mask_expand_q_256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask) {
15243 ; X86-LABEL: test_mask_expand_q_256:
15245 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
15246 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
15247 ; X86-NEXT: vpexpandq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0xc8]
15248 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
15249 ; X86-NEXT: retl # encoding: [0xc3]
15251 ; X64-LABEL: test_mask_expand_q_256:
15253 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
15254 ; X64-NEXT: vpexpandq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0xc8]
15255 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
15256 ; X64-NEXT: retq # encoding: [0xc3]
15257 %res = call <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask)
15258 ret <4 x i64> %res
15261 define <4 x i64> @test_maskz_expand_q_256(<4 x i64> %data, i8 %mask) {
15262 ; X86-LABEL: test_maskz_expand_q_256:
15264 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
15265 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
15266 ; X86-NEXT: vpexpandq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0xc0]
15267 ; X86-NEXT: retl # encoding: [0xc3]
15269 ; X64-LABEL: test_maskz_expand_q_256:
15271 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
15272 ; X64-NEXT: vpexpandq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0xc0]
15273 ; X64-NEXT: retq # encoding: [0xc3]
15274 %res = call <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> zeroinitializer, i8 %mask)
15275 ret <4 x i64> %res
15278 declare <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> %src0, i8 %mask)
15280 define <8 x i32> @test_expand_d_256(<8 x i32> %data) {
15281 ; CHECK-LABEL: test_expand_d_256:
15283 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
15284 %res = call <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> undef, i8 -1)
15285 ret <8 x i32> %res
15288 define <8 x i32> @test_mask_expand_d_256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask) {
15289 ; X86-LABEL: test_mask_expand_d_256:
15291 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
15292 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
15293 ; X86-NEXT: vpexpandd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0xc8]
15294 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
15295 ; X86-NEXT: retl # encoding: [0xc3]
15297 ; X64-LABEL: test_mask_expand_d_256:
15299 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
15300 ; X64-NEXT: vpexpandd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0xc8]
15301 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
15302 ; X64-NEXT: retq # encoding: [0xc3]
15303 %res = call <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask)
15304 ret <8 x i32> %res
15307 define <8 x i32> @test_maskz_expand_d_256(<8 x i32> %data, i8 %mask) {
15308 ; X86-LABEL: test_maskz_expand_d_256:
15310 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
15311 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
15312 ; X86-NEXT: vpexpandd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0xc0]
15313 ; X86-NEXT: retl # encoding: [0xc3]
15315 ; X64-LABEL: test_maskz_expand_d_256:
15317 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
15318 ; X64-NEXT: vpexpandd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0xc0]
15319 ; X64-NEXT: retq # encoding: [0xc3]
15320 %res = call <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> zeroinitializer, i8 %mask)
15321 ret <8 x i32> %res
15324 declare <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> %src0, i8 %mask)