; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bw,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

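; The tests below cover the AVX512BW/VL lowering of the saturating pack intrinsics
; (vpackssdw/vpacksswb and vpackusdw/vpackuswb) and of the vpermt2w/vpermi2w variable
; permutes, in their register, memory, broadcast, merge-masking ({%k1}) and
; zero-masking ({%k1} {z}) forms, for both the i686 (X86) and x86_64 (X64) RUN lines above.
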
define <8 x i16> @test_mask_packs_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_packs_epi32_rr_128:
; CHECK:       # %bb.0:
; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_packs_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrk_128:
; X86:       # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpackssdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rrk_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackssdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packs_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrkz_128:
; X86:       # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rrkz_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packs_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; X86-LABEL: test_mask_packs_epi32_rm_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackssdw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rm_128:
; X64:       # %bb.0:
; X64-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_packs_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmk_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackssdw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmk_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packs_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmkz_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackssdw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmkz_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packs_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_packs_epi32_rmb_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackssdw (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmb_128:
; X64:       # %bb.0:
; X64-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_packs_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmbk_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackssdw (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmbk_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packs_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmbkz_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackssdw (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmbkz_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>)

define <16 x i16> @test_mask_packs_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_packs_epi32_rr_256:
; CHECK:       # %bb.0:
; CHECK-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
  ret <16 x i16> %1
}

define <16 x i16> @test_mask_packs_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrk_256:
; X86:       # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackssdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rrk_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackssdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packs_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrkz_256:
; X86:       # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rrkz_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packs_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; X86-LABEL: test_mask_packs_epi32_rm_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackssdw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rm_256:
; X64:       # %bb.0:
; X64-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
  ret <16 x i16> %1
}

define <16 x i16> @test_mask_packs_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmk_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackssdw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmk_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packs_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmkz_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackssdw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmkz_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packs_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_packs_epi32_rmb_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackssdw (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmb_256:
; X64:       # %bb.0:
; X64-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
  ret <16 x i16> %1
}

define <16 x i16> @test_mask_packs_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmbk_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackssdw (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmbk_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packs_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmbkz_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackssdw (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmbkz_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>)

define <16 x i8> @test_mask_packs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_packs_epi16_rr_128:
; CHECK:       # %bb.0:
; CHECK-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b)
  ret <16 x i8> %1
}

define <16 x i8> @test_mask_packs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packs_epi16_rrk_128:
; X86:       # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpacksswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rrk_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpacksswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_packs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) {
; X86-LABEL: test_mask_packs_epi16_rrkz_128:
; X86:       # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rrkz_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_packs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
; X86-LABEL: test_mask_packs_epi16_rm_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpacksswb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rm_128:
; X64:       # %bb.0:
; X64-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b)
  ret <16 x i8> %1
}

define <16 x i8> @test_mask_packs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packs_epi16_rmk_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpacksswb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rmk_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpacksswb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_packs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_packs_epi16_rmkz_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpacksswb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rmkz_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>)

define <32 x i8> @test_mask_packs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_packs_epi16_rr_256:
; CHECK:       # %bb.0:
; CHECK-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b)
  ret <32 x i8> %1
}

define <32 x i8> @test_mask_packs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packs_epi16_rrk_256:
; X86:       # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpacksswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rrk_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpacksswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_packs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) {
; X86-LABEL: test_mask_packs_epi16_rrkz_256:
; X86:       # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rrkz_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_packs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
; X86-LABEL: test_mask_packs_epi16_rm_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpacksswb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rm_256:
; X64:       # %bb.0:
; X64-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b)
  ret <32 x i8> %1
}

define <32 x i8> @test_mask_packs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packs_epi16_rmk_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpacksswb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rmk_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpacksswb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_packs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_packs_epi16_rmkz_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpacksswb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rmkz_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>)

define <8 x i16> @test_mask_packus_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_packus_epi32_rr_128:
; CHECK:       # %bb.0:
; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_packus_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_packus_epi32_rrk_128:
; X86:       # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpackusdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rrk_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackusdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packus_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_packus_epi32_rrkz_128:
; X86:       # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rrkz_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packus_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; X86-LABEL: test_mask_packus_epi32_rm_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackusdw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rm_128:
; X64:       # %bb.0:
; X64-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_packus_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmk_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackusdw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmk_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packus_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmkz_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackusdw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmkz_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packus_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_packus_epi32_rmb_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackusdw (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmb_128:
; X64:       # %bb.0:
; X64-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_packus_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmbk_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackusdw (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmbk_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packus_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmbkz_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackusdw (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmbkz_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>)

define <16 x i16> @test_mask_packus_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_packus_epi32_rr_256:
; CHECK:       # %bb.0:
; CHECK-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
  ret <16 x i16> %1
}

define <16 x i16> @test_mask_packus_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packus_epi32_rrk_256:
; X86:       # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackusdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rrk_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packus_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) {
; X86-LABEL: test_mask_packus_epi32_rrkz_256:
; X86:       # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rrkz_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packus_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; X86-LABEL: test_mask_packus_epi32_rm_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackusdw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rm_256:
; X64:       # %bb.0:
; X64-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
  ret <16 x i16> %1
}

define <16 x i16> @test_mask_packus_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmk_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackusdw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmk_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packus_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmkz_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackusdw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmkz_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packus_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_packus_epi32_rmb_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackusdw (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x38,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmb_256:
; X64:       # %bb.0:
; X64-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x38,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
  ret <16 x i16> %1
}

define <16 x i16> @test_mask_packus_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmbk_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackusdw (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmbk_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packus_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmbkz_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackusdw (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmbkz_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>)

define <16 x i8> @test_mask_packus_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_packus_epi16_rr_128:
; CHECK:       # %bb.0:
; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b)
  ret <16 x i8> %1
}

define <16 x i8> @test_mask_packus_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packus_epi16_rrk_128:
; X86:       # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackuswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rrk_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackuswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_packus_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) {
; X86-LABEL: test_mask_packus_epi16_rrkz_128:
; X86:       # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rrkz_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_packus_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
; X86-LABEL: test_mask_packus_epi16_rm_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackuswb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rm_128:
; X64:       # %bb.0:
; X64-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b)
  ret <16 x i8> %1
}

define <16 x i8> @test_mask_packus_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packus_epi16_rmk_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackuswb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rmk_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackuswb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_packus_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_packus_epi16_rmkz_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackuswb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rmkz_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>)

define <32 x i8> @test_mask_packus_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_packus_epi16_rr_256:
; CHECK:       # %bb.0:
; CHECK-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b)
  ret <32 x i8> %1
}

define <32 x i8> @test_mask_packus_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packus_epi16_rrk_256:
; X86:       # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackuswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rrk_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackuswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_packus_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) {
; X86-LABEL: test_mask_packus_epi16_rrkz_256:
; X86:       # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rrkz_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_packus_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
; X86-LABEL: test_mask_packus_epi16_rm_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackuswb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rm_256:
; X64:       # %bb.0:
; X64-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b)
  ret <32 x i8> %1
}

define <32 x i8> @test_mask_packus_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packus_epi16_rmk_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackuswb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rmk_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackuswb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_packus_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_packus_epi16_rmkz_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackuswb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rmkz_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>)

define <8 x i16>@test_int_x86_avx512_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermt2var_hi_128:
; CHECK:       # %bb.0:
; CHECK-NEXT: vpermi2w %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x75,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2)
  ret <8 x i16> %1
}

define <8 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128:
; X86:       # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xca]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xca]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x1
  ret <8 x i16> %3
}

define <8 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128:
; X86:       # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpermi2w %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x75,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermi2w %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x75,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <16 x i16>@test_int_x86_avx512_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermt2var_hi_256:
; CHECK:       # %bb.0:
; CHECK-NEXT: vpermi2w %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x75,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2)
  ret <16 x i16> %1
}

define <16 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256:
; X86:       # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xca]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xca]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x1
  ret <16 x i16> %3
}

define <16 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256:
; X86:       # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermi2w %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x75,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermi2w %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x75,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

declare <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>)

1228 define <8 x i16>@test_int_x86_avx512_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
1229 ; CHECK-LABEL: test_int_x86_avx512_vpermi2var_hi_128:
1231 ; CHECK-NEXT: vpermt2w %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x7d,0xc2]
1232 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1233 %1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2)
1237 define <8 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
1238 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128:
1240 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1241 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1242 ; X86-NEXT: vpermi2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca]
1243 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1244 ; X86-NEXT: retl # encoding: [0xc3]
1246 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128:
1248 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1249 ; X64-NEXT: vpermi2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca]
1250 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1251 ; X64-NEXT: retq # encoding: [0xc3]
1252 %1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2)
1253 %2 = bitcast i8 %x3 to <8 x i1>
1254 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x1
1258 declare <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>)
1260 define <16 x i16>@test_int_x86_avx512_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
1261 ; CHECK-LABEL: test_int_x86_avx512_vpermi2var_hi_256:
1263 ; CHECK-NEXT: vpermt2w %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x7d,0xc2]
1264 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1265 %1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2)
1269 define <16 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
1270 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256:
1272 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1273 ; X86-NEXT: vpermi2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca]
1274 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1275 ; X86-NEXT: retl # encoding: [0xc3]
1277 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256:
1279 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1280 ; X64-NEXT: vpermi2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca]
1281 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1282 ; X64-NEXT: retq # encoding: [0xc3]
1283 %1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2)
1284 %2 = bitcast i16 %x3 to <16 x i1>
1285 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x1
1289 declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>)
1291 define <16 x i8> @test_int_x86_avx512_mask_pavg_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
1292 ; X86-LABEL: test_int_x86_avx512_mask_pavg_b_128:
1294 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1295 ; X86-NEXT: vpavgb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1]
1296 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1297 ; X86-NEXT: retl # encoding: [0xc3]
1299 ; X64-LABEL: test_int_x86_avx512_mask_pavg_b_128:
1301 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1302 ; X64-NEXT: vpavgb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1]
1303 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1304 ; X64-NEXT: retq # encoding: [0xc3]
1305 %1 = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %x0, <16 x i8> %x1)
1306 %2 = bitcast i16 %x3 to <16 x i1>
1307 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %x2
1311 declare <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8>, <32 x i8>)
1313 define <32 x i8> @test_int_x86_avx512_mask_pavg_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
1314 ; X86-LABEL: test_int_x86_avx512_mask_pavg_b_256:
1316 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1317 ; X86-NEXT: vpavgb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe0,0xd1]
1318 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1319 ; X86-NEXT: retl # encoding: [0xc3]
1321 ; X64-LABEL: test_int_x86_avx512_mask_pavg_b_256:
1323 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1324 ; X64-NEXT: vpavgb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe0,0xd1]
1325 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1326 ; X64-NEXT: retq # encoding: [0xc3]
1327 %1 = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %x0, <32 x i8> %x1)
1328 %2 = bitcast i32 %x3 to <32 x i1>
1329 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %x2
1333 declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>)
1335 define <8 x i16> @test_int_x86_avx512_mask_pavg_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
1336 ; X86-LABEL: test_int_x86_avx512_mask_pavg_w_128:
1338 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1339 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1340 ; X86-NEXT: vpavgw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1]
1341 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1342 ; X86-NEXT: retl # encoding: [0xc3]
1344 ; X64-LABEL: test_int_x86_avx512_mask_pavg_w_128:
1346 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1347 ; X64-NEXT: vpavgw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1]
1348 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1349 ; X64-NEXT: retq # encoding: [0xc3]
1350 %1 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %x0, <8 x i16> %x1)
1351 %2 = bitcast i8 %x3 to <8 x i1>
1352 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
1356 declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>)
1358 define <16 x i16> @test_int_x86_avx512_mask_pavg_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
1359 ; X86-LABEL: test_int_x86_avx512_mask_pavg_w_256:
1361 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1362 ; X86-NEXT: vpavgw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1]
1363 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1364 ; X86-NEXT: retl # encoding: [0xc3]
1366 ; X64-LABEL: test_int_x86_avx512_mask_pavg_w_256:
1368 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1369 ; X64-NEXT: vpavgw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1]
1370 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1371 ; X64-NEXT: retq # encoding: [0xc3]
1372 %1 = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %x0, <16 x i16> %x1)
1373 %2 = bitcast i16 %x3 to <16 x i1>
1374 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
1378 declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>)
1380 define <8 x i16> @test_int_x86_avx512_mask_pmulhu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
1381 ; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_128:
1383 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1384 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1385 ; X86-NEXT: vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1]
1386 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1387 ; X86-NEXT: retl # encoding: [0xc3]
1389 ; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_128:
1391 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1392 ; X64-NEXT: vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1]
1393 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1394 ; X64-NEXT: retq # encoding: [0xc3]
1395 %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %x0, <8 x i16> %x1)
1396 %2 = bitcast i8 %x3 to <8 x i1>
1397 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
1401 declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>)
1403 define <16 x i16> @test_int_x86_avx512_mask_pmulhu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
1404 ; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_256:
1406 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1407 ; X86-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1]
1408 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1409 ; X86-NEXT: retl # encoding: [0xc3]
1411 ; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_256:
1413 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1414 ; X64-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1]
1415 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1416 ; X64-NEXT: retq # encoding: [0xc3]
1417 %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %x0, <16 x i16> %x1)
1418 %2 = bitcast i16 %x3 to <16 x i1>
1419 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
1423 declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>)
1425 define <8 x i16> @test_int_x86_avx512_mask_pmulh_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
1426 ; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_128:
1428 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1429 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1430 ; X86-NEXT: vpmulhw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1]
1431 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1432 ; X86-NEXT: retl # encoding: [0xc3]
1434 ; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_128:
1436 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1437 ; X64-NEXT: vpmulhw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1]
1438 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1439 ; X64-NEXT: retq # encoding: [0xc3]
1440 %1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %x0, <8 x i16> %x1)
1441 %2 = bitcast i8 %x3 to <8 x i1>
1442 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
1446 declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>)
1448 define <16 x i16> @test_int_x86_avx512_mask_pmulh_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
1449 ; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_256:
1451 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1452 ; X86-NEXT: vpmulhw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1]
1453 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1454 ; X86-NEXT: retl # encoding: [0xc3]
1456 ; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_256:
1458 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1459 ; X64-NEXT: vpmulhw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1]
1460 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1461 ; X64-NEXT: retq # encoding: [0xc3]
1462 %1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %x0, <16 x i16> %x1)
1463 %2 = bitcast i16 %x3 to <16 x i1>
1464 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
1468 declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>)
1470 define <8 x i16> @test_int_x86_avx512_mask_pmulhr_sw_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
1471 ; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_128:
1473 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1474 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1475 ; X86-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1]
1476 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1477 ; X86-NEXT: retl # encoding: [0xc3]
1479 ; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_128:
1481 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1482 ; X64-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1]
1483 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1484 ; X64-NEXT: retq # encoding: [0xc3]
1485 %1 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1)
1486 %2 = bitcast i8 %x3 to <8 x i1>
1487 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
1491 declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>)
1493 define <16 x i16> @test_int_x86_avx512_mask_pmulhr_sw_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
1494 ; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_256:
1496 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1497 ; X86-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1]
1498 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1499 ; X86-NEXT: retl # encoding: [0xc3]
1501 ; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_256:
1503 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1504 ; X64-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1]
1505 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1506 ; X64-NEXT: retq # encoding: [0xc3]
1507 %1 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %x0, <16 x i16> %x1)
1508 %2 = bitcast i16 %x3 to <16 x i1>
1509 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
1513 declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16>, <16 x i8>, i8)
1515 define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
1516 ; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_128:
1518 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1519 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1520 ; X86-NEXT: vpmovwb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc2]
1521 ; X86-NEXT: vpmovwb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0xc1]
1522 ; X86-NEXT: vpmovwb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc0]
1523 ; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
1524 ; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
1525 ; X86-NEXT: retl # encoding: [0xc3]
1527 ; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_128:
1529 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1530 ; X64-NEXT: vpmovwb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc2]
1531 ; X64-NEXT: vpmovwb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0xc1]
1532 ; X64-NEXT: vpmovwb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc0]
1533 ; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
1534 ; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
1535 ; X64-NEXT: retq # encoding: [0xc3]
1536 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
1537 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)
1538 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2)
1539 %res3 = add <16 x i8> %res0, %res1
1540 %res4 = add <16 x i8> %res3, %res2
1544 declare void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16>, i8)
1546 define void @test_int_x86_avx512_mask_pmov_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
1547 ; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_128:
1549 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1550 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1551 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1552 ; X86-NEXT: vpmovwb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x30,0x00]
1553 ; X86-NEXT: vpmovwb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0x00]
1554 ; X86-NEXT: retl # encoding: [0xc3]
1556 ; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_128:
1558 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1559 ; X64-NEXT: vpmovwb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x30,0x07]
1560 ; X64-NEXT: vpmovwb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0x07]
1561 ; X64-NEXT: retq # encoding: [0xc3]
1562 call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
1563 call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2)
1567 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16>, <16 x i8>, i8)
1569 define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
1570 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_128:
1572 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1573 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1574 ; X86-NEXT: vpmovswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc2]
1575 ; X86-NEXT: vpmovswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0xc1]
1576 ; X86-NEXT: vpmovswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc0]
1577 ; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
1578 ; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
1579 ; X86-NEXT: retl # encoding: [0xc3]
1581 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_128:
1583 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1584 ; X64-NEXT: vpmovswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc2]
1585 ; X64-NEXT: vpmovswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0xc1]
1586 ; X64-NEXT: vpmovswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc0]
1587 ; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
1588 ; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
1589 ; X64-NEXT: retq # encoding: [0xc3]
1590 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
1591 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)
1592 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2)
1593 %res3 = add <16 x i8> %res0, %res1
1594 %res4 = add <16 x i8> %res3, %res2
1598 declare void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16>, i8)
1600 define void @test_int_x86_avx512_mask_pmovs_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
1601 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_128:
1603 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1604 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1605 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1606 ; X86-NEXT: vpmovswb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x20,0x00]
1607 ; X86-NEXT: vpmovswb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0x00]
1608 ; X86-NEXT: retl # encoding: [0xc3]
1610 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_128:
1612 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1613 ; X64-NEXT: vpmovswb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x20,0x07]
1614 ; X64-NEXT: vpmovswb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0x07]
1615 ; X64-NEXT: retq # encoding: [0xc3]
1616 call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
1617 call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2)
1621 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16>, <16 x i8>, i8)
1623 define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
1624 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_128:
1626 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1627 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1628 ; X86-NEXT: vpmovuswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc2]
1629 ; X86-NEXT: vpmovuswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0xc1]
1630 ; X86-NEXT: vpmovuswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc0]
1631 ; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
1632 ; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
1633 ; X86-NEXT: retl # encoding: [0xc3]
1635 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_128:
1637 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1638 ; X64-NEXT: vpmovuswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc2]
1639 ; X64-NEXT: vpmovuswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0xc1]
1640 ; X64-NEXT: vpmovuswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc0]
1641 ; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
1642 ; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
1643 ; X64-NEXT: retq # encoding: [0xc3]
1644 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
1645 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)
1646 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2)
1647 %res3 = add <16 x i8> %res0, %res1
1648 %res4 = add <16 x i8> %res3, %res2
1652 declare void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16>, i8)
1654 define void @test_int_x86_avx512_mask_pmovus_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
1655 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_128:
1657 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1658 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1659 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1660 ; X86-NEXT: vpmovuswb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x10,0x00]
1661 ; X86-NEXT: vpmovuswb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0x00]
1662 ; X86-NEXT: retl # encoding: [0xc3]
1664 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_128:
1666 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1667 ; X64-NEXT: vpmovuswb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x10,0x07]
1668 ; X64-NEXT: vpmovuswb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0x07]
1669 ; X64-NEXT: retq # encoding: [0xc3]
1670 call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
1671 call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2)
1675 define <16 x i8>@test_int_x86_avx512_pmov_wb_256(<16 x i16> %x0) {
1676 ; CHECK-LABEL: test_int_x86_avx512_pmov_wb_256:
1678 ; CHECK-NEXT: vpmovwb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x30,0xc0]
1679 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1680 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1681 %1 = trunc <16 x i16> %x0 to <16 x i8>
1685 define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
1686 ; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_256:
1688 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1689 ; X86-NEXT: vpmovwb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1]
1690 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1691 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1692 ; X86-NEXT: retl # encoding: [0xc3]
1694 ; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_256:
1696 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1697 ; X64-NEXT: vpmovwb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1]
1698 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1699 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1700 ; X64-NEXT: retq # encoding: [0xc3]
1701 %1 = trunc <16 x i16> %x0 to <16 x i8>
1702 %2 = bitcast i16 %x2 to <16 x i1>
1703 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %x1
1707 define <16 x i8>@test_int_x86_avx512_maskz_pmov_wb_256(<16 x i16> %x0, i16 %x2) {
1708 ; X86-LABEL: test_int_x86_avx512_maskz_pmov_wb_256:
1710 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1711 ; X86-NEXT: vpmovwb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc0]
1712 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1713 ; X86-NEXT: retl # encoding: [0xc3]
1715 ; X64-LABEL: test_int_x86_avx512_maskz_pmov_wb_256:
1717 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1718 ; X64-NEXT: vpmovwb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc0]
1719 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1720 ; X64-NEXT: retq # encoding: [0xc3]
1721 %1 = trunc <16 x i16> %x0 to <16 x i8>
1722 %2 = bitcast i16 %x2 to <16 x i1>
1723 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
1727 declare void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16>, i16)
1729 define void @test_int_x86_avx512_mask_pmov_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
1730 ; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_256:
1732 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1733 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1734 ; X86-NEXT: vpmovwb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x30,0x00]
1735 ; X86-NEXT: vpmovwb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0x00]
1736 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1737 ; X86-NEXT: retl # encoding: [0xc3]
1739 ; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_256:
1741 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1742 ; X64-NEXT: vpmovwb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x30,0x07]
1743 ; X64-NEXT: vpmovwb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0x07]
1744 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1745 ; X64-NEXT: retq # encoding: [0xc3]
1746 call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
1747 call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2)
1751 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16>, <16 x i8>, i16)
1753 define <16 x i8>@test_int_x86_avx512_pmovs_wb_256(<16 x i16> %x0, <16 x i8> %x1) {
1754 ; CHECK-LABEL: test_int_x86_avx512_pmovs_wb_256:
1756 ; CHECK-NEXT: vpmovswb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x20,0xc0]
1757 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1758 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1759 %res = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
1763 define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
1764 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_256:
1766 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1767 ; X86-NEXT: vpmovswb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x20,0xc1]
1768 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1769 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1770 ; X86-NEXT: retl # encoding: [0xc3]
1772 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_256:
1774 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1775 ; X64-NEXT: vpmovswb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x20,0xc1]
1776 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1777 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1778 ; X64-NEXT: retq # encoding: [0xc3]
1779 %res = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
1783 define <16 x i8>@test_int_x86_avx512_maskz_pmovs_wb_256(<16 x i16> %x0, i16 %x2) {
1784 ; X86-LABEL: test_int_x86_avx512_maskz_pmovs_wb_256:
1786 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1787 ; X86-NEXT: vpmovswb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x20,0xc0]
1788 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1789 ; X86-NEXT: retl # encoding: [0xc3]
1791 ; X64-LABEL: test_int_x86_avx512_maskz_pmovs_wb_256:
1793 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1794 ; X64-NEXT: vpmovswb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x20,0xc0]
1795 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1796 ; X64-NEXT: retq # encoding: [0xc3]
1797 %res = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
1801 declare void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16>, i16)
1803 define void @test_int_x86_avx512_mask_pmovs_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
1804 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_256:
1806 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1807 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1808 ; X86-NEXT: vpmovswb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x20,0x00]
1809 ; X86-NEXT: vpmovswb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x20,0x00]
1810 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1811 ; X86-NEXT: retl # encoding: [0xc3]
1813 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_256:
1815 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1816 ; X64-NEXT: vpmovswb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x20,0x07]
1817 ; X64-NEXT: vpmovswb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x20,0x07]
1818 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1819 ; X64-NEXT: retq # encoding: [0xc3]
1820 call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
1821 call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2)
1825 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16>, <16 x i8>, i16)
1827 define <16 x i8>@test_int_x86_avx512_pmovus_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
1828 ; CHECK-LABEL: test_int_x86_avx512_pmovus_wb_256:
1830 ; CHECK-NEXT: vpmovuswb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x10,0xc0]
1831 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1832 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1833 %res = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
1837 define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
1838 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_256:
1840 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1841 ; X86-NEXT: vpmovuswb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x10,0xc1]
1842 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1843 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1844 ; X86-NEXT: retl # encoding: [0xc3]
1846 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_256:
1848 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1849 ; X64-NEXT: vpmovuswb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x10,0xc1]
1850 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1851 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1852 ; X64-NEXT: retq # encoding: [0xc3]
1853 %res = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
1857 define <16 x i8>@test_int_x86_avx512_maskz_pmovus_wb_256(<16 x i16> %x0, i16 %x2) {
1858 ; X86-LABEL: test_int_x86_avx512_maskz_pmovus_wb_256:
1860 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1861 ; X86-NEXT: vpmovuswb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x10,0xc0]
1862 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1863 ; X86-NEXT: retl # encoding: [0xc3]
1865 ; X64-LABEL: test_int_x86_avx512_maskz_pmovus_wb_256:
1867 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1868 ; X64-NEXT: vpmovuswb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x10,0xc0]
1869 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1870 ; X64-NEXT: retq # encoding: [0xc3]
1871 %res = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
1875 declare void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16>, i16)
1877 define void @test_int_x86_avx512_mask_pmovus_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
1878 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_256:
1880 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1881 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1882 ; X86-NEXT: vpmovuswb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x10,0x00]
1883 ; X86-NEXT: vpmovuswb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x10,0x00]
1884 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1885 ; X86-NEXT: retl # encoding: [0xc3]
1887 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_256:
1889 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1890 ; X64-NEXT: vpmovuswb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x10,0x07]
1891 ; X64-NEXT: vpmovuswb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x10,0x07]
1892 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1893 ; X64-NEXT: retq # encoding: [0xc3]
1894 call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
1895 call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2)
1899 declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>)
1901 define <4 x i32> @test_int_x86_avx512_mask_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) {
1902 ; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_128:
1904 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1905 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1906 ; X86-NEXT: vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1]
1907 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1908 ; X86-NEXT: retl # encoding: [0xc3]
1910 ; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_128:
1912 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1913 ; X64-NEXT: vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1]
1914 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1915 ; X64-NEXT: retq # encoding: [0xc3]
1916 %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %x0, <8 x i16> %x1)
1917 %2 = bitcast i8 %x3 to <8 x i1>
1918 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1919 %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x2
1923 declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>)
1925 define <8 x i32> @test_int_x86_avx512_mask_pmaddw_d_256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) {
1926 ; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_256:
1928 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1929 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1930 ; X86-NEXT: vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1]
1931 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1932 ; X86-NEXT: retl # encoding: [0xc3]
1934 ; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_256:
1936 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1937 ; X64-NEXT: vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1]
1938 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1939 ; X64-NEXT: retq # encoding: [0xc3]
1940 %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %x0, <16 x i16> %x1)
1941 %2 = bitcast i8 %x3 to <8 x i1>
1942 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2
1946 declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>)
1948 define <8 x i16> @test_int_x86_avx512_mask_pmaddubs_w_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3) {
1949 ; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128:
1951 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1952 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1953 ; X86-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1]
1954 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1955 ; X86-NEXT: retl # encoding: [0xc3]
1957 ; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128:
1959 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1960 ; X64-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1]
1961 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1962 ; X64-NEXT: retq # encoding: [0xc3]
1963 %1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %x0, <16 x i8> %x1)
1964 %2 = bitcast i8 %x3 to <8 x i1>
1965 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
1969 declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>)
1971 define <16 x i16> @test_int_x86_avx512_mask_pmaddubs_w_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3) {
1972 ; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256:
1974 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1975 ; X86-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1]
1976 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1977 ; X86-NEXT: retl # encoding: [0xc3]
1979 ; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256:
1981 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1982 ; X64-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1]
1983 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1984 ; X64-NEXT: retq # encoding: [0xc3]
1985 %1 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %x0, <32 x i8> %x1)
1986 %2 = bitcast i16 %x3 to <16 x i1>
1987 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
1991 declare <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8>, <16 x i8>, i32)
1993 define <8 x i16>@test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) {
1994 ; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_128:
1996 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1997 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1998 ; X86-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02]
1999 ; X86-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03]
2000 ; X86-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xc1,0x04]
2001 ; X86-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0]
2002 ; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
2003 ; X86-NEXT: retl # encoding: [0xc3]
2005 ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_128:
2007 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2008 ; X64-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02]
2009 ; X64-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03]
2010 ; X64-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xc1,0x04]
2011 ; X64-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0]
2012 ; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
2013 ; X64-NEXT: retq # encoding: [0xc3]
2014 %1 = call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2)
2015 %2 = bitcast i8 %x4 to <8 x i1>
2016 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3
2017 %4 = call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 3)
2018 %5 = bitcast i8 %x4 to <8 x i1>
2019 %6 = select <8 x i1> %5, <8 x i16> %4, <8 x i16> zeroinitializer
2020 %7 = call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 4)
2021 %res3 = add <8 x i16> %3, %6
2022 %res4 = add <8 x i16> %7, %res3
2026 declare <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8>, <32 x i8>, i32)
2028 define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) {
2029 ; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_256:
2031 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2032 ; X86-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02]
2033 ; X86-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03]
2034 ; X86-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xc1,0x04]
2035 ; X86-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0]
2036 ; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
2037 ; X86-NEXT: retl # encoding: [0xc3]
2039 ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_256:
2041 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2042 ; X64-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02]
2043 ; X64-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03]
2044 ; X64-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xc1,0x04]
2045 ; X64-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0]
2046 ; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
2047 ; X64-NEXT: retq # encoding: [0xc3]
2048 %1 = call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2)
2049 %2 = bitcast i16 %x4 to <16 x i1>
2050 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3
2051 %4 = call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 3)
2052 %5 = bitcast i16 %x4 to <16 x i1>
2053 %6 = select <16 x i1> %5, <16 x i16> %4, <16 x i16> zeroinitializer
2054 %7 = call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 4)
2055 %res3 = add <16 x i16> %3, %6
2056 %res4 = add <16 x i16> %res3, %7
2057 ret <16 x i16> %res4
2060 declare <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2062 define <16 x i16>@test_int_x86_avx512_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
2063 ; CHECK-LABEL: test_int_x86_avx512_psrlv16_hi:
2065 ; CHECK-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0xc1]
2066 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2067 %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
2071 define <16 x i16>@test_int_x86_avx512_mask_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
2072 ; X86-LABEL: test_int_x86_avx512_mask_psrlv16_hi:
2074 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2075 ; X86-NEXT: vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1]
2076 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2077 ; X86-NEXT: retl # encoding: [0xc3]
2079 ; X64-LABEL: test_int_x86_avx512_mask_psrlv16_hi:
2081 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2082 ; X64-NEXT: vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1]
2083 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2084 ; X64-NEXT: retq # encoding: [0xc3]
2085 %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
2089 define <16 x i16>@test_int_x86_avx512_maskz_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
2090 ; X86-LABEL: test_int_x86_avx512_maskz_psrlv16_hi:
2092 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2093 ; X86-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xc1]
2094 ; X86-NEXT: retl # encoding: [0xc3]
2096 ; X64-LABEL: test_int_x86_avx512_maskz_psrlv16_hi:
2098 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2099 ; X64-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xc1]
2100 ; X64-NEXT: retq # encoding: [0xc3]
2101 %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
2105 declare <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2107 define <8 x i16>@test_int_x86_avx512_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
2108 ; CHECK-LABEL: test_int_x86_avx512_psrlv8_hi:
2110 ; CHECK-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0xc1]
2111 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2112 %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
2116 define <8 x i16>@test_int_x86_avx512_mask_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
2117 ; X86-LABEL: test_int_x86_avx512_mask_psrlv8_hi:
2119 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2120 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2121 ; X86-NEXT: vpsrlvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x10,0xd1]
2122 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2123 ; X86-NEXT: retl # encoding: [0xc3]
2125 ; X64-LABEL: test_int_x86_avx512_mask_psrlv8_hi:
2127 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2128 ; X64-NEXT: vpsrlvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x10,0xd1]
2129 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2130 ; X64-NEXT: retq # encoding: [0xc3]
2131 %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
2135 define <8 x i16>@test_int_x86_avx512_maskz_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
2136 ; X86-LABEL: test_int_x86_avx512_maskz_psrlv8_hi:
2138 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2139 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2140 ; X86-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x10,0xc1]
2141 ; X86-NEXT: retl # encoding: [0xc3]
2143 ; X64-LABEL: test_int_x86_avx512_maskz_psrlv8_hi:
2145 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2146 ; X64-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x10,0xc1]
2147 ; X64-NEXT: retq # encoding: [0xc3]
2148 %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
2153 define <8 x i16> @test_int_x86_avx512_psrlv_w_128_const() optsize {
2154 ; X86-LABEL: test_int_x86_avx512_psrlv_w_128_const:
2156 ; X86-NEXT: vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
2157 ; X86-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
2158 ; X86-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
2159 ; X86-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0x05,A,A,A,A]
2160 ; X86-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
2161 ; X86-NEXT: retl # encoding: [0xc3]
2163 ; X64-LABEL: test_int_x86_avx512_psrlv_w_128_const:
2165 ; X64-NEXT: vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
2166 ; X64-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
2167 ; X64-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
2168 ; X64-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0x05,A,A,A,A]
2169 ; X64-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
2170 ; X64-NEXT: retq # encoding: [0xc3]
2171 %res = call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
2175 declare <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16>, <8 x i16>)
2177 define <16 x i16> @test_int_x86_avx512_psrlv_w_256_const() optsize {
2178 ; X86-LABEL: test_int_x86_avx512_psrlv_w_256_const:
2180 ; X86-NEXT: vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
2181 ; X86-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
2182 ; X86-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
2183 ; X86-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0x05,A,A,A,A]
2184 ; X86-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
2185 ; X86-NEXT: retl # encoding: [0xc3]
2187 ; X64-LABEL: test_int_x86_avx512_psrlv_w_256_const:
2189 ; X64-NEXT: vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
2190 ; X64-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
2191 ; X64-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
2192 ; X64-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0x05,A,A,A,A]
2193 ; X64-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
2194 ; X64-NEXT: retq # encoding: [0xc3]
2195 %res = call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
2199 declare <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16>, <16 x i16>)
2201 declare <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16>, <16 x i16>)
2203 define <16 x i16>@test_int_x86_avx512_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1) {
2204 ; CHECK-LABEL: test_int_x86_avx512_psrav16_hi:
2206 ; CHECK-NEXT: vpsravw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x11,0xc1]
2207 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2208 %1 = call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %x0, <16 x i16> %x1)
2212 define <16 x i16>@test_int_x86_avx512_mask_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
2213 ; X86-LABEL: test_int_x86_avx512_mask_psrav16_hi:
2215 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2216 ; X86-NEXT: vpsravw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x11,0xd1]
2217 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2218 ; X86-NEXT: retl # encoding: [0xc3]
2220 ; X64-LABEL: test_int_x86_avx512_mask_psrav16_hi:
2222 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2223 ; X64-NEXT: vpsravw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x11,0xd1]
2224 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2225 ; X64-NEXT: retq # encoding: [0xc3]
2226 %1 = call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %x0, <16 x i16> %x1)
2227 %2 = bitcast i16 %x3 to <16 x i1>
2228 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
2232 define <16 x i16>@test_int_x86_avx512_maskz_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
2233 ; X86-LABEL: test_int_x86_avx512_maskz_psrav16_hi:
2235 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2236 ; X86-NEXT: vpsravw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xc1]
2237 ; X86-NEXT: retl # encoding: [0xc3]
2239 ; X64-LABEL: test_int_x86_avx512_maskz_psrav16_hi:
2241 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2242 ; X64-NEXT: vpsravw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xc1]
2243 ; X64-NEXT: retq # encoding: [0xc3]
2244 %1 = call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %x0, <16 x i16> %x1)
2245 %2 = bitcast i16 %x3 to <16 x i1>
2246 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
declare <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16>, <8 x i16>)

define <8 x i16>@test_int_x86_avx512_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_psrav8_hi:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsravw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x11,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%1 = call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %x0, <8 x i16> %x1)
ret <8 x i16> %1
}

define <8 x i16>@test_int_x86_avx512_mask_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav8_hi:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpsravw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x11,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav8_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsravw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x11,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
%1 = call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %x0, <8 x i16> %x1)
%2 = bitcast i8 %x3 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
ret <8 x i16> %3
}

define <8 x i16>@test_int_x86_avx512_maskz_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrav8_hi:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpsravw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x11,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrav8_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsravw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x11,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
%1 = call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %x0, <8 x i16> %x1)
%2 = bitcast i8 %x3 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
ret <8 x i16> %3
}
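
; Variable per-element logical left shifts of 16-bit words (vpsllvw), again in
; plain, merge-masked ({%k1}) and zero-masked ({%k1} {z}) forms for 256-bit and
; 128-bit vectors.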
define <16 x i16>@test_int_x86_avx512_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_psllv16_hi:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%1 = call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %x0, <16 x i16> %x1)
ret <16 x i16> %1
}

define <16 x i16>@test_int_x86_avx512_mask_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv16_hi:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsllvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x12,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv16_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsllvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x12,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
%1 = call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %x0, <16 x i16> %x1)
%2 = bitcast i16 %x3 to <16 x i1>
%3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
ret <16 x i16> %3
}

define <16 x i16>@test_int_x86_avx512_maskz_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psllv16_hi:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x12,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psllv16_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x12,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
%1 = call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %x0, <16 x i16> %x1)
%2 = bitcast i16 %x3 to <16 x i1>
%3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
ret <16 x i16> %3
}
define <8 x i16>@test_int_x86_avx512_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_psllv8_hi:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%1 = call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %x0, <8 x i16> %x1)
ret <8 x i16> %1
}

define <8 x i16>@test_int_x86_avx512_mask_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv8_hi:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpsllvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x12,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv8_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsllvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x12,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
%1 = call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %x0, <8 x i16> %x1)
%2 = bitcast i8 %x3 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
ret <8 x i16> %3
}

define <8 x i16>@test_int_x86_avx512_maskz_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psllv8_hi:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x12,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psllv8_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x12,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
%1 = call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %x0, <8 x i16> %x1)
%2 = bitcast i8 %x3 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
ret <8 x i16> %3
}
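
; Constant-operand vpsllvw tests (optsize): the vector constants are expected to
; be loaded from the constant pool (absolute addressing on x86, RIP-relative on
; x64) and the shift performed with a memory operand.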
define <8 x i16> @test_int_x86_avx512_psllv_w_128_const() optsize {
; X86-LABEL: test_int_x86_avx512_psllv_w_128_const:
; X86: # %bb.0:
; X86-NEXT: vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
; X86-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_psllv_w_128_const:
; X64: # %bb.0:
; X64-NEXT: vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
; X64-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16>, <8 x i16>)
define <16 x i16> @test_int_x86_avx512_psllv_w_256_const() optsize {
; X86-LABEL: test_int_x86_avx512_psllv_w_256_const:
; X86: # %bb.0:
; X86-NEXT: vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X86-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_psllv_w_256_const:
; X64: # %bb.0:
; X64-NEXT: vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X64-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT: retq # encoding: [0xc3]
%res = call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16>, <16 x i16>)
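
; Full 16-bit element permutes (vpermw, llvm.x86.avx512.permvar.hi.*): words from
; %x0 are selected using the indices in %x1, in plain, merge-masked and
; zero-masked forms at 128-bit and 256-bit widths.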
declare <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16>, <8 x i16>)

define <8 x i16>@test_int_x86_avx512_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_permvar_hi_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermw %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x8d,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1)
ret <8 x i16> %1
}

define <8 x i16>@test_int_x86_avx512_mask_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpermw %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermw %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
%1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1)
%2 = bitcast i8 %x3 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
ret <8 x i16> %3
}

define <8 x i16>@test_int_x86_avx512_maskz_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_permvar_hi_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_permvar_hi_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1)
%2 = bitcast i8 %x3 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
ret <8 x i16> %3
}
declare <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16>, <16 x i16>)

define <16 x i16>@test_int_x86_avx512_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_permvar_hi_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x8d,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1)
ret <16 x i16> %1
}

define <16 x i16>@test_int_x86_avx512_mask_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermw %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermw %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
%1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1)
%2 = bitcast i16 %x3 to <16 x i1>
%3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
ret <16 x i16> %3
}

define <16 x i16>@test_int_x86_avx512_maskz_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_permvar_hi_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_permvar_hi_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1)
%2 = bitcast i16 %x3 to <16 x i1>
%3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
ret <16 x i16> %3
}