test/CodeGen/X86/avx512vbmi2vl-intrinsics-upgrade.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
   3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
   4
   5 define <8 x i16> @test_mask_expand_load_w_128(i8* %addr, <8 x i16> %data, i8 %mask) {
   6 ; X86-LABEL: test_mask_expand_load_w_128:
   7 ; X86:       # %bb.0:
   8 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   9 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
  10 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
  11 ; X86-NEXT:    vpexpandw (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x00]
  12 ; X86-NEXT:    retl # encoding: [0xc3]
  13 ;
  14 ; X64-LABEL: test_mask_expand_load_w_128:
  15 ; X64:       # %bb.0:
  16 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
  17 ; X64-NEXT:    vpexpandw (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x07]
  18 ; X64-NEXT:    retq # encoding: [0xc3]
  19   %res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
  20   ret <8 x i16> %res
  21 }
  22
  23 define <8 x i16> @test_maskz_expand_load_w_128(i8* %addr, i8 %mask) {
  24 ; X86-LABEL: test_maskz_expand_load_w_128:
  25 ; X86:       # %bb.0:
  26 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
  27 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
  28 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
  29 ; X86-NEXT:    vpexpandw (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0x00]
  30 ; X86-NEXT:    retl # encoding: [0xc3]
  31 ;
  32 ; X64-LABEL: test_maskz_expand_load_w_128:
  33 ; X64:       # %bb.0:
  34 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
  35 ; X64-NEXT:    vpexpandw (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0x07]
  36 ; X64-NEXT:    retq # encoding: [0xc3]
  37   %res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> zeroinitializer, i8 %mask)
  38   ret <8 x i16> %res
  39 }
  40
  41 declare <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
  42
  43 define <8 x i16> @test_expand_load_w_128(i8* %addr, <8 x i16> %data) {
  44 ; X86-LABEL: test_expand_load_w_128:
  45 ; X86:       # %bb.0:
  46 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
  47 ; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
  48 ; X86-NEXT:    vpexpandw (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x00]
  49 ; X86-NEXT:    retl # encoding: [0xc3]
  50 ;
  51 ; X64-LABEL: test_expand_load_w_128:
  52 ; X64:       # %bb.0:
  53 ; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
  54 ; X64-NEXT:    vpexpandw (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x07]
  55 ; X64-NEXT:    retq # encoding: [0xc3]
  56   %res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> %data, i8 -1)
  57   ret <8 x i16> %res
  58 }
  59
  60 define <8 x i16> @test_expand_w_128(<8 x i16> %data) {
  61 ; CHECK-LABEL: test_expand_w_128:
  62 ; CHECK:       # %bb.0:
  63 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  64   %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> undef, i8 -1)
  65   ret <8 x i16> %res
  66 }
  67
  68 define <8 x i16> @test_mask_expand_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) {
  69 ; X86-LABEL: test_mask_expand_w_128:
  70 ; X86:       # %bb.0:
  71 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
  72 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
  73 ; X86-NEXT:    vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8]
  74 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
  75 ; X86-NEXT:    retl # encoding: [0xc3]
  76 ;
  77 ; X64-LABEL: test_mask_expand_w_128:
  78 ; X64:       # %bb.0:
  79 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
  80 ; X64-NEXT:    vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8]
  81 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
  82 ; X64-NEXT:    retq # encoding: [0xc3]
  83   %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask)
  84   ret <8 x i16> %res
  85 }
  86
  87 define <8 x i16> @test_maskz_expand_w_128(<8 x i16> %data, i8 %mask) {
  88 ; X86-LABEL: test_maskz_expand_w_128:
  89 ; X86:       # %bb.0:
  90 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
  91 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
  92 ; X86-NEXT:    vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0]
  93 ; X86-NEXT:    retl # encoding: [0xc3]
  94 ;
  95 ; X64-LABEL: test_maskz_expand_w_128:
  96 ; X64:       # %bb.0:
  97 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
  98 ; X64-NEXT:    vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0]
  99 ; X64-NEXT:    retq # encoding: [0xc3]
 100   %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> zeroinitializer, i8 %mask)
 101   ret <8 x i16> %res
 102 }
 103
 104 declare <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> %src0, i8 %mask)
 105
 106 define <16 x i8> @test_mask_expand_load_b_128(i8* %addr, <16 x i8> %data, i16 %mask) {
 107 ; X86-LABEL: test_mask_expand_load_b_128:
 108 ; X86:       # %bb.0:
 109 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 110 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
 111 ; X86-NEXT:    vpexpandb (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x00]
 112 ; X86-NEXT:    retl # encoding: [0xc3]
 113 ;
 114 ; X64-LABEL: test_mask_expand_load_b_128:
 115 ; X64:       # %bb.0:
 116 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
 117 ; X64-NEXT:    vpexpandb (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x07]
 118 ; X64-NEXT:    retq # encoding: [0xc3]
 119   %res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
 120   ret <16 x i8> %res
 121 }
 122
 123 define <16 x i8> @test_maskz_expand_load_b_128(i8* %addr, i16 %mask) {
 124 ; X86-LABEL: test_maskz_expand_load_b_128:
 125 ; X86:       # %bb.0:
 126 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 127 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
 128 ; X86-NEXT:    vpexpandb (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0x00]
 129 ; X86-NEXT:    retl # encoding: [0xc3]
 130 ;
 131 ; X64-LABEL: test_maskz_expand_load_b_128:
 132 ; X64:       # %bb.0:
 133 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
 134 ; X64-NEXT:    vpexpandb (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0x07]
 135 ; X64-NEXT:    retq # encoding: [0xc3]
 136   %res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> zeroinitializer, i16 %mask)
 137   ret <16 x i8> %res
 138 }
 139
 140 declare <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
 141
 142 define <16 x i8> @test_expand_load_b_128(i8* %addr, <16 x i8> %data) {
 143 ; X86-LABEL: test_expand_load_b_128:
 144 ; X86:       # %bb.0:
 145 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 146 ; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
 147 ; X86-NEXT:    vpexpandb (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x00]
 148 ; X86-NEXT:    retl # encoding: [0xc3]
 149 ;
 150 ; X64-LABEL: test_expand_load_b_128:
 151 ; X64:       # %bb.0:
 152 ; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
 153 ; X64-NEXT:    vpexpandb (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x07]
 154 ; X64-NEXT:    retq # encoding: [0xc3]
 155   %res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> %data, i16 -1)
 156   ret <16 x i8> %res
 157 }
 158
 159 define <16 x i8> @test_expand_b_128(<16 x i8> %data) {
 160 ; CHECK-LABEL: test_expand_b_128:
 161 ; CHECK:       # %bb.0:
 162 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 163   %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> undef, i16 -1)
 164   ret <16 x i8> %res
 165 }
 166
 167 define <16 x i8> @test_mask_expand_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) {
 168 ; X86-LABEL: test_mask_expand_b_128:
 169 ; X86:       # %bb.0:
 170 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
 171 ; X86-NEXT:    vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8]
 172 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 173 ; X86-NEXT:    retl # encoding: [0xc3]
 174 ;
 175 ; X64-LABEL: test_mask_expand_b_128:
 176 ; X64:       # %bb.0:
 177 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 178 ; X64-NEXT:    vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8]
 179 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 180 ; X64-NEXT:    retq # encoding: [0xc3]
 181   %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask)
 182   ret <16 x i8> %res
 183 }
 184
 185 define <16 x i8> @test_maskz_expand_b_128(<16 x i8> %data, i16 %mask) {
 186 ; X86-LABEL: test_maskz_expand_b_128:
 187 ; X86:       # %bb.0:
 188 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
 189 ; X86-NEXT:    vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0]
 190 ; X86-NEXT:    retl # encoding: [0xc3]
 191 ;
 192 ; X64-LABEL: test_maskz_expand_b_128:
 193 ; X64:       # %bb.0:
 194 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 195 ; X64-NEXT:    vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0]
 196 ; X64-NEXT:    retq # encoding: [0xc3]
 197   %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> zeroinitializer, i16 %mask)
 198   ret <16 x i8> %res
 199 }
 200
 201 declare <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> %src0, i16 %mask)
 202
 203 define void @test_mask_compress_store_w_128(i8* %addr, <8 x i16> %data, i8 %mask) {
 204 ; X86-LABEL: test_mask_compress_store_w_128:
 205 ; X86:       # %bb.0:
 206 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 207 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
 208 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
 209 ; X86-NEXT:    vpcompressw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x00]
 210 ; X86-NEXT:    retl # encoding: [0xc3]
 211 ;
 212 ; X64-LABEL: test_mask_compress_store_w_128:
 213 ; X64:       # %bb.0:
 214 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
 215 ; X64-NEXT:    vpcompressw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x07]
 216 ; X64-NEXT:    retq # encoding: [0xc3]
 217   call void @llvm.x86.avx512.mask.compress.store.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
 218   ret void
 219 }
 220
 221 declare void @llvm.x86.avx512.mask.compress.store.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
 222
 223 define void @test_compress_store_w_128(i8* %addr, <8 x i16> %data) {
 224 ; X86-LABEL: test_compress_store_w_128:
 225 ; X86:       # %bb.0:
 226 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 227 ; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
 228 ; X86-NEXT:    vpcompressw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x00]
 229 ; X86-NEXT:    retl # encoding: [0xc3]
 230 ;
 231 ; X64-LABEL: test_compress_store_w_128:
 232 ; X64:       # %bb.0:
 233 ; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
 234 ; X64-NEXT:    vpcompressw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x07]
 235 ; X64-NEXT:    retq # encoding: [0xc3]
 236   call void @llvm.x86.avx512.mask.compress.store.w.128(i8* %addr, <8 x i16> %data, i8 -1)
 237   ret void
 238 }
 239
 240 define <8 x i16> @test_mask_compress_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) {
 241 ; X86-LABEL: test_mask_compress_w_128:
 242 ; X86:       # %bb.0:
 243 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 244 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
 245 ; X86-NEXT:    vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1]
 246 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 247 ; X86-NEXT:    retl # encoding: [0xc3]
 248 ;
 249 ; X64-LABEL: test_mask_compress_w_128:
 250 ; X64:       # %bb.0:
 251 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 252 ; X64-NEXT:    vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1]
 253 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 254 ; X64-NEXT:    retq # encoding: [0xc3]
 255   %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask)
 256   ret <8 x i16> %res
 257 }
 258
 259 define <8 x i16> @test_maskz_compress_w_128(<8 x i16> %data, i8 %mask) {
 260 ; X86-LABEL: test_maskz_compress_w_128:
 261 ; X86:       # %bb.0:
 262 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 263 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
 264 ; X86-NEXT:    vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0]
 265 ; X86-NEXT:    retl # encoding: [0xc3]
 266 ;
 267 ; X64-LABEL: test_maskz_compress_w_128:
 268 ; X64:       # %bb.0:
 269 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 270 ; X64-NEXT:    vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0]
 271 ; X64-NEXT:    retq # encoding: [0xc3]
 272   %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> zeroinitializer, i8 %mask)
 273   ret <8 x i16> %res
 274 }
 275
 276 define <8 x i16> @test_compress_w_128(<8 x i16> %data) {
 277 ; CHECK-LABEL: test_compress_w_128:
 278 ; CHECK:       # %bb.0:
 279 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 280   %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> undef, i8 -1)
 281   ret <8 x i16> %res
 282 }
 283
 284 declare <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> %src0, i8 %mask)
 285
 286 define void @test_mask_compress_store_b_128(i8* %addr, <16 x i8> %data, i16 %mask) {
 287 ; X86-LABEL: test_mask_compress_store_b_128:
 288 ; X86:       # %bb.0:
 289 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 290 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
 291 ; X86-NEXT:    vpcompressb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x00]
 292 ; X86-NEXT:    retl # encoding: [0xc3]
 293 ;
 294 ; X64-LABEL: test_mask_compress_store_b_128:
 295 ; X64:       # %bb.0:
 296 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
 297 ; X64-NEXT:    vpcompressb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x07]
 298 ; X64-NEXT:    retq # encoding: [0xc3]
 299   call void @llvm.x86.avx512.mask.compress.store.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
 300   ret void
 301 }
 302
 303 declare void @llvm.x86.avx512.mask.compress.store.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
 304
 305 define void @test_compress_store_b_128(i8* %addr, <16 x i8> %data) {
 306 ; X86-LABEL: test_compress_store_b_128:
 307 ; X86:       # %bb.0:
 308 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 309 ; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
 310 ; X86-NEXT:    vpcompressb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x00]
 311 ; X86-NEXT:    retl # encoding: [0xc3]
 312 ;
 313 ; X64-LABEL: test_compress_store_b_128:
 314 ; X64:       # %bb.0:
 315 ; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
 316 ; X64-NEXT:    vpcompressb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x07]
 317 ; X64-NEXT:    retq # encoding: [0xc3]
 318   call void @llvm.x86.avx512.mask.compress.store.b.128(i8* %addr, <16 x i8> %data, i16 -1)
 319   ret void
 320 }
 321
 322 define <16 x i8> @test_mask_compress_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) {
 323 ; X86-LABEL: test_mask_compress_b_128:
 324 ; X86:       # %bb.0:
 325 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
 326 ; X86-NEXT:    vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1]
 327 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 328 ; X86-NEXT:    retl # encoding: [0xc3]
 329 ;
 330 ; X64-LABEL: test_mask_compress_b_128:
 331 ; X64:       # %bb.0:
 332 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 333 ; X64-NEXT:    vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1]
 334 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 335 ; X64-NEXT:    retq # encoding: [0xc3]
 336   %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask)
 337   ret <16 x i8> %res
 338 }
 339
 340 define <16 x i8> @test_maskz_compress_b_128(<16 x i8> %data, i16 %mask) {
 341 ; X86-LABEL: test_maskz_compress_b_128:
 342 ; X86:       # %bb.0:
 343 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
 344 ; X86-NEXT:    vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0]
 345 ; X86-NEXT:    retl # encoding: [0xc3]
 346 ;
 347 ; X64-LABEL: test_maskz_compress_b_128:
 348 ; X64:       # %bb.0:
 349 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 350 ; X64-NEXT:    vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0]
 351 ; X64-NEXT:    retq # encoding: [0xc3]
 352   %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> zeroinitializer, i16 %mask)
 353   ret <16 x i8> %res
 354 }
 355
 356 define <16 x i8> @test_compress_b_128(<16 x i8> %data) {
 357 ; CHECK-LABEL: test_compress_b_128:
 358 ; CHECK:       # %bb.0:
 359 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 360   %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> undef, i16 -1)
 361   ret <16 x i8> %res
 362 }
 363
 364 declare <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> %src0, i16 %mask)
 365
 366 define <16 x i16> @test_mask_expand_load_w_256(i8* %addr, <16 x i16> %data, i16 %mask) {
 367 ; X86-LABEL: test_mask_expand_load_w_256:
 368 ; X86:       # %bb.0:
 369 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 370 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
 371 ; X86-NEXT:    vpexpandw (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x00]
 372 ; X86-NEXT:    retl # encoding: [0xc3]
 373 ;
 374 ; X64-LABEL: test_mask_expand_load_w_256:
 375 ; X64:       # %bb.0:
 376 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
 377 ; X64-NEXT:    vpexpandw (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x07]
 378 ; X64-NEXT:    retq # encoding: [0xc3]
 379   %res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
 380   ret <16 x i16> %res
 381 }
 382
 383 define <16 x i16> @test_maskz_expand_load_w_256(i8* %addr, i16 %mask) {
 384 ; X86-LABEL: test_maskz_expand_load_w_256:
 385 ; X86:       # %bb.0:
 386 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 387 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
 388 ; X86-NEXT:    vpexpandw (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0x00]
 389 ; X86-NEXT:    retl # encoding: [0xc3]
 390 ;
 391 ; X64-LABEL: test_maskz_expand_load_w_256:
 392 ; X64:       # %bb.0:
 393 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
 394 ; X64-NEXT:    vpexpandw (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0x07]
 395 ; X64-NEXT:    retq # encoding: [0xc3]
 396   %res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> zeroinitializer, i16 %mask)
 397   ret <16 x i16> %res
 398 }
 399
 400 declare <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
 401
 402 define <16 x i16> @test_expand_load_w_256(i8* %addr, <16 x i16> %data) {
 403 ; X86-LABEL: test_expand_load_w_256:
 404 ; X86:       # %bb.0:
 405 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 406 ; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
 407 ; X86-NEXT:    vpexpandw (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x00]
 408 ; X86-NEXT:    retl # encoding: [0xc3]
 409 ;
 410 ; X64-LABEL: test_expand_load_w_256:
 411 ; X64:       # %bb.0:
 412 ; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
 413 ; X64-NEXT:    vpexpandw (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x07]
 414 ; X64-NEXT:    retq # encoding: [0xc3]
 415   %res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> %data, i16 -1)
 416   ret <16 x i16> %res
 417 }
 418
 419 define <16 x i16> @test_expand_w_256(<16 x i16> %data) {
 420 ; CHECK-LABEL: test_expand_w_256:
 421 ; CHECK:       # %bb.0:
 422 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 423   %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> undef, i16 -1)
 424   ret <16 x i16> %res
 425 }
 426
 427 define <16 x i16> @test_mask_expand_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) {
 428 ; X86-LABEL: test_mask_expand_w_256:
 429 ; X86:       # %bb.0:
 430 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
 431 ; X86-NEXT:    vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8]
 432 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 433 ; X86-NEXT:    retl # encoding: [0xc3]
 434 ;
 435 ; X64-LABEL: test_mask_expand_w_256:
 436 ; X64:       # %bb.0:
 437 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 438 ; X64-NEXT:    vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8]
 439 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 440 ; X64-NEXT:    retq # encoding: [0xc3]
 441   %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask)
 442   ret <16 x i16> %res
 443 }
 444
 445 define <16 x i16> @test_maskz_expand_w_256(<16 x i16> %data, i16 %mask) {
 446 ; X86-LABEL: test_maskz_expand_w_256:
 447 ; X86:       # %bb.0:
 448 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
 449 ; X86-NEXT:    vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0]
 450 ; X86-NEXT:    retl # encoding: [0xc3]
 451 ;
 452 ; X64-LABEL: test_maskz_expand_w_256:
 453 ; X64:       # %bb.0:
 454 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 455 ; X64-NEXT:    vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0]
 456 ; X64-NEXT:    retq # encoding: [0xc3]
 457   %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> zeroinitializer, i16 %mask)
 458   ret <16 x i16> %res
 459 }
 460
 461 declare <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> %src0, i16 %mask)
 462
 463 define <32 x i8> @test_mask_expand_load_b_256(i8* %addr, <32 x i8> %data, i32 %mask) {
 464 ; X86-LABEL: test_mask_expand_load_b_256:
 465 ; X86:       # %bb.0:
 466 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 467 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
 468 ; X86-NEXT:    vpexpandb (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x00]
 469 ; X86-NEXT:    retl # encoding: [0xc3]
 470 ;
 471 ; X64-LABEL: test_mask_expand_load_b_256:
 472 ; X64:       # %bb.0:
 473 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
 474 ; X64-NEXT:    vpexpandb (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x07]
 475 ; X64-NEXT:    retq # encoding: [0xc3]
 476   %res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
 477   ret <32 x i8> %res
 478 }
 479
 480 define <32 x i8> @test_maskz_expand_load_b_256(i8* %addr, i32 %mask) {
 481 ; X86-LABEL: test_maskz_expand_load_b_256:
 482 ; X86:       # %bb.0:
 483 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 484 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
 485 ; X86-NEXT:    vpexpandb (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0x00]
 486 ; X86-NEXT:    retl # encoding: [0xc3]
 487 ;
 488 ; X64-LABEL: test_maskz_expand_load_b_256:
 489 ; X64:       # %bb.0:
 490 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
 491 ; X64-NEXT:    vpexpandb (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0x07]
 492 ; X64-NEXT:    retq # encoding: [0xc3]
 493   %res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> zeroinitializer, i32 %mask)
 494   ret <32 x i8> %res
 495 }
 496
 497 declare <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
 498
 499 define <32 x i8> @test_expand_load_b_256(i8* %addr, <32 x i8> %data) {
 500 ; X86-LABEL: test_expand_load_b_256:
 501 ; X86:       # %bb.0:
 502 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 503 ; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
 504 ; X86-NEXT:    vpexpandb (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x00]
 505 ; X86-NEXT:    retl # encoding: [0xc3]
 506 ;
 507 ; X64-LABEL: test_expand_load_b_256:
 508 ; X64:       # %bb.0:
 509 ; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
 510 ; X64-NEXT:    vpexpandb (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x07]
 511 ; X64-NEXT:    retq # encoding: [0xc3]
 512   %res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> %data, i32 -1)
 513   ret <32 x i8> %res
 514 }
 515
 516 define <32 x i8> @test_expand_b_256(<32 x i8> %data) {
 517 ; CHECK-LABEL: test_expand_b_256:
 518 ; CHECK:       # %bb.0:
 519 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 520   %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> undef, i32 -1)
 521   ret <32 x i8> %res
 522 }
 523
 524 define <32 x i8> @test_mask_expand_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) {
 525 ; X86-LABEL: test_mask_expand_b_256:
 526 ; X86:       # %bb.0:
 527 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
 528 ; X86-NEXT:    vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8]
 529 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 530 ; X86-NEXT:    retl # encoding: [0xc3]
 531 ;
 532 ; X64-LABEL: test_mask_expand_b_256:
 533 ; X64:       # %bb.0:
 534 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 535 ; X64-NEXT:    vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8]
 536 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 537 ; X64-NEXT:    retq # encoding: [0xc3]
 538   %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask)
 539   ret <32 x i8> %res
 540 }
 541
 542 define <32 x i8> @test_maskz_expand_b_256(<32 x i8> %data, i32 %mask) {
 543 ; X86-LABEL: test_maskz_expand_b_256:
 544 ; X86:       # %bb.0:
 545 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
 546 ; X86-NEXT:    vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0]
 547 ; X86-NEXT:    retl # encoding: [0xc3]
 548 ;
 549 ; X64-LABEL: test_maskz_expand_b_256:
 550 ; X64:       # %bb.0:
 551 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 552 ; X64-NEXT:    vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0]
 553 ; X64-NEXT:    retq # encoding: [0xc3]
 554   %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> zeroinitializer, i32 %mask)
 555   ret <32 x i8> %res
 556 }
 557
 558 declare <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> %src0, i32 %mask)
 559
 560 define void @test_mask_compress_store_w_256(i8* %addr, <16 x i16> %data, i16 %mask) {
 561 ; X86-LABEL: test_mask_compress_store_w_256:
 562 ; X86:       # %bb.0:
 563 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 564 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
 565 ; X86-NEXT:    vpcompressw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x00]
 566 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 567 ; X86-NEXT:    retl # encoding: [0xc3]
 568 ;
 569 ; X64-LABEL: test_mask_compress_store_w_256:
 570 ; X64:       # %bb.0:
 571 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
 572 ; X64-NEXT:    vpcompressw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x07]
 573 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 574 ; X64-NEXT:    retq # encoding: [0xc3]
 575   call void @llvm.x86.avx512.mask.compress.store.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
 576   ret void
 577 }
 578
 579 declare void @llvm.x86.avx512.mask.compress.store.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
 580
 581 define void @test_compress_store_w_256(i8* %addr, <16 x i16> %data) {
 582 ; X86-LABEL: test_compress_store_w_256:
 583 ; X86:       # %bb.0:
 584 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 585 ; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
 586 ; X86-NEXT:    vpcompressw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x00]
 587 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 588 ; X86-NEXT:    retl # encoding: [0xc3]
 589 ;
 590 ; X64-LABEL: test_compress_store_w_256:
 591 ; X64:       # %bb.0:
 592 ; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
 593 ; X64-NEXT:    vpcompressw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x07]
 594 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 595 ; X64-NEXT:    retq # encoding: [0xc3]
 596   call void @llvm.x86.avx512.mask.compress.store.w.256(i8* %addr, <16 x i16> %data, i16 -1)
 597   ret void
 598 }
 599
 600 define <16 x i16> @test_mask_compress_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) {
 601 ; X86-LABEL: test_mask_compress_w_256:
 602 ; X86:       # %bb.0:
 603 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
 604 ; X86-NEXT:    vpcompressw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1]
 605 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 606 ; X86-NEXT:    retl # encoding: [0xc3]
 607 ;
 608 ; X64-LABEL: test_mask_compress_w_256:
 609 ; X64:       # %bb.0:
 610 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 611 ; X64-NEXT:    vpcompressw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1]
 612 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 613 ; X64-NEXT:    retq # encoding: [0xc3]
 614   %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask)
 615   ret <16 x i16> %res
 616 }
 617
 618 define <16 x i16> @test_maskz_compress_w_256(<16 x i16> %data, i16 %mask) {
 619 ; X86-LABEL: test_maskz_compress_w_256:
 620 ; X86:       # %bb.0:
 621 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
 622 ; X86-NEXT:    vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0]
 623 ; X86-NEXT:    retl # encoding: [0xc3]
 624 ;
 625 ; X64-LABEL: test_maskz_compress_w_256:
 626 ; X64:       # %bb.0:
 627 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 628 ; X64-NEXT:    vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0]
 629 ; X64-NEXT:    retq # encoding: [0xc3]
 630   %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> zeroinitializer, i16 %mask)
 631   ret <16 x i16> %res
 632 }
 633
 634 define <16 x i16> @test_compress_w_256(<16 x i16> %data) {
 635 ; CHECK-LABEL: test_compress_w_256:
 636 ; CHECK:       # %bb.0:
 637 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 638   %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> undef, i16 -1)
 639   ret <16 x i16> %res
 640 }
 641
 642 declare <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> %src0, i16 %mask)
 643
 644 define void @test_mask_compress_store_b_256(i8* %addr, <32 x i8> %data, i32 %mask) {
 645 ; X86-LABEL: test_mask_compress_store_b_256:
 646 ; X86:       # %bb.0:
 647 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 648 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
 649 ; X86-NEXT:    vpcompressb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x00]
 650 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 651 ; X86-NEXT:    retl # encoding: [0xc3]
 652 ;
 653 ; X64-LABEL: test_mask_compress_store_b_256:
 654 ; X64:       # %bb.0:
 655 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
 656 ; X64-NEXT:    vpcompressb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x07]
 657 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 658 ; X64-NEXT:    retq # encoding: [0xc3]
 659   call void @llvm.x86.avx512.mask.compress.store.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
 660   ret void
 661 }
 662
 663 declare void @llvm.x86.avx512.mask.compress.store.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
 664
 665 define void @test_compress_store_b_256(i8* %addr, <32 x i8> %data) {
 666 ; X86-LABEL: test_compress_store_b_256:
 667 ; X86:       # %bb.0:
 668 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 669 ; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
 670 ; X86-NEXT:    vpcompressb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x00]
 671 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 672 ; X86-NEXT:    retl # encoding: [0xc3]
 673 ;
 674 ; X64-LABEL: test_compress_store_b_256:
 675 ; X64:       # %bb.0:
 676 ; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
 677 ; X64-NEXT:    vpcompressb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x07]
 678 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 679 ; X64-NEXT:    retq # encoding: [0xc3]
 680   call void @llvm.x86.avx512.mask.compress.store.b.256(i8* %addr, <32 x i8> %data, i32 -1)
 681   ret void
 682 }
 683
 684 define <32 x i8> @test_mask_compress_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) {
 685 ; X86-LABEL: test_mask_compress_b_256:
 686 ; X86:       # %bb.0:
 687 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
 688 ; X86-NEXT:    vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1]
 689 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 690 ; X86-NEXT:    retl # encoding: [0xc3]
 691 ;
 692 ; X64-LABEL: test_mask_compress_b_256:
 693 ; X64:       # %bb.0:
 694 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 695 ; X64-NEXT:    vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1]
 696 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 697 ; X64-NEXT:    retq # encoding: [0xc3]
 698   %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask)
 699   ret <32 x i8> %res
 700 }
 701
 702 define <32 x i8> @test_maskz_compress_b_256(<32 x i8> %data, i32 %mask) {
 703 ; X86-LABEL: test_maskz_compress_b_256:
 704 ; X86:       # %bb.0:
 705 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
 706 ; X86-NEXT:    vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x63,0xc0]
 707 ; X86-NEXT:    retl # encoding: [0xc3]
 708 ;
 709 ; X64-LABEL: test_maskz_compress_b_256:
 710 ; X64:       # %bb.0:
 711 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 712 ; X64-NEXT:    vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x63,0xc0]
 713 ; X64-NEXT:    retq # encoding: [0xc3]
 714   %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> zeroinitializer, i32 %mask)
 715   ret <32 x i8> %res
 716 }
 717
 718 define <32 x i8> @test_compress_b_256(<32 x i8> %data) {
 719 ; CHECK-LABEL: test_compress_b_256:
 720 ; CHECK:       # %bb.0:
 721 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 722   %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> undef, i32 -1)
 723   ret <32 x i8> %res
 724 }
 725
 726 declare <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> %src0, i32 %mask)
 727
 728 define <4 x i32>@test_int_x86_avx512_mask_vpshld_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
 729 ; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_128:
 730 ; X86:       # %bb.0:
 731 ; X86-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x16]
 732 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 733 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
 734 ; X86-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xd1,0x16]
 735 ; X86-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xc1,0x16]
 736 ; X86-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
 737 ; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
 738 ; X86-NEXT:    retl # encoding: [0xc3]
 739 ;
 740 ; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_128:
 741 ; X64:       # %bb.0:
 742 ; X64-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x16]
 743 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 744 ; X64-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xd1,0x16]
 745 ; X64-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xc1,0x16]
 746 ; X64-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
 747 ; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
 748 ; X64-NEXT:    retq # encoding: [0xc3]
 749   %res = call <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 %x4)
 750   %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 -1)
 751   %res2 = call <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> zeroinitializer,i8 %x4)
 752   %res3 = add <4 x i32> %res, %res1
 753   %res4 = add <4 x i32> %res3, %res2
 754   ret <4 x i32> %res4
 755 }
 756 declare <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32>, <4 x i32>, i32, <4 x i32>, i8)
 757
 758 define <8 x i32>@test_int_x86_avx512_mask_vpshld_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
 759 ; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_256:
 760 ; X86:       # %bb.0:
 761 ; X86-NEXT:    vpshldd $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xd9,0x16]
 762 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 763 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
 764 ; X86-NEXT:    vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16]
 765 ; X86-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
 766 ; X86-NEXT:    retl # encoding: [0xc3]
 767 ;
 768 ; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_256:
 769 ; X64:       # %bb.0:
 770 ; X64-NEXT:    vpshldd $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xd9,0x16]
 771 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 772 ; X64-NEXT:    vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16]
 773 ; X64-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
 774 ; X64-NEXT:    retq # encoding: [0xc3]
 775   %res = call <8 x i32> @llvm.x86.avx512.mask.vpshld.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
 776   %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpshld.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 -1)
 777   %res2 = add <8 x i32> %res, %res1
 778   ret <8 x i32> %res2
 779 }
 780 declare <8 x i32> @llvm.x86.avx512.mask.vpshld.d.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)
 781
 782 define <2 x i64>@test_int_x86_avx512_mask_vpshld_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
 783 ; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_128:
 784 ; X86:       # %bb.0:
 785 ; X86-NEXT:    vpshldq $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xd9,0x16]
 786 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 787 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
 788 ; X86-NEXT:    vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16]
 789 ; X86-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
 790 ; X86-NEXT:    retl # encoding: [0xc3]
 791 ;
 792 ; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_128:
 793 ; X64:       # %bb.0:
 794 ; X64-NEXT:    vpshldq $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xd9,0x16]
 795 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 796 ; X64-NEXT:    vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16]
 797 ; X64-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
 798 ; X64-NEXT:    retq # encoding: [0xc3]
 799   %res = call <2 x i64> @llvm.x86.avx512.mask.vpshld.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 %x4)
 800   %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpshld.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 -1)
 801   %res2 = add <2 x i64> %res, %res1
 802   ret <2 x i64> %res2
 803 }
 804 declare <2 x i64> @llvm.x86.avx512.mask.vpshld.q.128(<2 x i64>, <2 x i64>, i32, <2 x i64>, i8)
 805
 806 define <4 x i64>@test_int_x86_avx512_mask_vpshld_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
 807 ; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_256:
 808 ; X86:       # %bb.0:
 809 ; X86-NEXT:    vpshldq $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xd9,0x16]
 810 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 811 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
 812 ; X86-NEXT:    vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16]
 813 ; X86-NEXT:    vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
 814 ; X86-NEXT:    retl # encoding: [0xc3]
 815 ;
 816 ; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_256:
 817 ; X64:       # %bb.0:
 818 ; X64-NEXT:    vpshldq $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xd9,0x16]
 819 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 820 ; X64-NEXT:    vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16]
 821 ; X64-NEXT:    vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
 822 ; X64-NEXT:    retq # encoding: [0xc3]
 823   %res = call <4 x i64> @llvm.x86.avx512.mask.vpshld.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)
 824   %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpshld.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 -1)
 825   %res2 = add <4 x i64> %res, %res1
 826   ret <4 x i64> %res2
 827 }
 828 declare <4 x i64> @llvm.x86.avx512.mask.vpshld.q.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)
 829
 830 define <8 x i16>@test_int_x86_avx512_mask_vpshld_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
 831 ; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_128:
 832 ; X86:       # %bb.0:
 833 ; X86-NEXT:    vpshldw $6, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xd9,0x06]
 834 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 835 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
 836 ; X86-NEXT:    vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06]
 837 ; X86-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
 838 ; X86-NEXT:    retl # encoding: [0xc3]
 839 ;
 840 ; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_128:
 841 ; X64:       # %bb.0:
 842 ; X64-NEXT:    vpshldw $6, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xd9,0x06]
 843 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 844 ; X64-NEXT:    vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06]
 845 ; X64-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
 846 ; X64-NEXT:    retq # encoding: [0xc3]
 847   %res = call <8 x i16> @llvm.x86.avx512.mask.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6, <8 x i16> %x3, i8 %x4)
 848   %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6, <8 x i16> %x3, i8 -1)
 849   %res2 = add <8 x i16> %res, %res1
 850   ret <8 x i16> %res2
 851 }
 852 declare <8 x i16> @llvm.x86.avx512.mask.vpshld.w.128(<8 x i16>, <8 x i16>, i32, <8 x i16>, i8)
 853
 854 define <16 x i16>@test_int_x86_avx512_mask_vpshld_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
 855 ; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_256:
 856 ; X86:       # %bb.0:
 857 ; X86-NEXT:    vpshldw $6, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xd9,0x06]
 858 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
 859 ; X86-NEXT:    vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06]
 860 ; X86-NEXT:    vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
 861 ; X86-NEXT:    retl # encoding: [0xc3]
 862 ;
 863 ; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_256:
 864 ; X64:       # %bb.0:
 865 ; X64-NEXT:    vpshldw $6, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xd9,0x06]
 866 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 867 ; X64-NEXT:    vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06]
 868 ; X64-NEXT:    vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
 869 ; X64-NEXT:    retq # encoding: [0xc3]
 870   %res = call <16 x i16> @llvm.x86.avx512.mask.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6, <16 x i16> %x3, i16 %x4)
 871   %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6, <16 x i16> %x3, i16 -1)
 872   %res2 = add <16 x i16> %res, %res1
 873   ret <16 x i16> %res2
 874 }
 875 declare <16 x i16> @llvm.x86.avx512.mask.vpshld.w.256(<16 x i16>, <16 x i16>, i32, <16 x i16>, i16)
 876
 877 define <4 x i32>@test_int_x86_avx512_mask_vpshrd_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
 878 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_128:
 879 ; X86:       # %bb.0:
 880 ; X86-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x16]
 881 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 882 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
 883 ; X86-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xd1,0x16]
 884 ; X86-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xc1,0x16]
 885 ; X86-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
 886 ; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
 887 ; X86-NEXT:    retl # encoding: [0xc3]
 888 ;
 889 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_128:
 890 ; X64:       # %bb.0:
 891 ; X64-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x16]
 892 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 893 ; X64-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xd1,0x16]
 894 ; X64-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xc1,0x16]
 895 ; X64-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
 896 ; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
 897 ; X64-NEXT:    retq # encoding: [0xc3]
 898   %res = call <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 %x4)
 899   %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 -1)
 900   %res2 = call <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> zeroinitializer,i8 %x4)
 901   %res3 = add <4 x i32> %res, %res1
 902   %res4 = add <4 x i32> %res3, %res2
 903   ret <4 x i32> %res4
 904 }
 905 declare <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32>, <4 x i32>, i32, <4 x i32>, i8)
 906
 907 define <8 x i32>@test_int_x86_avx512_mask_vpshrd_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
 908 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_256:
 909 ; X86:       # %bb.0:
 910 ; X86-NEXT:    vpshrdd $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xd9,0x16]
 911 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 912 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
 913 ; X86-NEXT:    vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16]
 914 ; X86-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
 915 ; X86-NEXT:    retl # encoding: [0xc3]
 916 ;
 917 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_256:
 918 ; X64:       # %bb.0:
 919 ; X64-NEXT:    vpshrdd $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xd9,0x16]
 920 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 921 ; X64-NEXT:    vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16]
 922 ; X64-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
 923 ; X64-NEXT:    retq # encoding: [0xc3]
 924   %res = call <8 x i32> @llvm.x86.avx512.mask.vpshrd.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
 925   %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpshrd.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 -1)
 926   %res2 = add <8 x i32> %res, %res1
 927   ret <8 x i32> %res2
 928 }
 929 declare <8 x i32> @llvm.x86.avx512.mask.vpshrd.d.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)
 930
 931 define <2 x i64>@test_int_x86_avx512_mask_vpshrd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
 932 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_128:
 933 ; X86:       # %bb.0:
 934 ; X86-NEXT:    vpshrdq $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xd9,0x16]
 935 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 936 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
 937 ; X86-NEXT:    vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16]
 938 ; X86-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
 939 ; X86-NEXT:    retl # encoding: [0xc3]
 940 ;
 941 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_128:
 942 ; X64:       # %bb.0:
 943 ; X64-NEXT:    vpshrdq $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xd9,0x16]
 944 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 945 ; X64-NEXT:    vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16]
 946 ; X64-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
 947 ; X64-NEXT:    retq # encoding: [0xc3]
 948   %res = call <2 x i64> @llvm.x86.avx512.mask.vpshrd.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 %x4)
 949   %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpshrd.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 -1)
 950   %res2 = add <2 x i64> %res, %res1
 951   ret <2 x i64> %res2
 952 }
 953 declare <2 x i64> @llvm.x86.avx512.mask.vpshrd.q.128(<2 x i64>, <2 x i64>, i32, <2 x i64>, i8)
 954
 955 define <4 x i64>@test_int_x86_avx512_mask_vpshrd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
 956 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_256:
 957 ; X86:       # %bb.0:
 958 ; X86-NEXT:    vpshrdq $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xd9,0x16]
 959 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 960 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
 961 ; X86-NEXT:    vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16]
 962 ; X86-NEXT:    vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
 963 ; X86-NEXT:    retl # encoding: [0xc3]
 964 ;
 965 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_256:
 966 ; X64:       # %bb.0:
 967 ; X64-NEXT:    vpshrdq $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xd9,0x16]
 968 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 969 ; X64-NEXT:    vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16]
 970 ; X64-NEXT:    vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
 971 ; X64-NEXT:    retq # encoding: [0xc3]
 972   %res = call <4 x i64> @llvm.x86.avx512.mask.vpshrd.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)
 973   %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpshrd.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 -1)
 974   %res2 = add <4 x i64> %res, %res1
 975   ret <4 x i64> %res2
 976 }
 977 declare <4 x i64> @llvm.x86.avx512.mask.vpshrd.q.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)
 978
 979 define <8 x i16>@test_int_x86_avx512_mask_vpshrd_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
 980 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_128:
 981 ; X86:       # %bb.0:
 982 ; X86-NEXT:    vpshrdw $6, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xd9,0x06]
 983 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 984 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
 985 ; X86-NEXT:    vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06]
 986 ; X86-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
 987 ; X86-NEXT:    retl # encoding: [0xc3]
 988 ;
 989 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_128:
 990 ; X64:       # %bb.0:
 991 ; X64-NEXT:    vpshrdw $6, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xd9,0x06]
 992 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 993 ; X64-NEXT:    vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06]
 994 ; X64-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
 995 ; X64-NEXT:    retq # encoding: [0xc3]
 996   %res = call <8 x i16> @llvm.x86.avx512.mask.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6, <8 x i16> %x3, i8 %x4)
 997   %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6, <8 x i16> %x3, i8 -1)
 998   %res2 = add <8 x i16> %res, %res1
 999   ret <8 x i16> %res2
1000 }
1001 declare <8 x i16> @llvm.x86.avx512.mask.vpshrd.w.128(<8 x i16>, <8 x i16>, i32, <8 x i16>, i8)
1002
1003 define <16 x i16>@test_int_x86_avx512_mask_vpshrd_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
1004 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_256:
1005 ; X86:       # %bb.0:
1006 ; X86-NEXT:    vpshrdw $6, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xd9,0x06]
1007 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1008 ; X86-NEXT:    vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06]
1009 ; X86-NEXT:    vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
1010 ; X86-NEXT:    retl # encoding: [0xc3]
1011 ;
1012 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_256:
1013 ; X64:       # %bb.0:
1014 ; X64-NEXT:    vpshrdw $6, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xd9,0x06]
1015 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1016 ; X64-NEXT:    vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06]
1017 ; X64-NEXT:    vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
1018 ; X64-NEXT:    retq # encoding: [0xc3]
1019   %res = call <16 x i16> @llvm.x86.avx512.mask.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6, <16 x i16> %x3, i16 %x4)
1020   %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6, <16 x i16> %x3, i16 -1)
1021   %res2 = add <16 x i16> %res, %res1
1022   ret <16 x i16> %res2
1023 }
1024 declare <16 x i16> @llvm.x86.avx512.mask.vpshrd.w.256(<16 x i16>, <16 x i16>, i32, <16 x i16>, i16)
1025
1026 define <4 x i32>@test_int_x86_avx512_mask_vpshld_d_128_2(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
1027 ; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_128_2:
1028 ; X86:       # %bb.0:
1029 ; X86-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x16]
1030 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1031 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1032 ; X86-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xd1,0x16]
1033 ; X86-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xc1,0x16]
1034 ; X86-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
1035 ; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
1036 ; X86-NEXT:    retl # encoding: [0xc3]
1037 ;
1038 ; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_128_2:
1039 ; X64:       # %bb.0:
1040 ; X64-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x16]
1041 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1042 ; X64-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xd1,0x16]
1043 ; X64-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xc1,0x16]
1044 ; X64-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
1045 ; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
1046 ; X64-NEXT:    retq # encoding: [0xc3]
1047   %1 = call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22)
1048   %2 = bitcast i8 %x4 to <8 x i1>
1049   %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1050   %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x3
1051   %4 = call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22)
1052   %5 = call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22)
1053   %6 = bitcast i8 %x4 to <8 x i1>
1054   %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1055   %7 = select <4 x i1> %extract, <4 x i32> %5, <4 x i32> zeroinitializer
1056   %res3 = add <4 x i32> %3, %4
1057   %res4 = add <4 x i32> %res3, %7
1058   ret <4 x i32> %res4
1059 }
1060 declare <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32>, <4 x i32>, i32)
1061
1062 define <8 x i32>@test_int_x86_avx512_mask_vpshld_d_256_2(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
1063 ; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_256_2:
1064 ; X86:       # %bb.0:
1065 ; X86-NEXT:    vpshldd $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xd9,0x16]
1066 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1067 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1068 ; X86-NEXT:    vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16]
1069 ; X86-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
1070 ; X86-NEXT:    retl # encoding: [0xc3]
1071 ;
1072 ; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_256_2:
1073 ; X64:       # %bb.0:
1074 ; X64-NEXT:    vpshldd $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xd9,0x16]
1075 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1076 ; X64-NEXT:    vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16]
1077 ; X64-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
1078 ; X64-NEXT:    retq # encoding: [0xc3]
1079   %1 = call <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22)
1080   %2 = bitcast i8 %x4 to <8 x i1>
1081   %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x3
1082   %4 = call <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22)
1083   %res2 = add <8 x i32> %3, %4
1084   ret <8 x i32> %res2
1085 }
1086 declare <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32>, <8 x i32>, i32)
1087
1088 define <2 x i64>@test_int_x86_avx512_mask_vpshld_q_128_2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
1089 ; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_128_2:
1090 ; X86:       # %bb.0:
1091 ; X86-NEXT:    vpshldq $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xd9,0x16]
1092 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1093 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1094 ; X86-NEXT:    vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16]
1095 ; X86-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
1096 ; X86-NEXT:    retl # encoding: [0xc3]
1097 ;
1098 ; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_128_2:
1099 ; X64:       # %bb.0:
1100 ; X64-NEXT:    vpshldq $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xd9,0x16]
1101 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1102 ; X64-NEXT:    vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16]
1103 ; X64-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
1104 ; X64-NEXT:    retq # encoding: [0xc3]
1105   %1 = call <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22)
1106   %2 = bitcast i8 %x4 to <8 x i1>
1107   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
1108   %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x3
1109   %4 = call <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22)
1110   %res2 = add <2 x i64> %3, %4
1111   ret <2 x i64> %res2
1112 }
1113 declare <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64>, <2 x i64>, i32)
1114
1115 define <4 x i64>@test_int_x86_avx512_mask_vpshld_q_256_2(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
1116 ; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_256_2:
1117 ; X86:       # %bb.0:
1118 ; X86-NEXT:    vpshldq $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xd9,0x16]
1119 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1120 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1121 ; X86-NEXT:    vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16]
1122 ; X86-NEXT:    vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
1123 ; X86-NEXT:    retl # encoding: [0xc3]
1124 ;
1125 ; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_256_2:
1126 ; X64:       # %bb.0:
1127 ; X64-NEXT:    vpshldq $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xd9,0x16]
1128 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1129 ; X64-NEXT:    vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16]
1130 ; X64-NEXT:    vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
1131 ; X64-NEXT:    retq # encoding: [0xc3]
1132   %1 = call <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22)
1133   %2 = bitcast i8 %x4 to <8 x i1>
1134   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1135   %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x3
1136   %4 = call <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22)
1137   %res2 = add <4 x i64> %3, %4
1138   ret <4 x i64> %res2
1139 }
1140 declare <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64>, <4 x i64>, i32)
1141
1142 define <8 x i16>@test_int_x86_avx512_mask_vpshld_w_128_2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
1143 ; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_128_2:
1144 ; X86:       # %bb.0:
1145 ; X86-NEXT:    vpshldw $6, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xd9,0x06]
1146 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1147 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1148 ; X86-NEXT:    vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06]
1149 ; X86-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
1150 ; X86-NEXT:    retl # encoding: [0xc3]
1151 ;
1152 ; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_128_2:
1153 ; X64:       # %bb.0:
1154 ; X64-NEXT:    vpshldw $6, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xd9,0x06]
1155 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1156 ; X64-NEXT:    vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06]
1157 ; X64-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
1158 ; X64-NEXT:    retq # encoding: [0xc3]
1159   %1 = call <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6)
1160   %2 = bitcast i8 %x4 to <8 x i1>
1161   %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3
1162   %4 = call <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6)
1163   %res2 = add <8 x i16> %3, %4
1164   ret <8 x i16> %res2
1165 }
1166 declare <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16>, <8 x i16>, i32)
1167
1168 define <16 x i16>@test_int_x86_avx512_mask_vpshld_w_256_2(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
1169 ; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_256_2:
1170 ; X86:       # %bb.0:
1171 ; X86-NEXT:    vpshldw $6, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xd9,0x06]
1172 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1173 ; X86-NEXT:    vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06]
1174 ; X86-NEXT:    vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
1175 ; X86-NEXT:    retl # encoding: [0xc3]
1176 ;
1177 ; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_256_2:
1178 ; X64:       # %bb.0:
1179 ; X64-NEXT:    vpshldw $6, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xd9,0x06]
1180 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1181 ; X64-NEXT:    vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06]
1182 ; X64-NEXT:    vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
1183 ; X64-NEXT:    retq # encoding: [0xc3]
1184   %1 = call <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6)
1185   %2 = bitcast i16 %x4 to <16 x i1>
1186   %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3
1187   %4 = call <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6)
1188   %res2 = add <16 x i16> %3, %4
1189   ret <16 x i16> %res2
1190 }
1191 declare <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16>, <16 x i16>, i32)
1192
1193 define <4 x i32>@test_int_x86_avx512_mask_vpshrd_d_128_2(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
1194 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_128_2:
1195 ; X86:       # %bb.0:
1196 ; X86-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x16]
1197 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1198 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1199 ; X86-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xd1,0x16]
1200 ; X86-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xc1,0x16]
1201 ; X86-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
1202 ; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
1203 ; X86-NEXT:    retl # encoding: [0xc3]
1204 ;
1205 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_128_2:
1206 ; X64:       # %bb.0:
1207 ; X64-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x16]
1208 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1209 ; X64-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xd1,0x16]
1210 ; X64-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xc1,0x16]
1211 ; X64-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
1212 ; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
1213 ; X64-NEXT:    retq # encoding: [0xc3]
1214   %1 = call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22)
1215   %2 = bitcast i8 %x4 to <8 x i1>
1216   %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1217   %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x3
1218   %4 = call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22)
1219   %5 = call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22)
1220   %6 = bitcast i8 %x4 to <8 x i1>
1221   %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1222   %7 = select <4 x i1> %extract, <4 x i32> %5, <4 x i32> zeroinitializer
1223   %res3 = add <4 x i32> %3, %4
1224   %res4 = add <4 x i32> %res3, %7
1225   ret <4 x i32> %res4
1226 }
1227 declare <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32>, <4 x i32>, i32)
1228
1229 define <8 x i32>@test_int_x86_avx512_mask_vpshrd_d_256_2(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
1230 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_256_2:
1231 ; X86:       # %bb.0:
1232 ; X86-NEXT:    vpshrdd $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xd9,0x16]
1233 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1234 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1235 ; X86-NEXT:    vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16]
1236 ; X86-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
1237 ; X86-NEXT:    retl # encoding: [0xc3]
1238 ;
1239 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_256_2:
1240 ; X64:       # %bb.0:
1241 ; X64-NEXT:    vpshrdd $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xd9,0x16]
1242 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1243 ; X64-NEXT:    vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16]
1244 ; X64-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
1245 ; X64-NEXT:    retq # encoding: [0xc3]
1246   %1 = call <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22)
1247   %2 = bitcast i8 %x4 to <8 x i1>
1248   %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x3
1249   %4 = call <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22)
1250   %res2 = add <8 x i32> %3, %4
1251   ret <8 x i32> %res2
1252 }
1253 declare <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32>, <8 x i32>, i32)
1254
1255 define <2 x i64>@test_int_x86_avx512_mask_vpshrd_q_128_2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
1256 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_128_2:
1257 ; X86:       # %bb.0:
1258 ; X86-NEXT:    vpshrdq $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xd9,0x16]
1259 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1260 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1261 ; X86-NEXT:    vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16]
1262 ; X86-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
1263 ; X86-NEXT:    retl # encoding: [0xc3]
1264 ;
1265 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_128_2:
1266 ; X64:       # %bb.0:
1267 ; X64-NEXT:    vpshrdq $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xd9,0x16]
1268 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1269 ; X64-NEXT:    vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16]
1270 ; X64-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
1271 ; X64-NEXT:    retq # encoding: [0xc3]
1272   %1 = call <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22)
1273   %2 = bitcast i8 %x4 to <8 x i1>
1274   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
1275   %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x3
1276   %4 = call <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22)
1277   %res2 = add <2 x i64> %3, %4
1278   ret <2 x i64> %res2
1279 }
1280 declare <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64>, <2 x i64>, i32)
1281
1282 define <4 x i64>@test_int_x86_avx512_mask_vpshrd_q_256_2(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
1283 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_256_2:
1284 ; X86:       # %bb.0:
1285 ; X86-NEXT:    vpshrdq $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xd9,0x16]
1286 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1287 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1288 ; X86-NEXT:    vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16]
1289 ; X86-NEXT:    vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
1290 ; X86-NEXT:    retl # encoding: [0xc3]
1291 ;
1292 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_256_2:
1293 ; X64:       # %bb.0:
1294 ; X64-NEXT:    vpshrdq $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xd9,0x16]
1295 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1296 ; X64-NEXT:    vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16]
1297 ; X64-NEXT:    vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
1298 ; X64-NEXT:    retq # encoding: [0xc3]
1299   %1 = call <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22)
1300   %2 = bitcast i8 %x4 to <8 x i1>
1301   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1302   %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x3
1303   %4 = call <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22)
1304   %res2 = add <4 x i64> %3, %4
1305   ret <4 x i64> %res2
1306 }
1307 declare <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64>, <4 x i64>, i32)
1308
1309 define <8 x i16>@test_int_x86_avx512_mask_vpshrd_w_128_2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
1310 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_128_2:
1311 ; X86:       # %bb.0:
1312 ; X86-NEXT:    vpshrdw $6, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xd9,0x06]
1313 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1314 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1315 ; X86-NEXT:    vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06]
1316 ; X86-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
1317 ; X86-NEXT:    retl # encoding: [0xc3]
1318 ;
1319 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_128_2:
1320 ; X64:       # %bb.0:
1321 ; X64-NEXT:    vpshrdw $6, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xd9,0x06]
1322 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1323 ; X64-NEXT:    vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06]
1324 ; X64-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
1325 ; X64-NEXT:    retq # encoding: [0xc3]
1326   %1 = call <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6)
1327   %2 = bitcast i8 %x4 to <8 x i1>
1328   %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3
1329   %4 = call <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6)
1330   %res2 = add <8 x i16> %3, %4
1331   ret <8 x i16> %res2
1332 }
1333 declare <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16>, <8 x i16>, i32)
1334
1335 define <16 x i16>@test_int_x86_avx512_mask_vpshrd_w_256_2(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
1336 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_256_2:
1337 ; X86:       # %bb.0:
1338 ; X86-NEXT:    vpshrdw $6, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xd9,0x06]
1339 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1340 ; X86-NEXT:    vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06]
1341 ; X86-NEXT:    vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
1342 ; X86-NEXT:    retl # encoding: [0xc3]
1343 ;
1344 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_256_2:
1345 ; X64:       # %bb.0:
1346 ; X64-NEXT:    vpshrdw $6, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xd9,0x06]
1347 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1348 ; X64-NEXT:    vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06]
1349 ; X64-NEXT:    vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
1350 ; X64-NEXT:    retq # encoding: [0xc3]
1351   %1 = call <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6)
1352   %2 = bitcast i16 %x4 to <16 x i1>
1353   %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3
1354   %4 = call <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6)
1355   %res2 = add <16 x i16> %3, %4
1356   ret <16 x i16> %res2
1357 }
1358 declare <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16>, <16 x i16>, i32)
1359
1360 declare <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
1361 declare <8 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
1362
1363 define <8 x i32>@test_int_x86_avx512_mask_vpshrdv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) {
1364 ; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_256:
1365 ; X86:       # %bb.0:
1366 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1367 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1368 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1369 ; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1370 ; X86-NEXT:    vpshrdvd (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x73,0x18]
1371 ; X86-NEXT:    vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0]
1372 ; X86-NEXT:    vpshrdvd %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf2,0x75,0x28,0x73,0xe2]
1373 ; X86-NEXT:    vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x73,0xc2]
1374 ; X86-NEXT:    vpaddd %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfe,0xc0]
1375 ; X86-NEXT:    vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
1376 ; X86-NEXT:    retl # encoding: [0xc3]
1377 ;
1378 ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_256:
1379 ; X64:       # %bb.0:
1380 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1381 ; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1382 ; X64-NEXT:    vpshrdvd (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x73,0x1f]
1383 ; X64-NEXT:    vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0]
1384 ; X64-NEXT:    vpshrdvd %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf2,0x75,0x28,0x73,0xe2]
1385 ; X64-NEXT:    vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x73,0xc2]
1386 ; X64-NEXT:    vpaddd %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfe,0xc0]
1387 ; X64-NEXT:    vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
1388 ; X64-NEXT:    retq # encoding: [0xc3]
1389   %x2 = load <8 x i32>, <8 x i32>* %x2p
1390   %res = call <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
1391   %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 -1)
1392   %res2 = call <8 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8  %x3)
1393   %res3 = add <8 x i32> %res, %res1
1394   %res4 = add <8 x i32> %res2, %res3
1395   ret <8 x i32> %res4
1396 }
1397
1398 declare <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
1399 declare <4 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
1400
1401 define <4 x i32>@test_int_x86_avx512_mask_vpshrdv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) {
1402 ; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_128:
1403 ; X86:       # %bb.0:
1404 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1405 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1406 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1407 ; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1408 ; X86-NEXT:    vpshrdvd (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x73,0x18]
1409 ; X86-NEXT:    vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0]
1410 ; X86-NEXT:    vpshrdvd %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf2,0x75,0x08,0x73,0xe2]
1411 ; X86-NEXT:    vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x73,0xc2]
1412 ; X86-NEXT:    vpaddd %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfe,0xc0]
1413 ; X86-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
1414 ; X86-NEXT:    retl # encoding: [0xc3]
1415 ;
1416 ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_128:
1417 ; X64:       # %bb.0:
1418 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1419 ; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1420 ; X64-NEXT:    vpshrdvd (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x73,0x1f]
1421 ; X64-NEXT:    vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0]
1422 ; X64-NEXT:    vpshrdvd %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf2,0x75,0x08,0x73,0xe2]
1423 ; X64-NEXT:    vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x73,0xc2]
1424 ; X64-NEXT:    vpaddd %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfe,0xc0]
1425 ; X64-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
1426 ; X64-NEXT:    retq # encoding: [0xc3]
1427   %x2 = load <4 x i32>, <4 x i32>* %x2p
1428   %res = call <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
1429   %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 -1)
1430   %res2 = call <4 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8  %x3)
1431   %res3 = add <4 x i32> %res, %res1
1432   %res4 = add <4 x i32> %res2, %res3
1433   ret <4 x i32> %res4
1434 }
1435
1436 declare <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
1437 declare <4 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
1438
1439 define <4 x i64>@test_int_x86_avx512_mask_vpshrdv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64>* %x2p, <4 x i64> %x4, i8 %x3) {
1440 ; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_256:
1441 ; X86:       # %bb.0:
1442 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1443 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1444 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1445 ; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1446 ; X86-NEXT:    vpshrdvq (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x73,0x18]
1447 ; X86-NEXT:    vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0]
1448 ; X86-NEXT:    vpshrdvq %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf2,0xf5,0x28,0x73,0xe2]
1449 ; X86-NEXT:    vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x73,0xc2]
1450 ; X86-NEXT:    vpaddq %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xd4,0xc0]
1451 ; X86-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
1452 ; X86-NEXT:    retl # encoding: [0xc3]
1453 ;
1454 ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_256:
1455 ; X64:       # %bb.0:
1456 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1457 ; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1458 ; X64-NEXT:    vpshrdvq (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x73,0x1f]
1459 ; X64-NEXT:    vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0]
1460 ; X64-NEXT:    vpshrdvq %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf2,0xf5,0x28,0x73,0xe2]
1461 ; X64-NEXT:    vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x73,0xc2]
1462 ; X64-NEXT:    vpaddq %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xd4,0xc0]
1463 ; X64-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
1464 ; X64-NEXT:    retq # encoding: [0xc3]
1465   %x2 = load <4 x i64>, <4 x i64>* %x2p
1466   %res = call <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
1467   %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x4, i8 -1)
1468   %res2 = call <4 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x4, i8  %x3)
1469   %res3 = add <4 x i64> %res, %res1
1470   %res4 = add <4 x i64> %res2, %res3
1471   ret <4 x i64> %res4
1472 }
1473
1474 declare <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
1475 declare <2 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
1476
1477 define <2 x i64>@test_int_x86_avx512_mask_vpshrdv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64>* %x2p, <2 x i64> %x4, i8 %x3) {
1478 ; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_128:
1479 ; X86:       # %bb.0:
1480 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1481 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1482 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1483 ; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1484 ; X86-NEXT:    vpshrdvq (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x73,0x18]
1485 ; X86-NEXT:    vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0]
1486 ; X86-NEXT:    vpshrdvq %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf2,0xf5,0x08,0x73,0xe2]
1487 ; X86-NEXT:    vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x73,0xc2]
1488 ; X86-NEXT:    vpaddq %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xd4,0xc0]
1489 ; X86-NEXT:    vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0]
1490 ; X86-NEXT:    retl # encoding: [0xc3]
1491 ;
1492 ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_128:
1493 ; X64:       # %bb.0:
1494 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1495 ; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1496 ; X64-NEXT:    vpshrdvq (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x73,0x1f]
1497 ; X64-NEXT:    vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0]
1498 ; X64-NEXT:    vpshrdvq %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf2,0xf5,0x08,0x73,0xe2]
1499 ; X64-NEXT:    vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x73,0xc2]
1500 ; X64-NEXT:    vpaddq %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xd4,0xc0]
1501 ; X64-NEXT:    vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0]
1502 ; X64-NEXT:    retq # encoding: [0xc3]
1503   %x2 = load <2 x i64>, <2 x i64>* %x2p
1504   %res = call <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
1505   %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x4, i8 -1)
1506   %res2 = call <2 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x4, i8  %x3)
1507   %res3 = add <2 x i64> %res, %res1
1508   %res4 = add <2 x i64> %res2, %res3
1509   ret <2 x i64> %res4
1510 }
1511
1512 declare <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1513 declare <16 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1514
1515 define <16 x i16>@test_int_x86_avx512_mask_vpshrdv_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16>* %x2p, <16 x i16> %x4, i16 %x3) {
1516 ; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_256:
1517 ; X86:       # %bb.0:
1518 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1519 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1520 ; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1521 ; X86-NEXT:    vpshrdvw (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x72,0x18]
1522 ; X86-NEXT:    vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0]
1523 ; X86-NEXT:    vpshrdvw %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf2,0xf5,0x28,0x72,0xe2]
1524 ; X86-NEXT:    vpshrdvw %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x72,0xc2]
1525 ; X86-NEXT:    vpaddw %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfd,0xc0]
1526 ; X86-NEXT:    vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0]
1527 ; X86-NEXT:    retl # encoding: [0xc3]
1528 ;
1529 ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_256:
1530 ; X64:       # %bb.0:
1531 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1532 ; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1533 ; X64-NEXT:    vpshrdvw (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x72,0x1f]
1534 ; X64-NEXT:    vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0]
1535 ; X64-NEXT:    vpshrdvw %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf2,0xf5,0x28,0x72,0xe2]
1536 ; X64-NEXT:    vpshrdvw %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x72,0xc2]
1537 ; X64-NEXT:    vpaddw %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfd,0xc0]
1538 ; X64-NEXT:    vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0]
1539 ; X64-NEXT:    retq # encoding: [0xc3]
1540   %x2 = load <16 x i16>, <16 x i16>* %x2p
1541   %res = call <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
1542   %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x4, i16 -1)
1543   %res2 = call <16 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x4, i16  %x3)
1544   %res3 = add <16 x i16> %res, %res1
1545   %res4 = add <16 x i16> %res2, %res3
1546   ret <16 x i16> %res4
1547 }
1548
1549 declare <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1550 declare <8 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1551
1552 define <8 x i16>@test_int_x86_avx512_mask_vpshrdv_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16>* %x2p, <8 x i16> %x4, i8 %x3) {
1553 ; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_128:
1554 ; X86:       # %bb.0:
1555 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1556 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1557 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1558 ; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1559 ; X86-NEXT:    vpshrdvw (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x72,0x18]
1560 ; X86-NEXT:    vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0]
1561 ; X86-NEXT:    vpshrdvw %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf2,0xf5,0x08,0x72,0xe2]
1562 ; X86-NEXT:    vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x72,0xc2]
1563 ; X86-NEXT:    vpaddw %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfd,0xc0]
1564 ; X86-NEXT:    vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0]
1565 ; X86-NEXT:    retl # encoding: [0xc3]
1566 ;
1567 ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_128:
1568 ; X64:       # %bb.0:
1569 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1570 ; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1571 ; X64-NEXT:    vpshrdvw (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x72,0x1f]
1572 ; X64-NEXT:    vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0]
1573 ; X64-NEXT:    vpshrdvw %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf2,0xf5,0x08,0x72,0xe2]
1574 ; X64-NEXT:    vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x72,0xc2]
1575 ; X64-NEXT:    vpaddw %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfd,0xc0]
1576 ; X64-NEXT:    vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0]
1577 ; X64-NEXT:    retq # encoding: [0xc3]
1578   %x2 = load <8 x i16>, <8 x i16>* %x2p
1579   %res = call <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
1580   %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x4, i8 -1)
1581   %res2 = call <8 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x4, i8  %x3)
1582   %res3 = add <8 x i16> %res, %res1
1583   %res4 = add <8 x i16> %res2, %res3
1584   ret <8 x i16> %res4
1585 }
1586
1587 declare <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
1588 declare <8 x i32> @llvm.x86.avx512.maskz.vpshldv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
1589
1590 define <8 x i32>@test_int_x86_avx512_mask_vpshldv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) {
1591 ; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_256:
1592 ; X86:       # %bb.0:
1593 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1594 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1595 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1596 ; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1597 ; X86-NEXT:    vpshldvd (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x71,0x18]
1598 ; X86-NEXT:    vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0]
1599 ; X86-NEXT:    vpshldvd %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf2,0x75,0x28,0x71,0xe2]
1600 ; X86-NEXT:    vpshldvd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x71,0xc2]
1601 ; X86-NEXT:    vpaddd %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfe,0xc0]
1602 ; X86-NEXT:    vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
1603 ; X86-NEXT:    retl # encoding: [0xc3]
1604 ;
1605 ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_256:
1606 ; X64:       # %bb.0:
1607 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1608 ; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1609 ; X64-NEXT:    vpshldvd (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x71,0x1f]
1610 ; X64-NEXT:    vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0]
1611 ; X64-NEXT:    vpshldvd %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf2,0x75,0x28,0x71,0xe2]
1612 ; X64-NEXT:    vpshldvd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x71,0xc2]
1613 ; X64-NEXT:    vpaddd %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfe,0xc0]
1614 ; X64-NEXT:    vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
1615 ; X64-NEXT:    retq # encoding: [0xc3]
1616   %x2 = load <8 x i32>, <8 x i32>* %x2p
1617   %res = call <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
1618   %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 -1)
1619   %res2 = call <8 x i32> @llvm.x86.avx512.maskz.vpshldv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8  %x3)
1620   %res3 = add <8 x i32> %res, %res1
1621   %res4 = add <8 x i32> %res2, %res3
1622   ret <8 x i32> %res4
1623 }
1624
1625 declare <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
1626 declare <4 x i32> @llvm.x86.avx512.maskz.vpshldv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
1627
1628 define <4 x i32>@test_int_x86_avx512_mask_vpshldv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) {
1629 ; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_128:
1630 ; X86:       # %bb.0:
1631 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1632 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1633 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1634 ; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1635 ; X86-NEXT:    vpshldvd (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x71,0x18]
1636 ; X86-NEXT:    vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0]
1637 ; X86-NEXT:    vpshldvd %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf2,0x75,0x08,0x71,0xe2]
1638 ; X86-NEXT:    vpshldvd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x71,0xc2]
1639 ; X86-NEXT:    vpaddd %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfe,0xc0]
1640 ; X86-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
1641 ; X86-NEXT:    retl # encoding: [0xc3]
1642 ;
1643 ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_128:
1644 ; X64:       # %bb.0:
1645 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1646 ; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1647 ; X64-NEXT:    vpshldvd (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x71,0x1f]
1648 ; X64-NEXT:    vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0]
1649 ; X64-NEXT:    vpshldvd %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf2,0x75,0x08,0x71,0xe2]
1650 ; X64-NEXT:    vpshldvd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x71,0xc2]
1651 ; X64-NEXT:    vpaddd %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfe,0xc0]
1652 ; X64-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
1653 ; X64-NEXT:    retq # encoding: [0xc3]
1654   %x2 = load <4 x i32>, <4 x i32>* %x2p
1655   %res = call <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
1656   %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 -1)
1657   %res2 = call <4 x i32> @llvm.x86.avx512.maskz.vpshldv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8  %x3)
1658   %res3 = add <4 x i32> %res, %res1
1659   %res4 = add <4 x i32> %res2, %res3
1660   ret <4 x i32> %res4
1661 }
1662
1663 declare <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
1664 declare <4 x i64> @llvm.x86.avx512.maskz.vpshldv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
1665
1666 define <4 x i64>@test_int_x86_avx512_mask_vpshldv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64>* %x2p, <4 x i64> %x4, i8 %x3) {
1667 ; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_256:
1668 ; X86:       # %bb.0:
1669 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1670 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1671 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1672 ; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1673 ; X86-NEXT:    vpshldvq (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x71,0x18]
1674 ; X86-NEXT:    vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0]
1675 ; X86-NEXT:    vpshldvq %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf2,0xf5,0x28,0x71,0xe2]
1676 ; X86-NEXT:    vpshldvq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x71,0xc2]
1677 ; X86-NEXT:    vpaddq %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xd4,0xc0]
1678 ; X86-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
1679 ; X86-NEXT:    retl # encoding: [0xc3]
1680 ;
1681 ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_256:
1682 ; X64:       # %bb.0:
1683 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1684 ; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1685 ; X64-NEXT:    vpshldvq (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x71,0x1f]
1686 ; X64-NEXT:    vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0]
1687 ; X64-NEXT:    vpshldvq %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf2,0xf5,0x28,0x71,0xe2]
1688 ; X64-NEXT:    vpshldvq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x71,0xc2]
1689 ; X64-NEXT:    vpaddq %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xd4,0xc0]
1690 ; X64-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
1691 ; X64-NEXT:    retq # encoding: [0xc3]
1692   %x2 = load <4 x i64>, <4 x i64>* %x2p
1693   %res = call <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
1694   %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x4, i8 -1)
1695   %res2 = call <4 x i64> @llvm.x86.avx512.maskz.vpshldv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x4, i8  %x3)
1696   %res3 = add <4 x i64> %res, %res1
1697   %res4 = add <4 x i64> %res2, %res3
1698   ret <4 x i64> %res4
1699 }
1700
1701 declare <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
1702 declare <2 x i64> @llvm.x86.avx512.maskz.vpshldv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
1703
1704 define <2 x i64>@test_int_x86_avx512_mask_vpshldv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64>* %x2p, <2 x i64> %x4, i8 %x3) {
1705 ; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_128:
1706 ; X86:       # %bb.0:
1707 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1708 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1709 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1710 ; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1711 ; X86-NEXT:    vpshldvq (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x71,0x18]
1712 ; X86-NEXT:    vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0]
1713 ; X86-NEXT:    vpshldvq %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf2,0xf5,0x08,0x71,0xe2]
1714 ; X86-NEXT:    vpshldvq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x71,0xc2]
1715 ; X86-NEXT:    vpaddq %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xd4,0xc0]
1716 ; X86-NEXT:    vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0]
1717 ; X86-NEXT:    retl # encoding: [0xc3]
1718 ;
1719 ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_128:
1720 ; X64:       # %bb.0:
1721 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1722 ; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1723 ; X64-NEXT:    vpshldvq (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x71,0x1f]
1724 ; X64-NEXT:    vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0]
1725 ; X64-NEXT:    vpshldvq %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf2,0xf5,0x08,0x71,0xe2]
1726 ; X64-NEXT:    vpshldvq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x71,0xc2]
1727 ; X64-NEXT:    vpaddq %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xd4,0xc0]
1728 ; X64-NEXT:    vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0]
1729 ; X64-NEXT:    retq # encoding: [0xc3]
1730   %x2 = load <2 x i64>, <2 x i64>* %x2p
1731   %res = call <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
1732   %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x4, i8 -1)
1733   %res2 = call <2 x i64> @llvm.x86.avx512.maskz.vpshldv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x4, i8  %x3)
1734   %res3 = add <2 x i64> %res, %res1
1735   %res4 = add <2 x i64> %res2, %res3
1736   ret <2 x i64> %res4
1737 }
1738
1739 declare <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1740 declare <16 x i16> @llvm.x86.avx512.maskz.vpshldv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1741
1742 define <16 x i16>@test_int_x86_avx512_mask_vpshldv_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16>* %x2p, <16 x i16> %x4, i16 %x3) {
1743 ; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_256:
1744 ; X86:       # %bb.0:
1745 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1746 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1747 ; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1748 ; X86-NEXT:    vpshldvw (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x70,0x18]
1749 ; X86-NEXT:    vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0]
1750 ; X86-NEXT:    vpshldvw %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf2,0xf5,0x28,0x70,0xe2]
1751 ; X86-NEXT:    vpshldvw %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x70,0xc2]
1752 ; X86-NEXT:    vpaddw %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfd,0xc0]
1753 ; X86-NEXT:    vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0]
1754 ; X86-NEXT:    retl # encoding: [0xc3]
1755 ;
1756 ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_256:
1757 ; X64:       # %bb.0:
1758 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1759 ; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1760 ; X64-NEXT:    vpshldvw (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x70,0x1f]
1761 ; X64-NEXT:    vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0]
1762 ; X64-NEXT:    vpshldvw %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf2,0xf5,0x28,0x70,0xe2]
1763 ; X64-NEXT:    vpshldvw %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x70,0xc2]
1764 ; X64-NEXT:    vpaddw %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfd,0xc0]
1765 ; X64-NEXT:    vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0]
1766 ; X64-NEXT:    retq # encoding: [0xc3]
1767   %x2 = load <16 x i16>, <16 x i16>* %x2p
1768   %res = call <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
1769   %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x4, i16 -1)
1770   %res2 = call <16 x i16> @llvm.x86.avx512.maskz.vpshldv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x4, i16  %x3)
1771   %res3 = add <16 x i16> %res, %res1
1772   %res4 = add <16 x i16> %res2, %res3
1773   ret <16 x i16> %res4
1774 }
1775
1776 declare <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1777 declare <8 x i16> @llvm.x86.avx512.maskz.vpshldv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1778
1779 define <8 x i16>@test_int_x86_avx512_mask_vpshldv_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16>* %x2p, <8 x i16> %x4, i8 %x3) {
1780 ; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_128:
1781 ; X86:       # %bb.0:
1782 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1783 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1784 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1785 ; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1786 ; X86-NEXT:    vpshldvw (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x70,0x18]
1787 ; X86-NEXT:    vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0]
1788 ; X86-NEXT:    vpshldvw %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf2,0xf5,0x08,0x70,0xe2]
1789 ; X86-NEXT:    vpshldvw %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x70,0xc2]
1790 ; X86-NEXT:    vpaddw %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfd,0xc0]
1791 ; X86-NEXT:    vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0]
1792 ; X86-NEXT:    retl # encoding: [0xc3]
1793 ;
1794 ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_128:
1795 ; X64:       # %bb.0:
1796 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1797 ; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1798 ; X64-NEXT:    vpshldvw (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x70,0x1f]
1799 ; X64-NEXT:    vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0]
1800 ; X64-NEXT:    vpshldvw %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf2,0xf5,0x08,0x70,0xe2]
1801 ; X64-NEXT:    vpshldvw %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x70,0xc2]
1802 ; X64-NEXT:    vpaddw %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfd,0xc0]
1803 ; X64-NEXT:    vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0]
1804 ; X64-NEXT:    retq # encoding: [0xc3]
1805   %x2 = load <8 x i16>, <8 x i16>* %x2p
1806   %res = call <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
1807   %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x4, i8 -1)
1808   %res2 = call <8 x i16> @llvm.x86.avx512.maskz.vpshldv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x4, i8  %x3)
1809   %res3 = add <8 x i16> %res, %res1
1810   %res4 = add <8 x i16> %res2, %res3
1811   ret <8 x i16> %res4
1812 }