; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
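; 128-bit expand tests. @llvm.masked.expandload reads consecutive elements
; from memory into the lanes selected by the mask (lowering to
; vpexpandw/vpexpandb), and @llvm.x86.avx512.mask.expand is the
; register-to-register form; an all-ones mask should fold away entirely.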
define <8 x i16> @test_mask_expand_load_w_128(ptr %addr, <8 x i16> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpexpandw (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr %addr, <8 x i1> %1, <8 x i16> %data)
  ret <8 x i16> %2
}
define <8 x i16> @test_maskz_expand_load_w_128(ptr %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpexpandw (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr %addr, <8 x i1> %1, <8 x i16> zeroinitializer)
  ret <8 x i16> %2
}
define <8 x i16> @test_expand_load_w_128(ptr %addr, <8 x i16> %data) {
; X86-LABEL: test_expand_load_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpexpandw (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpexpandw (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr %addr, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> %data)
  ret <8 x i16> %1
}

define <8 x i16> @test_expand_w_128(<8 x i16> %data) {
; CHECK-LABEL: test_expand_w_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %data, <8 x i16> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <8 x i16> %1
}
define <8 x i16> @test_mask_expand_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) {
; X86-LABEL: test_mask_expand_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %data, <8 x i16> %passthru, <8 x i1> %1)
  ret <8 x i16> %2
}
define <8 x i16> @test_maskz_expand_w_128(<8 x i16> %data, i8 %mask) {
; X86-LABEL: test_maskz_expand_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %data, <8 x i16> zeroinitializer, <8 x i1> %1)
  ret <8 x i16> %2
}
define <16 x i8> @test_mask_expand_load_b_128(ptr %addr, <16 x i8> %data, i16 %mask) {
; X86-LABEL: test_mask_expand_load_b_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr %addr, <16 x i1> %1, <16 x i8> %data)
  ret <16 x i8> %2
}
define <16 x i8> @test_maskz_expand_load_b_128(ptr %addr, i16 %mask) {
; X86-LABEL: test_maskz_expand_load_b_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr %addr, <16 x i1> %1, <16 x i8> zeroinitializer)
  ret <16 x i8> %2
}
define <16 x i8> @test_expand_load_b_128(ptr %addr, <16 x i8> %data) {
; X86-LABEL: test_expand_load_b_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpexpandb (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpexpandb (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr %addr, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> %data)
  ret <16 x i8> %1
}

define <16 x i8> @test_expand_b_128(<16 x i8> %data) {
; CHECK-LABEL: test_expand_b_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %data, <16 x i8> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <16 x i8> %1
}
define <16 x i8> @test_mask_expand_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) {
; X86-LABEL: test_mask_expand_b_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %data, <16 x i8> %passthru, <16 x i1> %1)
  ret <16 x i8> %2
}
define <16 x i8> @test_maskz_expand_b_128(<16 x i8> %data, i16 %mask) {
; X86-LABEL: test_maskz_expand_b_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %data, <16 x i8> zeroinitializer, <16 x i1> %1)
  ret <16 x i8> %2
}
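; 128-bit compress tests. @llvm.masked.compressstore packs the selected
; lanes and stores them contiguously (vpcompressw/vpcompressb to memory);
; @llvm.x86.avx512.mask.compress is the register-to-register form.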
define void @test_mask_compress_store_w_128(ptr %addr, <8 x i16> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpcompressw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8 %mask to <8 x i1>
  call void @llvm.masked.compressstore.v8i16(<8 x i16> %data, ptr %addr, <8 x i1> %1)
  ret void
}
define void @test_compress_store_w_128(ptr %addr, <8 x i16> %data) {
; X86-LABEL: test_compress_store_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpcompressw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpcompressw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.masked.compressstore.v8i16(<8 x i16> %data, ptr %addr, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}
define <8 x i16> @test_mask_compress_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %data, <8 x i16> %passthru, <8 x i1> %1)
  ret <8 x i16> %2
}
define <8 x i16> @test_maskz_compress_w_128(<8 x i16> %data, i8 %mask) {
; X86-LABEL: test_maskz_compress_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %data, <8 x i16> zeroinitializer, <8 x i1> %1)
  ret <8 x i16> %2
}

define <8 x i16> @test_compress_w_128(<8 x i16> %data) {
; CHECK-LABEL: test_compress_w_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %data, <8 x i16> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <8 x i16> %1
}
define void @test_mask_compress_store_b_128(ptr %addr, <16 x i8> %data, i16 %mask) {
; X86-LABEL: test_mask_compress_store_b_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpcompressb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  call void @llvm.masked.compressstore.v16i8(<16 x i8> %data, ptr %addr, <16 x i1> %1)
  ret void
}
define void @test_compress_store_b_128(ptr %addr, <16 x i8> %data) {
; X86-LABEL: test_compress_store_b_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpcompressb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpcompressb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.masked.compressstore.v16i8(<16 x i8> %data, ptr %addr, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}
define <16 x i8> @test_mask_compress_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) {
; X86-LABEL: test_mask_compress_b_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %data, <16 x i8> %passthru, <16 x i1> %1)
  ret <16 x i8> %2
}
define <16 x i8> @test_maskz_compress_b_128(<16 x i8> %data, i16 %mask) {
; X86-LABEL: test_maskz_compress_b_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %data, <16 x i8> zeroinitializer, <16 x i1> %1)
  ret <16 x i8> %2
}

define <16 x i8> @test_compress_b_128(<16 x i8> %data) {
; CHECK-LABEL: test_compress_b_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %data, <16 x i8> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <16 x i8> %1
}
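; The expand tests above, repeated for 256-bit vectors.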
define <16 x i16> @test_mask_expand_load_w_256(ptr %addr, <16 x i16> %data, i16 %mask) {
; X86-LABEL: test_mask_expand_load_w_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandw (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr %addr, <16 x i1> %1, <16 x i16> %data)
  ret <16 x i16> %2
}
define <16 x i16> @test_maskz_expand_load_w_256(ptr %addr, i16 %mask) {
; X86-LABEL: test_maskz_expand_load_w_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandw (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr %addr, <16 x i1> %1, <16 x i16> zeroinitializer)
  ret <16 x i16> %2
}
define <16 x i16> @test_expand_load_w_256(ptr %addr, <16 x i16> %data) {
; X86-LABEL: test_expand_load_w_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpexpandw (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpexpandw (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr %addr, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i16> %data)
  ret <16 x i16> %1
}

define <16 x i16> @test_expand_w_256(<16 x i16> %data) {
; CHECK-LABEL: test_expand_w_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16> %data, <16 x i16> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <16 x i16> %1
}
define <16 x i16> @test_mask_expand_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) {
; X86-LABEL: test_mask_expand_w_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16> %data, <16 x i16> %passthru, <16 x i1> %1)
  ret <16 x i16> %2
}
define <16 x i16> @test_maskz_expand_w_256(<16 x i16> %data, i16 %mask) {
; X86-LABEL: test_maskz_expand_w_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16> %data, <16 x i16> zeroinitializer, <16 x i1> %1)
  ret <16 x i16> %2
}
define <32 x i8> @test_mask_expand_load_b_256(ptr %addr, <32 x i8> %data, i32 %mask) {
; X86-LABEL: test_mask_expand_load_b_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr %addr, <32 x i1> %1, <32 x i8> %data)
  ret <32 x i8> %2
}
define <32 x i8> @test_maskz_expand_load_b_256(ptr %addr, i32 %mask) {
; X86-LABEL: test_maskz_expand_load_b_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr %addr, <32 x i1> %1, <32 x i8> zeroinitializer)
  ret <32 x i8> %2
}
define <32 x i8> @test_expand_load_b_256(ptr %addr, <32 x i8> %data) {
; X86-LABEL: test_expand_load_b_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT:    vpexpandb (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT:    vpexpandb (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr %addr, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> %data)
  ret <32 x i8> %1
}

define <32 x i8> @test_expand_b_256(<32 x i8> %data) {
; CHECK-LABEL: test_expand_b_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8> %data, <32 x i8> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <32 x i8> %1
}
define <32 x i8> @test_mask_expand_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) {
; X86-LABEL: test_mask_expand_b_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8> %data, <32 x i8> %passthru, <32 x i1> %1)
  ret <32 x i8> %2
}
define <32 x i8> @test_maskz_expand_b_256(<32 x i8> %data, i32 %mask) {
; X86-LABEL: test_maskz_expand_b_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8> %data, <32 x i8> zeroinitializer, <32 x i1> %1)
  ret <32 x i8> %2
}
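; The compress tests above, repeated for 256-bit vectors. The store forms
; end with vzeroupper since the functions use ymm registers.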
define void @test_mask_compress_store_w_256(ptr %addr, <16 x i16> %data, i16 %mask) {
; X86-LABEL: test_mask_compress_store_w_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpcompressw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  call void @llvm.masked.compressstore.v16i16(<16 x i16> %data, ptr %addr, <16 x i1> %1)
  ret void
}
define void @test_compress_store_w_256(ptr %addr, <16 x i16> %data) {
; X86-LABEL: test_compress_store_w_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpcompressw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpcompressw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.masked.compressstore.v16i16(<16 x i16> %data, ptr %addr, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}
define <16 x i16> @test_mask_compress_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) {
; X86-LABEL: test_mask_compress_w_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16> %data, <16 x i16> %passthru, <16 x i1> %1)
  ret <16 x i16> %2
}
define <16 x i16> @test_maskz_compress_w_256(<16 x i16> %data, i16 %mask) {
; X86-LABEL: test_maskz_compress_w_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16> %data, <16 x i16> zeroinitializer, <16 x i1> %1)
  ret <16 x i16> %2
}

define <16 x i16> @test_compress_w_256(<16 x i16> %data) {
; CHECK-LABEL: test_compress_w_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16> %data, <16 x i16> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <16 x i16> %1
}
define void @test_mask_compress_store_b_256(ptr %addr, <32 x i8> %data, i32 %mask) {
; X86-LABEL: test_mask_compress_store_b_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpcompressb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  call void @llvm.masked.compressstore.v32i8(<32 x i8> %data, ptr %addr, <32 x i1> %1)
  ret void
}
define void @test_compress_store_b_256(ptr %addr, <32 x i8> %data) {
; X86-LABEL: test_compress_store_b_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT:    vpcompressb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT:    vpcompressb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.masked.compressstore.v32i8(<32 x i8> %data, ptr %addr, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}
define <32 x i8> @test_mask_compress_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) {
; X86-LABEL: test_mask_compress_b_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8> %data, <32 x i8> %passthru, <32 x i1> %1)
  ret <32 x i8> %2
}
define <32 x i8> @test_maskz_compress_b_256(<32 x i8> %data, i32 %mask) {
; X86-LABEL: test_maskz_compress_b_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8> %data, <32 x i8> zeroinitializer, <32 x i1> %1)
  ret <32 x i8> %2
}

define <32 x i8> @test_compress_b_256(<32 x i8> %data) {
; CHECK-LABEL: test_compress_b_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8> %data, <32 x i8> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <32 x i8> %1
}
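; Immediate-count concatenated shifts: vpshldd/vpshldq/vpshldw are expressed
; as @llvm.fshl with a splat constant shift amount, checked with merge- and
; zero-masking via a select on the bitcast mask.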
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshld_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xe1,0x16]
; X86-NEXT:    vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17]
; X86-NEXT:    vpshldd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xd1,0x18]
; X86-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
; X86-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xe1,0x16]
; X64-NEXT:    vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17]
; X64-NEXT:    vpshldd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xd1,0x18]
; X64-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
; X64-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> <i32 22, i32 22, i32 22, i32 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x3
  %4 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> <i32 23, i32 23, i32 23, i32 23>)
  %5 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> <i32 24, i32 24, i32 24, i32 24>)
  %6 = bitcast i8 %x4 to <8 x i1>
  %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %7 = select <4 x i1> %extract, <4 x i32> %5, <4 x i32> zeroinitializer
  %res3 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } poison, <4 x i32> %3, 0
  %res4 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res3, <4 x i32> %4, 1
  %res5 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res4, <4 x i32> %7, 2
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %res5
}
define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshld_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16]
; X86-NEXT:    vpshldd $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xc9,0x17]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16]
; X64-NEXT:    vpshldd $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xc9,0x17]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x3
  %4 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>)
  %5 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0
  %6 = insertvalue { <8 x i32>, <8 x i32> } %5, <8 x i32> %4, 1
  ret { <8 x i32>, <8 x i32> } %6
}
define { <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshld_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16]
; X86-NEXT:    vpshldq $23, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xc9,0x17]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16]
; X64-NEXT:    vpshldq $23, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xc9,0x17]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> <i64 22, i64 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x3
  %4 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> <i64 23, i64 23>)
  %5 = insertvalue { <2 x i64>, <2 x i64> } poison, <2 x i64> %3, 0
  %6 = insertvalue { <2 x i64>, <2 x i64> } %5, <2 x i64> %4, 1
  ret { <2 x i64>, <2 x i64> } %6
}
define { <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshld_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16]
; X86-NEXT:    vpshldq $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xc9,0x17]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16]
; X64-NEXT:    vpshldq $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xc9,0x17]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> <i64 22, i64 22, i64 22, i64 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x3
  %4 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> <i64 23, i64 23, i64 23, i64 23>)
  %5 = insertvalue { <4 x i64>, <4 x i64> } poison, <4 x i64> %3, 0
  %6 = insertvalue { <4 x i64>, <4 x i64> } %5, <4 x i64> %4, 1
  ret { <4 x i64>, <4 x i64> } %6
}
define { <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshld_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06]
; X86-NEXT:    vpshldw $7, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xc9,0x07]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06]
; X64-NEXT:    vpshldw $7, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xc9,0x07]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3
  %4 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %5 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %3, 0
  %6 = insertvalue { <8 x i16>, <8 x i16> } %5, <8 x i16> %4, 1
  ret { <8 x i16>, <8 x i16> } %6
}
define { <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshld_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06]
; X86-NEXT:    vpshldw $7, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xc9,0x07]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06]
; X64-NEXT:    vpshldw $7, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xc9,0x07]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
  %2 = bitcast i16 %x4 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3
  %4 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %5 = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> %3, 0
  %6 = insertvalue { <16 x i16>, <16 x i16> } %5, <16 x i16> %4, 1
  ret { <16 x i16>, <16 x i16> } %6
}
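; vpshrdd/vpshrdq/vpshrdw map to @llvm.fshr with the operands swapped
; relative to the intrinsic argument order (%x1 supplies the high half of
; each concatenated pair).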
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshrd_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xe1,0x16]
; X86-NEXT:    vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17]
; X86-NEXT:    vpshrdd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xd1,0x18]
; X86-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
; X86-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xe1,0x16]
; X64-NEXT:    vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17]
; X64-NEXT:    vpshrdd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xd1,0x18]
; X64-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
; X64-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> <i32 22, i32 22, i32 22, i32 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x3
  %4 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> <i32 23, i32 23, i32 23, i32 23>)
  %5 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> <i32 24, i32 24, i32 24, i32 24>)
  %6 = bitcast i8 %x4 to <8 x i1>
  %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %7 = select <4 x i1> %extract, <4 x i32> %5, <4 x i32> zeroinitializer
  %res3 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } poison, <4 x i32> %3, 0
  %res4 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res3, <4 x i32> %4, 1
  %res5 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res4, <4 x i32> %7, 2
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %res5
}
define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshrd_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16]
; X86-NEXT:    vpshrdd $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xc9,0x17]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16]
; X64-NEXT:    vpshrdd $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xc9,0x17]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x3
  %4 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>)
  %5 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0
  %6 = insertvalue { <8 x i32>, <8 x i32> } %5, <8 x i32> %4, 1
  ret { <8 x i32>, <8 x i32> } %6
}
define { <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshrd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16]
; X86-NEXT:    vpshrdq $23, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xc9,0x17]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16]
; X64-NEXT:    vpshrdq $23, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xc9,0x17]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> <i64 22, i64 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x3
  %4 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> <i64 23, i64 23>)
  %5 = insertvalue { <2 x i64>, <2 x i64> } poison, <2 x i64> %3, 0
  %6 = insertvalue { <2 x i64>, <2 x i64> } %5, <2 x i64> %4, 1
  ret { <2 x i64>, <2 x i64> } %6
}
define { <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshrd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16]
; X86-NEXT:    vpshrdq $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xc9,0x17]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16]
; X64-NEXT:    vpshrdq $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xc9,0x17]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> <i64 22, i64 22, i64 22, i64 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x3
  %4 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> <i64 23, i64 23, i64 23, i64 23>)
  %5 = insertvalue { <4 x i64>, <4 x i64> } poison, <4 x i64> %3, 0
  %6 = insertvalue { <4 x i64>, <4 x i64> } %5, <4 x i64> %4, 1
  ret { <4 x i64>, <4 x i64> } %6
}
define { <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshrd_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06]
; X86-NEXT:    vpshrdw $7, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xc9,0x07]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06]
; X64-NEXT:    vpshrdw $7, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xc9,0x07]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3
  %4 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %5 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %3, 0
  %6 = insertvalue { <8 x i16>, <8 x i16> } %5, <8 x i16> %4, 1
  ret { <8 x i16>, <8 x i16> } %6
}
define { <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshrd_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06]
; X86-NEXT: vpshrdw $7, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xc9,0x07]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06]
; X64-NEXT: vpshrdw $7, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xc9,0x07]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
  %2 = bitcast i16 %x4 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3
  %4 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %5 = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> %3, 0
  %6 = insertvalue { <16 x i16>, <16 x i16> } %5, <16 x i16> %4, 1
  ret { <16 x i16>, <16 x i16> } %6
}
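; VPSHRDV* tests: variable per-element shift amounts. Each function runs the
; intrinsic twice, once merge-masked with the amounts loaded from %x2p and
; once zero-masked ({z}) with the amounts passed in register (%x4), so both
; the memory and the register operand encodings are checked.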
define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshrdv_d_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshrdvd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x73,0x00]
; X86-NEXT: vpshrdvd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x73,0xda]
; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshrdvd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x73,0x07]
; X64-NEXT: vpshrdvd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x73,0xda]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <8 x i32>, ptr %x2p
  %1 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x0
  %4 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer
  %res3 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0
  %res4 = insertvalue { <8 x i32>, <8 x i32> } %res3, <8 x i32> %6, 1
  ret { <8 x i32>, <8 x i32> } %res4
}
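; For the narrower d/q variants the i8 mask has more bits than lanes, so the
; IR bitcasts it to <8 x i1> and extracts the low 4 (or 2) elements with a
; shufflevector before the select.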
define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshrdv_d_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshrdvd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x73,0x00]
; X86-NEXT: vpshrdvd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x73,0xda]
; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshrdvd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x73,0x07]
; X64-NEXT: vpshrdvd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x73,0xda]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <4 x i32>, ptr %x2p
  %1 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x0
  %4 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = select <4 x i1> %extract1, <4 x i32> %4, <4 x i32> zeroinitializer
  %res3 = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> %3, 0
  %res4 = insertvalue { <4 x i32>, <4 x i32> } %res3, <4 x i32> %6, 1
  ret { <4 x i32>, <4 x i32> } %res4
}
define { <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshrdv_q_256(<4 x i64> %x0, <4 x i64> %x1, ptr %x2p, <4 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshrdvq (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x73,0x00]
; X86-NEXT: vpshrdvq %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x73,0xda]
; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshrdvq (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x73,0x07]
; X64-NEXT: vpshrdvq %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x73,0xda]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <4 x i64>, ptr %x2p
  %1 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x0
  %4 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = select <4 x i1> %extract1, <4 x i64> %4, <4 x i64> zeroinitializer
  %res3 = insertvalue { <4 x i64>, <4 x i64> } poison, <4 x i64> %3, 0
  %res4 = insertvalue { <4 x i64>, <4 x i64> } %res3, <4 x i64> %6, 1
  ret { <4 x i64>, <4 x i64> } %res4
}
define { <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshrdv_q_128(<2 x i64> %x0, <2 x i64> %x1, ptr %x2p, <2 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshrdvq (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x73,0x00]
; X86-NEXT: vpshrdvq %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x73,0xda]
; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshrdvq (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x73,0x07]
; X64-NEXT: vpshrdvq %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x73,0xda]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <2 x i64>, ptr %x2p
  %1 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x0
  %4 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
  %6 = select <2 x i1> %extract1, <2 x i64> %4, <2 x i64> zeroinitializer
  %res3 = insertvalue { <2 x i64>, <2 x i64> } poison, <2 x i64> %3, 0
  %res4 = insertvalue { <2 x i64>, <2 x i64> } %res3, <2 x i64> %6, 1
  ret { <2 x i64>, <2 x i64> } %res4
}
define { <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshrdv_w_256(<16 x i16> %x0, <16 x i16> %x1, ptr %x2p, <16 x i16> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpshrdvw (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x72,0x00]
; X86-NEXT: vpshrdvw %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x72,0xda]
; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshrdvw (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x72,0x07]
; X64-NEXT: vpshrdvw %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x72,0xda]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <16 x i16>, ptr %x2p
  %1 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x0
  %4 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x4)
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i16> %4, <16 x i16> zeroinitializer
  %7 = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> %3, 0
  %8 = insertvalue { <16 x i16>, <16 x i16> } %7, <16 x i16> %6, 1
  ret { <16 x i16>, <16 x i16> } %8
}
define { <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshrdv_w_128(<8 x i16> %x0, <8 x i16> %x1, ptr %x2p, <8 x i16> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshrdvw (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x72,0x00]
; X86-NEXT: vpshrdvw %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x72,0xda]
; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshrdvw (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x72,0x07]
; X64-NEXT: vpshrdvw %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x72,0xda]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <8 x i16>, ptr %x2p
  %1 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x0
  %4 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i16> %4, <8 x i16> zeroinitializer
  %7 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %3, 0
  %8 = insertvalue { <8 x i16>, <8 x i16> } %7, <8 x i16> %6, 1
  ret { <8 x i16>, <8 x i16> } %8
}
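; VPSHLDV* tests mirror the VPSHRDV ones but lower from llvm.fshl, with %x0
; and %x1 passed in argument order since fshl's first operand supplies the
; high half of the concatenation.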
define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshldv_d_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshldvd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x71,0x00]
; X86-NEXT: vpshldvd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x71,0xda]
; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshldvd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x71,0x07]
; X64-NEXT: vpshldvd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x71,0xda]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <8 x i32>, ptr %x2p
  %1 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x0
  %4 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer
  %7 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0
  %8 = insertvalue { <8 x i32>, <8 x i32> } %7, <8 x i32> %6, 1
  ret { <8 x i32>, <8 x i32> } %8
}
define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshldv_d_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshldvd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x71,0x00]
; X86-NEXT: vpshldvd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x71,0xda]
; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshldvd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x71,0x07]
; X64-NEXT: vpshldvd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x71,0xda]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <4 x i32>, ptr %x2p
  %1 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x0
  %4 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = select <4 x i1> %extract1, <4 x i32> %4, <4 x i32> zeroinitializer
  %7 = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> %3, 0
  %8 = insertvalue { <4 x i32>, <4 x i32> } %7, <4 x i32> %6, 1
  ret { <4 x i32>, <4 x i32> } %8
}
define { <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshldv_q_256(<4 x i64> %x0, <4 x i64> %x1, ptr %x2p, <4 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshldvq (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x71,0x00]
; X86-NEXT: vpshldvq %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x71,0xda]
; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshldvq (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x71,0x07]
; X64-NEXT: vpshldvq %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x71,0xda]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <4 x i64>, ptr %x2p
  %1 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x0
  %4 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = select <4 x i1> %extract1, <4 x i64> %4, <4 x i64> zeroinitializer
  %7 = insertvalue { <4 x i64>, <4 x i64> } poison, <4 x i64> %3, 0
  %8 = insertvalue { <4 x i64>, <4 x i64> } %7, <4 x i64> %6, 1
  ret { <4 x i64>, <4 x i64> } %8
}
define { <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshldv_q_128(<2 x i64> %x0, <2 x i64> %x1, ptr %x2p, <2 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshldvq (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x71,0x00]
; X86-NEXT: vpshldvq %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x71,0xda]
; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshldvq (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x71,0x07]
; X64-NEXT: vpshldvq %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x71,0xda]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <2 x i64>, ptr %x2p
  %1 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x0
  %4 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
  %6 = select <2 x i1> %extract1, <2 x i64> %4, <2 x i64> zeroinitializer
  %7 = insertvalue { <2 x i64>, <2 x i64> } poison, <2 x i64> %3, 0
  %8 = insertvalue { <2 x i64>, <2 x i64> } %7, <2 x i64> %6, 1
  ret { <2 x i64>, <2 x i64> } %8
}
define { <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshldv_w_256(<16 x i16> %x0, <16 x i16> %x1, ptr %x2p, <16 x i16> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpshldvw (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x70,0x00]
; X86-NEXT: vpshldvw %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x70,0xda]
; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshldvw (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x70,0x07]
; X64-NEXT: vpshldvw %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x70,0xda]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <16 x i16>, ptr %x2p
  %1 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x0
  %4 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x4)
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i16> %4, <16 x i16> zeroinitializer
  %7 = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> %3, 0
  %8 = insertvalue { <16 x i16>, <16 x i16> } %7, <16 x i16> %6, 1
  ret { <16 x i16>, <16 x i16> } %8
}
define { <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshldv_w_128(<8 x i16> %x0, <8 x i16> %x1, ptr %x2p, <8 x i16> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshldvw (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x70,0x00]
; X86-NEXT: vpshldvw %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x70,0xda]
; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshldvw (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x70,0x07]
; X64-NEXT: vpshldvw %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x70,0xda]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <8 x i16>, ptr %x2p
  %1 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x0
  %4 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i16> %4, <8 x i16> zeroinitializer
  %7 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %3, 0
  %8 = insertvalue { <8 x i16>, <8 x i16> } %7, <8 x i16> %6, 1
  ret { <8 x i16>, <8 x i16> } %8
}
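; Declarations for the intrinsics exercised above; the llvm.masked.* and
; llvm.x86.avx512.mask.* entries belong to the expand/compress tests earlier
; in the file.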
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.fshl.v8i32(<8 x i32>, <8 x i32>, <8 x i32>)
declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.fshl.v4i64(<4 x i64>, <4 x i64>, <4 x i64>)
declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
declare <16 x i16> @llvm.fshl.v16i16(<16 x i16>, <16 x i16>, <16 x i16>)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.fshr.v8i32(<8 x i32>, <8 x i32>, <8 x i32>)
declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.fshr.v4i64(<4 x i64>, <4 x i64>, <4 x i64>)
declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
declare <16 x i16> @llvm.fshr.v16i16(<16 x i16>, <16 x i16>, <16 x i16>)
declare <8 x i16> @llvm.masked.expandload.v8i16(ptr, <8 x i1>, <8 x i16>)
declare <16 x i8> @llvm.masked.expandload.v16i8(ptr, <16 x i1>, <16 x i8>)
declare void @llvm.masked.compressstore.v8i16(<8 x i16>, ptr, <8 x i1>)
declare void @llvm.masked.compressstore.v16i8(<16 x i8>, ptr, <16 x i1>)
declare <16 x i16> @llvm.masked.expandload.v16i16(ptr, <16 x i1>, <16 x i16>)
declare <32 x i8> @llvm.masked.expandload.v32i8(ptr, <32 x i1>, <32 x i8>)
declare void @llvm.masked.compressstore.v16i16(<16 x i16>, ptr, <16 x i1>)
declare void @llvm.masked.compressstore.v32i8(<32 x i8>, ptr, <32 x i1>)
declare <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16>, <8 x i16>, <8 x i1>)
declare <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8>, <16 x i8>, <16 x i1>)
declare <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16>, <8 x i16>, <8 x i1>)
declare <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8>, <16 x i8>, <16 x i1>)
declare <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16>, <16 x i16>, <16 x i1>)
declare <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8>, <32 x i8>, <32 x i1>)
declare <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16>, <16 x i16>, <16 x i1>)
declare <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8>, <32 x i8>, <32 x i1>)