llvm/test/CodeGen/X86/avx512vl-intrinsics.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
   3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
   4
   5 define <2 x double> @test_mask_compress_pd_128(<2 x double> %data, <2 x double> %passthru, i8 %mask) { ; compress under a variable i8 mask with a %passthru operand; checks expect the masked form without {z}
   6 ; X86-LABEL: test_mask_compress_pd_128:
   7 ; X86:       # %bb.0:
   8 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   9 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
  10 ; X86-NEXT:    vcompresspd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0xc1]
  11 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
  12 ; X86-NEXT:    retl # encoding: [0xc3]
  13 ;
  14 ; X64-LABEL: test_mask_compress_pd_128:
  15 ; X64:       # %bb.0:
  16 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
  17 ; X64-NEXT:    vcompresspd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0xc1]
  18 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
  19 ; X64-NEXT:    retq # encoding: [0xc3]
  20   %1 = bitcast i8 %mask to <8 x i1> ; reinterpret the i8 mask as eight i1 lanes
  21   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1> ; keep mask lanes 0-1, one per vector element
  22   %2 = call <2 x double> @llvm.x86.avx512.mask.compress.v2f64(<2 x double> %data, <2 x double> %passthru, <2 x i1> %extract)
  23   ret <2 x double> %2
  24 }
  25
  26 define <2 x double> @test_maskz_compress_pd_128(<2 x double> %data, i8 %mask) { ; compress under a variable i8 mask with zeroinitializer passthru; checks expect the {z} form
  27 ; X86-LABEL: test_maskz_compress_pd_128:
  28 ; X86:       # %bb.0:
  29 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
  30 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
  31 ; X86-NEXT:    vcompresspd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8a,0xc0]
  32 ; X86-NEXT:    retl # encoding: [0xc3]
  33 ;
  34 ; X64-LABEL: test_maskz_compress_pd_128:
  35 ; X64:       # %bb.0:
  36 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
  37 ; X64-NEXT:    vcompresspd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8a,0xc0]
  38 ; X64-NEXT:    retq # encoding: [0xc3]
  39   %1 = bitcast i8 %mask to <8 x i1> ; reinterpret the i8 mask as eight i1 lanes
  40   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1> ; keep mask lanes 0-1, one per vector element
  41   %2 = call <2 x double> @llvm.x86.avx512.mask.compress.v2f64(<2 x double> %data, <2 x double> zeroinitializer, <2 x i1> %extract)
  42   ret <2 x double> %2
  43 }
  44
  45 define <2 x double> @test_compress_pd_128(<2 x double> %data) { ; compress with a constant all-true mask and undef passthru; checks expect only the return
  46 ; CHECK-LABEL: test_compress_pd_128:
  47 ; CHECK:       # %bb.0:
  48 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  49   %1 = call <2 x double> @llvm.x86.avx512.mask.compress.v2f64(<2 x double> %data, <2 x double> undef, <2 x i1> <i1 true, i1 true>)
  50   ret <2 x double> %1
  51 }
  52
  53 define <4 x float> @test_mask_compress_ps_128(<4 x float> %data, <4 x float> %passthru, i8 %mask) { ; compress under a variable i8 mask with a %passthru operand; checks expect the masked form without {z}
  54 ; X86-LABEL: test_mask_compress_ps_128:
  55 ; X86:       # %bb.0:
  56 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
  57 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
  58 ; X86-NEXT:    vcompressps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0xc1]
  59 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
  60 ; X86-NEXT:    retl # encoding: [0xc3]
  61 ;
  62 ; X64-LABEL: test_mask_compress_ps_128:
  63 ; X64:       # %bb.0:
  64 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
  65 ; X64-NEXT:    vcompressps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0xc1]
  66 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
  67 ; X64-NEXT:    retq # encoding: [0xc3]
  68   %1 = bitcast i8 %mask to <8 x i1> ; reinterpret the i8 mask as eight i1 lanes
  69   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; keep mask lanes 0-3, one per vector element
  70   %2 = call <4 x float> @llvm.x86.avx512.mask.compress.v4f32(<4 x float> %data, <4 x float> %passthru, <4 x i1> %extract)
  71   ret <4 x float> %2
  72 }
  73
  74 define <4 x float> @test_maskz_compress_ps_128(<4 x float> %data, i8 %mask) { ; compress under a variable i8 mask with zeroinitializer passthru; checks expect the {z} form
  75 ; X86-LABEL: test_maskz_compress_ps_128:
  76 ; X86:       # %bb.0:
  77 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
  78 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
  79 ; X86-NEXT:    vcompressps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8a,0xc0]
  80 ; X86-NEXT:    retl # encoding: [0xc3]
  81 ;
  82 ; X64-LABEL: test_maskz_compress_ps_128:
  83 ; X64:       # %bb.0:
  84 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
  85 ; X64-NEXT:    vcompressps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8a,0xc0]
  86 ; X64-NEXT:    retq # encoding: [0xc3]
  87   %1 = bitcast i8 %mask to <8 x i1> ; reinterpret the i8 mask as eight i1 lanes
  88   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; keep mask lanes 0-3, one per vector element
  89   %2 = call <4 x float> @llvm.x86.avx512.mask.compress.v4f32(<4 x float> %data, <4 x float> zeroinitializer, <4 x i1> %extract)
  90   ret <4 x float> %2
  91 }
  92
  93 define <4 x float> @test_compress_ps_128(<4 x float> %data) { ; compress with a constant all-true mask and undef passthru; checks expect only the return
  94 ; CHECK-LABEL: test_compress_ps_128:
  95 ; CHECK:       # %bb.0:
  96 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  97   %1 = call <4 x float> @llvm.x86.avx512.mask.compress.v4f32(<4 x float> %data, <4 x float> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  98   ret <4 x float> %1
  99 }
 100
 101 define <2 x i64> @test_mask_compress_q_128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) { ; compress under a variable i8 mask with a %passthru operand; checks expect the masked form without {z}
 102 ; X86-LABEL: test_mask_compress_q_128:
 103 ; X86:       # %bb.0:
 104 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 105 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 106 ; X86-NEXT:    vpcompressq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0xc1]
 107 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 108 ; X86-NEXT:    retl # encoding: [0xc3]
 109 ;
 110 ; X64-LABEL: test_mask_compress_q_128:
 111 ; X64:       # %bb.0:
 112 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 113 ; X64-NEXT:    vpcompressq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0xc1]
 114 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 115 ; X64-NEXT:    retq # encoding: [0xc3]
 116   %1 = bitcast i8 %mask to <8 x i1> ; reinterpret the i8 mask as eight i1 lanes
 117   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1> ; keep mask lanes 0-1, one per vector element
 118   %2 = call <2 x i64> @llvm.x86.avx512.mask.compress.v2i64(<2 x i64> %data, <2 x i64> %passthru, <2 x i1> %extract)
 119   ret <2 x i64> %2
 120 }
 121
 122 define <2 x i64> @test_maskz_compress_q_128(<2 x i64> %data, i8 %mask) { ; compress under a variable i8 mask with zeroinitializer passthru; checks expect the {z} form
 123 ; X86-LABEL: test_maskz_compress_q_128:
 124 ; X86:       # %bb.0:
 125 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 126 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 127 ; X86-NEXT:    vpcompressq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8b,0xc0]
 128 ; X86-NEXT:    retl # encoding: [0xc3]
 129 ;
 130 ; X64-LABEL: test_maskz_compress_q_128:
 131 ; X64:       # %bb.0:
 132 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 133 ; X64-NEXT:    vpcompressq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8b,0xc0]
 134 ; X64-NEXT:    retq # encoding: [0xc3]
 135   %1 = bitcast i8 %mask to <8 x i1> ; reinterpret the i8 mask as eight i1 lanes
 136   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1> ; keep mask lanes 0-1, one per vector element
 137   %2 = call <2 x i64> @llvm.x86.avx512.mask.compress.v2i64(<2 x i64> %data, <2 x i64> zeroinitializer, <2 x i1> %extract)
 138   ret <2 x i64> %2
 139 }
 140
 141 define <2 x i64> @test_compress_q_128(<2 x i64> %data) { ; compress with a constant all-true mask and undef passthru; checks expect only the return
 142 ; CHECK-LABEL: test_compress_q_128:
 143 ; CHECK:       # %bb.0:
 144 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 145   %1 = call <2 x i64> @llvm.x86.avx512.mask.compress.v2i64(<2 x i64> %data, <2 x i64> undef, <2 x i1> <i1 true, i1 true>)
 146   ret <2 x i64> %1
 147 }
 148
 149 define <4 x i32> @test_mask_compress_d_128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) { ; compress under a variable i8 mask with a %passthru operand; checks expect the masked form without {z}
 150 ; X86-LABEL: test_mask_compress_d_128:
 151 ; X86:       # %bb.0:
 152 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 153 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 154 ; X86-NEXT:    vpcompressd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0xc1]
 155 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 156 ; X86-NEXT:    retl # encoding: [0xc3]
 157 ;
 158 ; X64-LABEL: test_mask_compress_d_128:
 159 ; X64:       # %bb.0:
 160 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 161 ; X64-NEXT:    vpcompressd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0xc1]
 162 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 163 ; X64-NEXT:    retq # encoding: [0xc3]
 164   %1 = bitcast i8 %mask to <8 x i1> ; reinterpret the i8 mask as eight i1 lanes
 165   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; keep mask lanes 0-3, one per vector element
 166   %2 = call <4 x i32> @llvm.x86.avx512.mask.compress.v4i32(<4 x i32> %data, <4 x i32> %passthru, <4 x i1> %extract)
 167   ret <4 x i32> %2
 168 }
 169
 170 define <4 x i32> @test_maskz_compress_d_128(<4 x i32> %data, i8 %mask) { ; compress under a variable i8 mask with zeroinitializer passthru; checks expect the {z} form
 171 ; X86-LABEL: test_maskz_compress_d_128:
 172 ; X86:       # %bb.0:
 173 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 174 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 175 ; X86-NEXT:    vpcompressd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0]
 176 ; X86-NEXT:    retl # encoding: [0xc3]
 177 ;
 178 ; X64-LABEL: test_maskz_compress_d_128:
 179 ; X64:       # %bb.0:
 180 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 181 ; X64-NEXT:    vpcompressd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0]
 182 ; X64-NEXT:    retq # encoding: [0xc3]
 183   %1 = bitcast i8 %mask to <8 x i1> ; reinterpret the i8 mask as eight i1 lanes
 184   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; keep mask lanes 0-3, one per vector element
 185   %2 = call <4 x i32> @llvm.x86.avx512.mask.compress.v4i32(<4 x i32> %data, <4 x i32> zeroinitializer, <4 x i1> %extract)
 186   ret <4 x i32> %2
 187 }
 188
 189 define <4 x i32> @test_compress_d_128(<4 x i32> %data) { ; compress with a constant all-true mask and undef passthru; checks expect only the return
 190 ; CHECK-LABEL: test_compress_d_128:
 191 ; CHECK:       # %bb.0:
 192 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 193   %1 = call <4 x i32> @llvm.x86.avx512.mask.compress.v4i32(<4 x i32> %data, <4 x i32> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 194   ret <4 x i32> %1
 195 }
 196
 197 define <2 x double> @test_expand_pd_128(<2 x double> %data) { ; expand with a constant all-true mask and undef passthru; checks expect only the return
 198 ; CHECK-LABEL: test_expand_pd_128:
 199 ; CHECK:       # %bb.0:
 200 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 201   %1 = call <2 x double> @llvm.x86.avx512.mask.expand.v2f64(<2 x double> %data, <2 x double> undef, <2 x i1> <i1 true, i1 true>)
 202   ret <2 x double> %1
 203 }
 204
 205 define <2 x double> @test_mask_expand_pd_128(<2 x double> %data, <2 x double> %passthru, i8 %mask) { ; expand under a variable i8 mask with a %passthru operand; checks expect the masked form without {z}
 206 ; X86-LABEL: test_mask_expand_pd_128:
 207 ; X86:       # %bb.0:
 208 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 209 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 210 ; X86-NEXT:    vexpandpd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0xc8]
 211 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 212 ; X86-NEXT:    retl # encoding: [0xc3]
 213 ;
 214 ; X64-LABEL: test_mask_expand_pd_128:
 215 ; X64:       # %bb.0:
 216 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 217 ; X64-NEXT:    vexpandpd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0xc8]
 218 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 219 ; X64-NEXT:    retq # encoding: [0xc3]
 220   %1 = bitcast i8 %mask to <8 x i1> ; reinterpret the i8 mask as eight i1 lanes
 221   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1> ; keep mask lanes 0-1, one per vector element
 222   %2 = call <2 x double> @llvm.x86.avx512.mask.expand.v2f64(<2 x double> %data, <2 x double> %passthru, <2 x i1> %extract)
 223   ret <2 x double> %2
 224 }
 225
 226 define <2 x double> @test_maskz_expand_pd_128(<2 x double> %data, i8 %mask) { ; expand under a variable i8 mask with zeroinitializer passthru; checks expect the {z} form
 227 ; X86-LABEL: test_maskz_expand_pd_128:
 228 ; X86:       # %bb.0:
 229 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 230 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 231 ; X86-NEXT:    vexpandpd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0xc0]
 232 ; X86-NEXT:    retl # encoding: [0xc3]
 233 ;
 234 ; X64-LABEL: test_maskz_expand_pd_128:
 235 ; X64:       # %bb.0:
 236 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 237 ; X64-NEXT:    vexpandpd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0xc0]
 238 ; X64-NEXT:    retq # encoding: [0xc3]
 239   %1 = bitcast i8 %mask to <8 x i1> ; reinterpret the i8 mask as eight i1 lanes
 240   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1> ; keep mask lanes 0-1, one per vector element
 241   %2 = call <2 x double> @llvm.x86.avx512.mask.expand.v2f64(<2 x double> %data, <2 x double> zeroinitializer, <2 x i1> %extract)
 242   ret <2 x double> %2
 243 }
 244
 245 define <4 x float> @test_expand_ps_128(<4 x float> %data) { ; expand with a constant all-true mask and undef passthru; checks expect only the return
 246 ; CHECK-LABEL: test_expand_ps_128:
 247 ; CHECK:       # %bb.0:
 248 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 249   %1 = call <4 x float> @llvm.x86.avx512.mask.expand.v4f32(<4 x float> %data, <4 x float> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 250   ret <4 x float> %1
 251 }
 252
 253 define <4 x float> @test_mask_expand_ps_128(<4 x float> %data, <4 x float> %passthru, i8 %mask) { ; expand under a variable i8 mask with a %passthru operand; checks expect the masked form without {z}
 254 ; X86-LABEL: test_mask_expand_ps_128:
 255 ; X86:       # %bb.0:
 256 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 257 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 258 ; X86-NEXT:    vexpandps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0xc8]
 259 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 260 ; X86-NEXT:    retl # encoding: [0xc3]
 261 ;
 262 ; X64-LABEL: test_mask_expand_ps_128:
 263 ; X64:       # %bb.0:
 264 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 265 ; X64-NEXT:    vexpandps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0xc8]
 266 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 267 ; X64-NEXT:    retq # encoding: [0xc3]
 268   %1 = bitcast i8 %mask to <8 x i1> ; reinterpret the i8 mask as eight i1 lanes
 269   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; keep mask lanes 0-3, one per vector element
 270   %2 = call <4 x float> @llvm.x86.avx512.mask.expand.v4f32(<4 x float> %data, <4 x float> %passthru, <4 x i1> %extract)
 271   ret <4 x float> %2
 272 }
 273
 274 define <4 x float> @test_maskz_expand_ps_128(<4 x float> %data, i8 %mask) { ; expand under a variable i8 mask with zeroinitializer passthru; checks expect the {z} form
 275 ; X86-LABEL: test_maskz_expand_ps_128:
 276 ; X86:       # %bb.0:
 277 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 278 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 279 ; X86-NEXT:    vexpandps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0xc0]
 280 ; X86-NEXT:    retl # encoding: [0xc3]
 281 ;
 282 ; X64-LABEL: test_maskz_expand_ps_128:
 283 ; X64:       # %bb.0:
 284 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 285 ; X64-NEXT:    vexpandps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0xc0]
 286 ; X64-NEXT:    retq # encoding: [0xc3]
 287   %1 = bitcast i8 %mask to <8 x i1> ; reinterpret the i8 mask as eight i1 lanes
 288   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; keep mask lanes 0-3, one per vector element
 289   %2 = call <4 x float> @llvm.x86.avx512.mask.expand.v4f32(<4 x float> %data, <4 x float> zeroinitializer, <4 x i1> %extract)
 290   ret <4 x float> %2
 291 }
 292
 293 define <2 x i64> @test_expand_q_128(<2 x i64> %data) { ; expand with a constant all-true mask and undef passthru; checks expect only the return
 294 ; CHECK-LABEL: test_expand_q_128:
 295 ; CHECK:       # %bb.0:
 296 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 297   %1 = call <2 x i64> @llvm.x86.avx512.mask.expand.v2i64(<2 x i64> %data, <2 x i64> undef, <2 x i1> <i1 true, i1 true>)
 298   ret <2 x i64> %1
 299 }
 300
 301 define <2 x i64> @test_mask_expand_q_128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) { ; expand under a variable i8 mask with a %passthru operand; checks expect the masked form without {z}
 302 ; X86-LABEL: test_mask_expand_q_128:
 303 ; X86:       # %bb.0:
 304 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 305 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 306 ; X86-NEXT:    vpexpandq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0xc8]
 307 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 308 ; X86-NEXT:    retl # encoding: [0xc3]
 309 ;
 310 ; X64-LABEL: test_mask_expand_q_128:
 311 ; X64:       # %bb.0:
 312 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 313 ; X64-NEXT:    vpexpandq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0xc8]
 314 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 315 ; X64-NEXT:    retq # encoding: [0xc3]
 316   %1 = bitcast i8 %mask to <8 x i1> ; reinterpret the i8 mask as eight i1 lanes
 317   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1> ; keep mask lanes 0-1, one per vector element
 318   %2 = call <2 x i64> @llvm.x86.avx512.mask.expand.v2i64(<2 x i64> %data, <2 x i64> %passthru, <2 x i1> %extract)
 319   ret <2 x i64> %2
 320 }
 321
 322 define <2 x i64> @test_maskz_expand_q_128(<2 x i64> %data, i8 %mask) { ; expand under a variable i8 mask with zeroinitializer passthru; checks expect the {z} form
 323 ; X86-LABEL: test_maskz_expand_q_128:
 324 ; X86:       # %bb.0:
 325 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 326 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 327 ; X86-NEXT:    vpexpandq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0xc0]
 328 ; X86-NEXT:    retl # encoding: [0xc3]
 329 ;
 330 ; X64-LABEL: test_maskz_expand_q_128:
 331 ; X64:       # %bb.0:
 332 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 333 ; X64-NEXT:    vpexpandq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0xc0]
 334 ; X64-NEXT:    retq # encoding: [0xc3]
 335   %1 = bitcast i8 %mask to <8 x i1> ; reinterpret the i8 mask as eight i1 lanes
 336   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1> ; keep mask lanes 0-1, one per vector element
 337   %2 = call <2 x i64> @llvm.x86.avx512.mask.expand.v2i64(<2 x i64> %data, <2 x i64> zeroinitializer, <2 x i1> %extract)
 338   ret <2 x i64> %2
 339 }
 340
 341 define <4 x i32> @test_expand_d_128(<4 x i32> %data) { ; expand with a constant all-true mask and undef passthru; checks expect only the return
 342 ; CHECK-LABEL: test_expand_d_128:
 343 ; CHECK:       # %bb.0:
 344 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 345   %1 = call <4 x i32> @llvm.x86.avx512.mask.expand.v4i32(<4 x i32> %data, <4 x i32> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 346   ret <4 x i32> %1
 347 }
 348
 349 define <4 x i32> @test_mask_expand_d_128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) { ; expand under a variable i8 mask with a %passthru operand; checks expect the masked form without {z}
 350 ; X86-LABEL: test_mask_expand_d_128:
 351 ; X86:       # %bb.0:
 352 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 353 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 354 ; X86-NEXT:    vpexpandd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0xc8]
 355 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 356 ; X86-NEXT:    retl # encoding: [0xc3]
 357 ;
 358 ; X64-LABEL: test_mask_expand_d_128:
 359 ; X64:       # %bb.0:
 360 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 361 ; X64-NEXT:    vpexpandd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0xc8]
 362 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 363 ; X64-NEXT:    retq # encoding: [0xc3]
 364   %1 = bitcast i8 %mask to <8 x i1> ; reinterpret the i8 mask as eight i1 lanes
 365   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; keep mask lanes 0-3, one per vector element
 366   %2 = call <4 x i32> @llvm.x86.avx512.mask.expand.v4i32(<4 x i32> %data, <4 x i32> %passthru, <4 x i1> %extract)
 367   ret <4 x i32> %2
 368 }
 369
 370 define <4 x i32> @test_maskz_expand_d_128(<4 x i32> %data, i8 %mask) { ; expand under a variable i8 mask with zeroinitializer passthru; checks expect the {z} form
 371 ; X86-LABEL: test_maskz_expand_d_128:
 372 ; X86:       # %bb.0:
 373 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 374 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 375 ; X86-NEXT:    vpexpandd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0]
 376 ; X86-NEXT:    retl # encoding: [0xc3]
 377 ;
 378 ; X64-LABEL: test_maskz_expand_d_128:
 379 ; X64:       # %bb.0:
 380 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 381 ; X64-NEXT:    vpexpandd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0]
 382 ; X64-NEXT:    retq # encoding: [0xc3]
 383   %1 = bitcast i8 %mask to <8 x i1> ; reinterpret the i8 mask as eight i1 lanes
 384   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; keep mask lanes 0-3, one per vector element
 385   %2 = call <4 x i32> @llvm.x86.avx512.mask.expand.v4i32(<4 x i32> %data, <4 x i32> zeroinitializer, <4 x i1> %extract)
 386   ret <4 x i32> %2
 387 }
 388
 389 define <4 x double> @test_mask_compress_pd_256(<4 x double> %data, <4 x double> %passthru, i8 %mask) { ; compress under a variable i8 mask with a %passthru operand; checks expect the masked form without {z}
 390 ; X86-LABEL: test_mask_compress_pd_256:
 391 ; X86:       # %bb.0:
 392 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 393 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 394 ; X86-NEXT:    vcompresspd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1]
 395 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 396 ; X86-NEXT:    retl # encoding: [0xc3]
 397 ;
 398 ; X64-LABEL: test_mask_compress_pd_256:
 399 ; X64:       # %bb.0:
 400 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 401 ; X64-NEXT:    vcompresspd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1]
 402 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 403 ; X64-NEXT:    retq # encoding: [0xc3]
 404   %1 = bitcast i8 %mask to <8 x i1> ; reinterpret the i8 mask as eight i1 lanes
 405   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; keep mask lanes 0-3, one per vector element
 406   %2 = call <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double> %data, <4 x double> %passthru, <4 x i1> %extract)
 407   ret <4 x double> %2
 408 }
 409
 410 define <4 x double> @test_maskz_compress_pd_256(<4 x double> %data, i8 %mask) { ; compress under a variable i8 mask with zeroinitializer passthru; checks expect the {z} form
 411 ; X86-LABEL: test_maskz_compress_pd_256:
 412 ; X86:       # %bb.0:
 413 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 414 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 415 ; X86-NEXT:    vcompresspd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8a,0xc0]
 416 ; X86-NEXT:    retl # encoding: [0xc3]
 417 ;
 418 ; X64-LABEL: test_maskz_compress_pd_256:
 419 ; X64:       # %bb.0:
 420 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 421 ; X64-NEXT:    vcompresspd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8a,0xc0]
 422 ; X64-NEXT:    retq # encoding: [0xc3]
 423   %1 = bitcast i8 %mask to <8 x i1> ; reinterpret the i8 mask as eight i1 lanes
 424   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; keep mask lanes 0-3, one per vector element
 425   %2 = call <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double> %data, <4 x double> zeroinitializer, <4 x i1> %extract)
 426   ret <4 x double> %2
 427 }
 428
 429 define <4 x double> @test_compress_pd_256(<4 x double> %data) { ; compress with a constant all-true mask and undef passthru; checks expect only the return
 430 ; CHECK-LABEL: test_compress_pd_256:
 431 ; CHECK:       # %bb.0:
 432 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 433   %1 = call <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double> %data, <4 x double> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 434   ret <4 x double> %1
 435 }
 436
 437 define <8 x float> @test_mask_compress_ps_256(<8 x float> %data, <8 x float> %passthru, i8 %mask) { ; compress under a variable i8 mask with a %passthru operand; checks expect the masked form without {z}
 438 ; X86-LABEL: test_mask_compress_ps_256:
 439 ; X86:       # %bb.0:
 440 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 441 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 442 ; X86-NEXT:    vcompressps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0xc1]
 443 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 444 ; X86-NEXT:    retl # encoding: [0xc3]
 445 ;
 446 ; X64-LABEL: test_mask_compress_ps_256:
 447 ; X64:       # %bb.0:
 448 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 449 ; X64-NEXT:    vcompressps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0xc1]
 450 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 451 ; X64-NEXT:    retq # encoding: [0xc3]
 452   %1 = bitcast i8 %mask to <8 x i1> ; reinterpret the i8 mask as eight i1 lanes, one per vector element
 453   %2 = call <8 x float> @llvm.x86.avx512.mask.compress.v8f32(<8 x float> %data, <8 x float> %passthru, <8 x i1> %1)
 454   ret <8 x float> %2
 455 }
 456
 457 define <8 x float> @test_maskz_compress_ps_256(<8 x float> %data, i8 %mask) { ; compress under a variable i8 mask with zeroinitializer passthru; checks expect the {z} form
 458 ; X86-LABEL: test_maskz_compress_ps_256:
 459 ; X86:       # %bb.0:
 460 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 461 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 462 ; X86-NEXT:    vcompressps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8a,0xc0]
 463 ; X86-NEXT:    retl # encoding: [0xc3]
 464 ;
 465 ; X64-LABEL: test_maskz_compress_ps_256:
 466 ; X64:       # %bb.0:
 467 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 468 ; X64-NEXT:    vcompressps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8a,0xc0]
 469 ; X64-NEXT:    retq # encoding: [0xc3]
 470   %1 = bitcast i8 %mask to <8 x i1> ; reinterpret the i8 mask as eight i1 lanes, one per vector element
 471   %2 = call <8 x float> @llvm.x86.avx512.mask.compress.v8f32(<8 x float> %data, <8 x float> zeroinitializer, <8 x i1> %1)
 472   ret <8 x float> %2
 473 }
 474
 475 define <8 x float> @test_compress_ps_256(<8 x float> %data) { ; compress with a constant all-true mask and undef passthru; checks expect only the return
 476 ; CHECK-LABEL: test_compress_ps_256:
 477 ; CHECK:       # %bb.0:
 478 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 479   %1 = call <8 x float> @llvm.x86.avx512.mask.compress.v8f32(<8 x float> %data, <8 x float> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
 480   ret <8 x float> %1
 481 }
 482
 483 define <4 x i64> @test_mask_compress_q_256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask) { ; compress under a variable i8 mask with a %passthru operand; checks expect the masked form without {z}
 484 ; X86-LABEL: test_mask_compress_q_256:
 485 ; X86:       # %bb.0:
 486 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 487 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 488 ; X86-NEXT:    vpcompressq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0xc1]
 489 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 490 ; X86-NEXT:    retl # encoding: [0xc3]
 491 ;
 492 ; X64-LABEL: test_mask_compress_q_256:
 493 ; X64:       # %bb.0:
 494 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 495 ; X64-NEXT:    vpcompressq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0xc1]
 496 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 497 ; X64-NEXT:    retq # encoding: [0xc3]
 498   %1 = bitcast i8 %mask to <8 x i1> ; reinterpret the i8 mask as eight i1 lanes
 499   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; keep mask lanes 0-3, one per vector element
 500   %2 = call <4 x i64> @llvm.x86.avx512.mask.compress.v4i64(<4 x i64> %data, <4 x i64> %passthru, <4 x i1> %extract)
 501   ret <4 x i64> %2
 502 }
 503
 504 define <4 x i64> @test_maskz_compress_q_256(<4 x i64> %data, i8 %mask) { ; compress under a variable i8 mask with zeroinitializer passthru; checks expect the {z} form
 505 ; X86-LABEL: test_maskz_compress_q_256:
 506 ; X86:       # %bb.0:
 507 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 508 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 509 ; X86-NEXT:    vpcompressq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8b,0xc0]
 510 ; X86-NEXT:    retl # encoding: [0xc3]
 511 ;
 512 ; X64-LABEL: test_maskz_compress_q_256:
 513 ; X64:       # %bb.0:
 514 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 515 ; X64-NEXT:    vpcompressq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8b,0xc0]
 516 ; X64-NEXT:    retq # encoding: [0xc3]
 517   %1 = bitcast i8 %mask to <8 x i1> ; reinterpret the i8 mask as eight i1 lanes
 518   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; keep mask lanes 0-3, one per vector element
 519   %2 = call <4 x i64> @llvm.x86.avx512.mask.compress.v4i64(<4 x i64> %data, <4 x i64> zeroinitializer, <4 x i1> %extract)
 520   ret <4 x i64> %2
 521 }
 522
 523 define <4 x i64> @test_compress_q_256(<4 x i64> %data) { ; compress with a constant all-true mask and undef passthru; checks expect only the return
 524 ; CHECK-LABEL: test_compress_q_256:
 525 ; CHECK:       # %bb.0:
 526 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 527   %1 = call <4 x i64> @llvm.x86.avx512.mask.compress.v4i64(<4 x i64> %data, <4 x i64> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 528   ret <4 x i64> %1
 529 }
 530
 531 define <8 x i32> @test_mask_compress_d_256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask) { ; compress under a variable i8 mask with a %passthru operand; checks expect the masked form without {z}
 532 ; X86-LABEL: test_mask_compress_d_256:
 533 ; X86:       # %bb.0:
 534 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 535 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 536 ; X86-NEXT:    vpcompressd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0xc1]
 537 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 538 ; X86-NEXT:    retl # encoding: [0xc3]
 539 ;
 540 ; X64-LABEL: test_mask_compress_d_256:
 541 ; X64:       # %bb.0:
 542 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 543 ; X64-NEXT:    vpcompressd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0xc1]
 544 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 545 ; X64-NEXT:    retq # encoding: [0xc3]
 546   %1 = bitcast i8 %mask to <8 x i1> ; reinterpret the i8 mask as eight i1 lanes, one per vector element
 547   %2 = call <8 x i32> @llvm.x86.avx512.mask.compress.v8i32(<8 x i32> %data, <8 x i32> %passthru, <8 x i1> %1)
 548   ret <8 x i32> %2
 549 }
 550
 551 define <8 x i32> @test_maskz_compress_d_256(<8 x i32> %data, i8 %mask) { ; compress under a variable i8 mask with zeroinitializer passthru; checks expect the {z} form
 552 ; X86-LABEL: test_maskz_compress_d_256:
 553 ; X86:       # %bb.0:
 554 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 555 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 556 ; X86-NEXT:    vpcompressd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8b,0xc0]
 557 ; X86-NEXT:    retl # encoding: [0xc3]
 558 ;
 559 ; X64-LABEL: test_maskz_compress_d_256:
 560 ; X64:       # %bb.0:
 561 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 562 ; X64-NEXT:    vpcompressd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8b,0xc0]
 563 ; X64-NEXT:    retq # encoding: [0xc3]
 564   %1 = bitcast i8 %mask to <8 x i1> ; reinterpret the i8 mask as eight i1 lanes, one per vector element
 565   %2 = call <8 x i32> @llvm.x86.avx512.mask.compress.v8i32(<8 x i32> %data, <8 x i32> zeroinitializer, <8 x i1> %1)
 566   ret <8 x i32> %2
 567 }
 568
 569 define <8 x i32> @test_compress_d_256(<8 x i32> %data) { ; compress with a constant all-true mask and undef passthru; checks expect only the return
 570 ; CHECK-LABEL: test_compress_d_256:
 571 ; CHECK:       # %bb.0:
 572 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 573   %1 = call <8 x i32> @llvm.x86.avx512.mask.compress.v8i32(<8 x i32> %data, <8 x i32> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
 574   ret <8 x i32> %1
 575 }
 576
 577 define <4 x double> @test_expand_pd_256(<4 x double> %data) { ; expand with a constant all-true mask and undef passthru; checks expect only the return
 578 ; CHECK-LABEL: test_expand_pd_256:
 579 ; CHECK:       # %bb.0:
 580 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 581   %1 = call <4 x double> @llvm.x86.avx512.mask.expand.v4f64(<4 x double> %data, <4 x double> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 582   ret <4 x double> %1
 583 }
 584
 585 define <4 x double> @test_mask_expand_pd_256(<4 x double> %data, <4 x double> %passthru, i8 %mask) {
     ; Merge-masked expand of <4 x double>. The i8 mask is bitcast to <8 x i1>
     ; and its first four elements are extracted with a shufflevector to form
     ; the <4 x i1> mask. The expected code emits vexpandpd into the passthru
     ; register (ymm1) under %k1 and then copies ymm1 to ymm0 for the return.
 586 ; X86-LABEL: test_mask_expand_pd_256:
 587 ; X86:       # %bb.0:
 588 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 589 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 590 ; X86-NEXT:    vexpandpd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8]
 591 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 592 ; X86-NEXT:    retl # encoding: [0xc3]
 593 ;
 594 ; X64-LABEL: test_mask_expand_pd_256:
 595 ; X64:       # %bb.0:
 596 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 597 ; X64-NEXT:    vexpandpd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8]
 598 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 599 ; X64-NEXT:    retq # encoding: [0xc3]
 600   %1 = bitcast i8 %mask to <8 x i1>
 601   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 602   %2 = call <4 x double> @llvm.x86.avx512.mask.expand.v4f64(<4 x double> %data, <4 x double> %passthru, <4 x i1> %extract)
 603   ret <4 x double> %2
 604 }
 605
 606 define <4 x double> @test_maskz_expand_pd_256(<4 x double> %data, i8 %mask) {
     ; Zero-masked expand of <4 x double>: passthru is zeroinitializer and the
     ; <4 x i1> mask is the first four elements of the bitcast i8. The expected
     ; code is a single vexpandpd with {%k1} {z} operating on ymm0 in place.
 607 ; X86-LABEL: test_maskz_expand_pd_256:
 608 ; X86:       # %bb.0:
 609 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 610 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 611 ; X86-NEXT:    vexpandpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0xc0]
 612 ; X86-NEXT:    retl # encoding: [0xc3]
 613 ;
 614 ; X64-LABEL: test_maskz_expand_pd_256:
 615 ; X64:       # %bb.0:
 616 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 617 ; X64-NEXT:    vexpandpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0xc0]
 618 ; X64-NEXT:    retq # encoding: [0xc3]
 619   %1 = bitcast i8 %mask to <8 x i1>
 620   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 621   %2 = call <4 x double> @llvm.x86.avx512.mask.expand.v4f64(<4 x double> %data, <4 x double> zeroinitializer, <4 x i1> %extract)
 622   ret <4 x double> %2
 623 }
 624
 625 define <8 x float> @test_expand_ps_256(<8 x float> %data) {
     ; Expand of <8 x float> with a constant all-true mask and an undef
     ; passthru. The expected output is only the return; no vexpandps appears.
 626 ; CHECK-LABEL: test_expand_ps_256:
 627 ; CHECK:       # %bb.0:
 628 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 629   %1 = call <8 x float> @llvm.x86.avx512.mask.expand.v8f32(<8 x float> %data, <8 x float> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
 630   ret <8 x float> %1
 631 }
 632
 633 define <8 x float> @test_mask_expand_ps_256(<8 x float> %data, <8 x float> %passthru, i8 %mask) {
     ; Merge-masked expand of <8 x float>; all eight bits of the i8 mask are
     ; used via a direct bitcast to <8 x i1>. The expected code emits vexpandps
     ; into the passthru register (ymm1) under %k1, then copies ymm1 to ymm0.
 634 ; X86-LABEL: test_mask_expand_ps_256:
 635 ; X86:       # %bb.0:
 636 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 637 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 638 ; X86-NEXT:    vexpandps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0xc8]
 639 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 640 ; X86-NEXT:    retl # encoding: [0xc3]
 641 ;
 642 ; X64-LABEL: test_mask_expand_ps_256:
 643 ; X64:       # %bb.0:
 644 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 645 ; X64-NEXT:    vexpandps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0xc8]
 646 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 647 ; X64-NEXT:    retq # encoding: [0xc3]
 648   %1 = bitcast i8 %mask to <8 x i1>
 649   %2 = call <8 x float> @llvm.x86.avx512.mask.expand.v8f32(<8 x float> %data, <8 x float> %passthru, <8 x i1> %1)
 650   ret <8 x float> %2
 651 }
 652
 653 define <8 x float> @test_maskz_expand_ps_256(<8 x float> %data, i8 %mask) {
     ; Zero-masked expand of <8 x float>: passthru is zeroinitializer and the
     ; mask is the i8 bitcast to <8 x i1>. The expected code is one vexpandps
     ; with {%k1} {z} operating on ymm0 in place.
 654 ; X86-LABEL: test_maskz_expand_ps_256:
 655 ; X86:       # %bb.0:
 656 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 657 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 658 ; X86-NEXT:    vexpandps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0xc0]
 659 ; X86-NEXT:    retl # encoding: [0xc3]
 660 ;
 661 ; X64-LABEL: test_maskz_expand_ps_256:
 662 ; X64:       # %bb.0:
 663 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 664 ; X64-NEXT:    vexpandps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0xc0]
 665 ; X64-NEXT:    retq # encoding: [0xc3]
 666   %1 = bitcast i8 %mask to <8 x i1>
 667   %2 = call <8 x float> @llvm.x86.avx512.mask.expand.v8f32(<8 x float> %data, <8 x float> zeroinitializer, <8 x i1> %1)
 668   ret <8 x float> %2
 669 }
 670
 671 define <4 x i64> @test_expand_q_256(<4 x i64> %data) {
     ; Expand of <4 x i64> with a constant all-true mask and an undef passthru.
     ; The expected output is only the return; no vpexpandq appears.
 672 ; CHECK-LABEL: test_expand_q_256:
 673 ; CHECK:       # %bb.0:
 674 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 675   %1 = call <4 x i64> @llvm.x86.avx512.mask.expand.v4i64(<4 x i64> %data, <4 x i64> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 676   ret <4 x i64> %1
 677 }
 678
 679 define <4 x i64> @test_mask_expand_q_256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask) {
     ; Merge-masked expand of <4 x i64>. The <4 x i1> mask is the first four
     ; elements of the i8 bitcast to <8 x i1>. The expected code emits vpexpandq
     ; into the passthru register (ymm1) under %k1, then copies ymm1 to ymm0.
 680 ; X86-LABEL: test_mask_expand_q_256:
 681 ; X86:       # %bb.0:
 682 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 683 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 684 ; X86-NEXT:    vpexpandq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0xc8]
 685 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 686 ; X86-NEXT:    retl # encoding: [0xc3]
 687 ;
 688 ; X64-LABEL: test_mask_expand_q_256:
 689 ; X64:       # %bb.0:
 690 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 691 ; X64-NEXT:    vpexpandq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0xc8]
 692 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 693 ; X64-NEXT:    retq # encoding: [0xc3]
 694   %1 = bitcast i8 %mask to <8 x i1>
 695   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 696   %2 = call <4 x i64> @llvm.x86.avx512.mask.expand.v4i64(<4 x i64> %data, <4 x i64> %passthru, <4 x i1> %extract)
 697   ret <4 x i64> %2
 698 }
 699
 700 define <4 x i64> @test_maskz_expand_q_256(<4 x i64> %data, i8 %mask) {
     ; Zero-masked expand of <4 x i64>: passthru is zeroinitializer and the
     ; <4 x i1> mask is the first four elements of the bitcast i8. The expected
     ; code is one vpexpandq with {%k1} {z} operating on ymm0 in place.
 701 ; X86-LABEL: test_maskz_expand_q_256:
 702 ; X86:       # %bb.0:
 703 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 704 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 705 ; X86-NEXT:    vpexpandq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0xc0]
 706 ; X86-NEXT:    retl # encoding: [0xc3]
 707 ;
 708 ; X64-LABEL: test_maskz_expand_q_256:
 709 ; X64:       # %bb.0:
 710 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 711 ; X64-NEXT:    vpexpandq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0xc0]
 712 ; X64-NEXT:    retq # encoding: [0xc3]
 713   %1 = bitcast i8 %mask to <8 x i1>
 714   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 715   %2 = call <4 x i64> @llvm.x86.avx512.mask.expand.v4i64(<4 x i64> %data, <4 x i64> zeroinitializer, <4 x i1> %extract)
 716   ret <4 x i64> %2
 717 }
 718
 719 define <8 x i32> @test_expand_d_256(<8 x i32> %data) {
     ; Expand of <8 x i32> with a constant all-true mask and an undef passthru.
     ; The expected output is only the return; no vpexpandd appears.
 720 ; CHECK-LABEL: test_expand_d_256:
 721 ; CHECK:       # %bb.0:
 722 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 723   %1 = call <8 x i32> @llvm.x86.avx512.mask.expand.v8i32(<8 x i32> %data, <8 x i32> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
 724   ret <8 x i32> %1
 725 }
 726
 727 define <8 x i32> @test_mask_expand_d_256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask) {
     ; Merge-masked expand of <8 x i32>; the i8 mask is bitcast directly to
     ; <8 x i1>. The expected code emits vpexpandd into the passthru register
     ; (ymm1) under %k1, then copies ymm1 to ymm0 for the return.
 728 ; X86-LABEL: test_mask_expand_d_256:
 729 ; X86:       # %bb.0:
 730 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 731 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 732 ; X86-NEXT:    vpexpandd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0xc8]
 733 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 734 ; X86-NEXT:    retl # encoding: [0xc3]
 735 ;
 736 ; X64-LABEL: test_mask_expand_d_256:
 737 ; X64:       # %bb.0:
 738 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 739 ; X64-NEXT:    vpexpandd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0xc8]
 740 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 741 ; X64-NEXT:    retq # encoding: [0xc3]
 742   %1 = bitcast i8 %mask to <8 x i1>
 743   %2 = call <8 x i32> @llvm.x86.avx512.mask.expand.v8i32(<8 x i32> %data, <8 x i32> %passthru, <8 x i1> %1)
 744   ret <8 x i32> %2
 745 }
 746
 747 define <8 x i32> @test_maskz_expand_d_256(<8 x i32> %data, i8 %mask) {
     ; Zero-masked expand of <8 x i32>: passthru is zeroinitializer and the
     ; mask is the i8 bitcast to <8 x i1>. The expected code is one vpexpandd
     ; with {%k1} {z} operating on ymm0 in place.
 748 ; X86-LABEL: test_maskz_expand_d_256:
 749 ; X86:       # %bb.0:
 750 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 751 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 752 ; X86-NEXT:    vpexpandd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0xc0]
 753 ; X86-NEXT:    retl # encoding: [0xc3]
 754 ;
 755 ; X64-LABEL: test_maskz_expand_d_256:
 756 ; X64:       # %bb.0:
 757 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 758 ; X64-NEXT:    vpexpandd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0xc0]
 759 ; X64-NEXT:    retq # encoding: [0xc3]
 760   %1 = bitcast i8 %mask to <8 x i1>
 761   %2 = call <8 x i32> @llvm.x86.avx512.mask.expand.v8i32(<8 x i32> %data, <8 x i32> zeroinitializer, <8 x i1> %1)
 762   ret <8 x i32> %2
 763 }
 764
 765 define i8 @test_cmpps_256(<8 x float> %a, <8 x float> %b) {
     ; Compares two <8 x float> vectors through the mask.cmp.ps.256 intrinsic
     ; with predicate immediate 2 and an all-true input mask, then bitcasts the
     ; <8 x i1> result to i8. The expected code is vcmpleps into %k0, a kmovw
     ; into eax, and a vzeroupper before the return.
 766 ; CHECK-LABEL: test_cmpps_256:
 767 ; CHECK:       # %bb.0:
 768 ; CHECK-NEXT:    vcmpleps %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc1,0x02]
 769 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 770 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 771 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 772 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 773   %res = call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
 774   %1 = bitcast <8 x i1> %res to i8
 775   ret i8 %1
 776 }
 777 declare <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float>, <8 x float>, i32, <8 x i1>)
 778
 779 define i8 @test_cmpps_128(<4 x float> %a, <4 x float> %b) {
     ; Compares two <4 x float> vectors through the mask.cmp.ps.128 intrinsic
     ; with predicate immediate 2 and an all-true input mask. The <4 x i1>
     ; result is widened to <8 x i1> by a shufflevector whose indices 4..7
     ; select from the zeroinitializer operand, so the upper four bits of the
     ; returned i8 are zero. The expected code is vcmpleps into %k0 plus kmovw.
 780 ; CHECK-LABEL: test_cmpps_128:
 781 ; CHECK:       # %bb.0:
 782 ; CHECK-NEXT:    vcmpleps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x02]
 783 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 784 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 785 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 786   %res = call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 787   %1 = shufflevector <4 x i1> %res, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 788   %2 = bitcast <8 x i1> %1 to i8
 789   ret i8 %2
 790 }
 791 declare <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float>, <4 x float>, i32, <4 x i1>)
 792
 793 define i8 @test_cmppd_256(<4 x double> %a, <4 x double> %b) {
     ; Compares two <4 x double> vectors through the mask.cmp.pd.256 intrinsic
     ; with predicate immediate 2 and an all-true input mask. The <4 x i1>
     ; result is widened to <8 x i1> with zeros (shuffle indices 4..7 select
     ; from the zeroinitializer operand) and bitcast to i8. The expected code
     ; is vcmplepd into %k0, kmovw into eax, and vzeroupper before the return.
 794 ; CHECK-LABEL: test_cmppd_256:
 795 ; CHECK:       # %bb.0:
 796 ; CHECK-NEXT:    vcmplepd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc1,0x02]
 797 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 798 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 799 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 800 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 801   %res = call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %a, <4 x double> %b, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 802   %1 = shufflevector <4 x i1> %res, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 803   %2 = bitcast <8 x i1> %1 to i8
 804   ret i8 %2
 805 }
 806 declare <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double>, <4 x double>, i32, <4 x i1>)
 807
 808 define i8 @test_cmppd_128(<2 x double> %a, <2 x double> %b) {
     ; Compares two <2 x double> vectors through the mask.cmp.pd.128 intrinsic
     ; with predicate immediate 2 and an all-true input mask. The <2 x i1>
     ; result is widened to <8 x i1>: with two-element inputs, shuffle indices
     ; 2 and 3 address the zeroinitializer operand, so the repeated 2,3 pairs
     ; fill result elements 2..7 with zero. The expected code is vcmplepd into
     ; %k0 followed by kmovw into eax.
 809 ; CHECK-LABEL: test_cmppd_128:
 810 ; CHECK:       # %bb.0:
 811 ; CHECK-NEXT:    vcmplepd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02]
 812 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 813 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 814 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 815   %res = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 2, <2 x i1> <i1 true, i1 true>)
 816   %1 = shufflevector <2 x i1> %res, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
 817   %2 = bitcast <8 x i1> %1 to i8
 818   ret i8 %2
 819 }
 820 declare <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double>, <2 x double>, i32, <2 x i1>)
 821
 822 define <8 x float> @test_mm512_maskz_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
     ; Zero-masked 256-bit float max: the avx.max.ps.256 result is selected
     ; against zeroinitializer under the i8 mask bitcast to <8 x i1>. The
     ; expected code folds the select into one vmaxps with {%k1} {z}.
 823 ; X86-LABEL: test_mm512_maskz_max_ps_256:
 824 ; X86:       # %bb.0:
 825 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 826 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 827 ; X86-NEXT:    vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5f,0xc1]
 828 ; X86-NEXT:    retl # encoding: [0xc3]
 829 ;
 830 ; X64-LABEL: test_mm512_maskz_max_ps_256:
 831 ; X64:       # %bb.0:
 832 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 833 ; X64-NEXT:    vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5f,0xc1]
 834 ; X64-NEXT:    retq # encoding: [0xc3]
 835   %1 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1)
 836   %2 = bitcast i8 %mask to <8 x i1>
 837   %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> zeroinitializer
 838   ret <8 x float> %3
 839 }
 840
 841 define <8 x float> @test_mm512_mask_max_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
     ; Merge-masked 256-bit float max: the avx.max.ps.256 result is selected
     ; against %src under the i8 mask bitcast to <8 x i1>. The expected code
     ; emits vmaxps into the %src register (ymm2) under %k1 and then copies
     ; ymm2 to ymm0 for the return.
 842 ; X86-LABEL: test_mm512_mask_max_ps_256:
 843 ; X86:       # %bb.0:
 844 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 845 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 846 ; X86-NEXT:    vmaxps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5f,0xd1]
 847 ; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
 848 ; X86-NEXT:    retl # encoding: [0xc3]
 849 ;
 850 ; X64-LABEL: test_mm512_mask_max_ps_256:
 851 ; X64:       # %bb.0:
 852 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 853 ; X64-NEXT:    vmaxps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5f,0xd1]
 854 ; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
 855 ; X64-NEXT:    retq # encoding: [0xc3]
 856   %1 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1)
 857   %2 = bitcast i8 %mask to <8 x i1>
 858   %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %src
 859   ret <8 x float> %3
 860 }
 861
 862 define <8 x float> @test_mm512_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
     ; Unmasked 256-bit float max via avx.max.ps.256. The %mask parameter is
     ; not referenced in the body. The expected code is a single VEX-encoded
     ; vmaxps on ymm registers.
 863 ; CHECK-LABEL: test_mm512_max_ps_256:
 864 ; CHECK:       # %bb.0:
 865 ; CHECK-NEXT:    vmaxps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5f,0xc1]
 866 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 867   %1 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1)
 868   ret <8 x float> %1
 869 }
 870 declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>)
 871
 872 define <4 x float> @test_mm512_maskz_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
     ; Zero-masked 128-bit float max: the sse.max.ps result is selected against
     ; zeroinitializer under the first four elements of the i8 mask bitcast to
     ; <8 x i1>. The expected code is one vmaxps on xmm with {%k1} {z}.
 873 ; X86-LABEL: test_mm512_maskz_max_ps_128:
 874 ; X86:       # %bb.0:
 875 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 876 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 877 ; X86-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5f,0xc1]
 878 ; X86-NEXT:    retl # encoding: [0xc3]
 879 ;
 880 ; X64-LABEL: test_mm512_maskz_max_ps_128:
 881 ; X64:       # %bb.0:
 882 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 883 ; X64-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5f,0xc1]
 884 ; X64-NEXT:    retq # encoding: [0xc3]
 885   %1 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
 886   %2 = bitcast i8 %mask to <8 x i1>
 887   %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 888   %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> zeroinitializer
 889   ret <4 x float> %3
 890 }
 891
 892 define <4 x float> @test_mm512_mask_max_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
     ; Merge-masked 128-bit float max: the sse.max.ps result is selected against
     ; %src under the first four elements of the i8 mask bitcast to <8 x i1>.
     ; The expected code emits vmaxps into the %src register (xmm2) under %k1
     ; and then copies xmm2 to xmm0 for the return.
 893 ; X86-LABEL: test_mm512_mask_max_ps_128:
 894 ; X86:       # %bb.0:
 895 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 896 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 897 ; X86-NEXT:    vmaxps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5f,0xd1]
 898 ; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
 899 ; X86-NEXT:    retl # encoding: [0xc3]
 900 ;
 901 ; X64-LABEL: test_mm512_mask_max_ps_128:
 902 ; X64:       # %bb.0:
 903 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 904 ; X64-NEXT:    vmaxps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5f,0xd1]
 905 ; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
 906 ; X64-NEXT:    retq # encoding: [0xc3]
 907   %1 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
 908   %2 = bitcast i8 %mask to <8 x i1>
 909   %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 910   %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %src
 911   ret <4 x float> %3
 912 }
 913
 914 define <4 x float> @test_mm512_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
     ; Unmasked 128-bit float max via sse.max.ps. The %mask parameter is not
     ; referenced in the body. The expected code is a single VEX-encoded vmaxps
     ; on xmm registers.
 915 ; CHECK-LABEL: test_mm512_max_ps_128:
 916 ; CHECK:       # %bb.0:
 917 ; CHECK-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5f,0xc1]
 918 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 919   %1 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
 920   ret <4 x float> %1
 921 }
 922 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>)
 923
 924 define <8 x float> @test_mm512_maskz_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
     ; Zero-masked 256-bit float min: the avx.min.ps.256 result is selected
     ; against zeroinitializer under the i8 mask bitcast to <8 x i1>. The
     ; expected code folds the select into one vminps with {%k1} {z}.
 925 ; X86-LABEL: test_mm512_maskz_min_ps_256:
 926 ; X86:       # %bb.0:
 927 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 928 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 929 ; X86-NEXT:    vminps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5d,0xc1]
 930 ; X86-NEXT:    retl # encoding: [0xc3]
 931 ;
 932 ; X64-LABEL: test_mm512_maskz_min_ps_256:
 933 ; X64:       # %bb.0:
 934 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 935 ; X64-NEXT:    vminps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5d,0xc1]
 936 ; X64-NEXT:    retq # encoding: [0xc3]
 937   %1 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1)
 938   %2 = bitcast i8 %mask to <8 x i1>
 939   %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> zeroinitializer
 940   ret <8 x float> %3
 941 }
 942
 943 define <8 x float> @test_mm512_mask_min_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
     ; Merge-masked 256-bit float min: the avx.min.ps.256 result is selected
     ; against %src under the i8 mask bitcast to <8 x i1>. The expected code
     ; emits vminps into the %src register (ymm2) under %k1 and then copies
     ; ymm2 to ymm0 for the return.
 944 ; X86-LABEL: test_mm512_mask_min_ps_256:
 945 ; X86:       # %bb.0:
 946 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 947 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 948 ; X86-NEXT:    vminps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5d,0xd1]
 949 ; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
 950 ; X86-NEXT:    retl # encoding: [0xc3]
 951 ;
 952 ; X64-LABEL: test_mm512_mask_min_ps_256:
 953 ; X64:       # %bb.0:
 954 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 955 ; X64-NEXT:    vminps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5d,0xd1]
 956 ; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
 957 ; X64-NEXT:    retq # encoding: [0xc3]
 958   %1 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1)
 959   %2 = bitcast i8 %mask to <8 x i1>
 960   %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %src
 961   ret <8 x float> %3
 962 }
 963
 964 define <8 x float> @test_mm512_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
     ; Unmasked 256-bit float min via avx.min.ps.256. The %mask parameter is
     ; not referenced in the body. The expected code is a single VEX-encoded
     ; vminps on ymm registers.
 965 ; CHECK-LABEL: test_mm512_min_ps_256:
 966 ; CHECK:       # %bb.0:
 967 ; CHECK-NEXT:    vminps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5d,0xc1]
 968 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 969   %1 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1)
 970   ret <8 x float> %1
 971 }
 972 declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>)
 973
 974 define <4 x float> @test_mm512_maskz_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
     ; Zero-masked 128-bit float min: the sse.min.ps result is selected against
     ; zeroinitializer under the first four elements of the i8 mask bitcast to
     ; <8 x i1>. The expected code is one vminps on xmm with {%k1} {z}.
 975 ; X86-LABEL: test_mm512_maskz_min_ps_128:
 976 ; X86:       # %bb.0:
 977 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 978 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 979 ; X86-NEXT:    vminps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5d,0xc1]
 980 ; X86-NEXT:    retl # encoding: [0xc3]
 981 ;
 982 ; X64-LABEL: test_mm512_maskz_min_ps_128:
 983 ; X64:       # %bb.0:
 984 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 985 ; X64-NEXT:    vminps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5d,0xc1]
 986 ; X64-NEXT:    retq # encoding: [0xc3]
 987   %1 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
 988   %2 = bitcast i8 %mask to <8 x i1>
 989   %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 990   %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> zeroinitializer
 991   ret <4 x float> %3
 992 }
 993
 994 define <4 x float> @test_mm512_mask_min_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
     ; Merge-masked 128-bit float min: the sse.min.ps result is selected against
     ; %src under the first four elements of the i8 mask bitcast to <8 x i1>.
     ; The expected code emits vminps into the %src register (xmm2) under %k1
     ; and then copies xmm2 to xmm0 for the return.
 995 ; X86-LABEL: test_mm512_mask_min_ps_128:
 996 ; X86:       # %bb.0:
 997 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 998 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 999 ; X86-NEXT:    vminps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5d,0xd1]
1000 ; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
1001 ; X86-NEXT:    retl # encoding: [0xc3]
1002 ;
1003 ; X64-LABEL: test_mm512_mask_min_ps_128:
1004 ; X64:       # %bb.0:
1005 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1006 ; X64-NEXT:    vminps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5d,0xd1]
1007 ; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
1008 ; X64-NEXT:    retq # encoding: [0xc3]
1009   %1 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
1010   %2 = bitcast i8 %mask to <8 x i1>
1011   %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1012   %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %src
1013   ret <4 x float> %3
1014 }
1015
1016 define <4 x float> @test_mm512_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
     ; Unmasked 128-bit float min via sse.min.ps. The %mask parameter is not
     ; referenced in the body. The expected code is a single VEX-encoded vminps
     ; on xmm registers.
1017 ; CHECK-LABEL: test_mm512_min_ps_128:
1018 ; CHECK:       # %bb.0:
1019 ; CHECK-NEXT:    vminps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5d,0xc1]
1020 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1021   %1 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
1022   ret <4 x float> %1
1023 }
1024 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>)
1025
1026 define <4 x double> @test_getexp_pd_256(<4 x double> %a0) {
     ; Calls mask.getexp.pd.256 with a zeroinitializer passthru and an i8 mask
     ; of -1. The expected code is an unmasked vgetexppd on ymm0 with no mask
     ; register setup.
1027 ; CHECK-LABEL: test_getexp_pd_256:
1028 ; CHECK:       # %bb.0:
1029 ; CHECK-NEXT:    vgetexppd %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x42,0xc0]
1030 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1031   %res = call <4 x double> @llvm.x86.avx512.mask.getexp.pd.256(<4 x double> %a0,  <4 x double> zeroinitializer, i8 -1)
1032   ret <4 x double> %res
1033 }
1034
1035 declare <4 x double> @llvm.x86.avx512.mask.getexp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
1036
1037 define <8 x float> @test_getexp_ps_256(<8 x float> %a0) {
     ; Calls mask.getexp.ps.256 with a zeroinitializer passthru and an i8 mask
     ; of -1. The expected code is an unmasked vgetexpps on ymm0 with no mask
     ; register setup.
1038 ; CHECK-LABEL: test_getexp_ps_256:
1039 ; CHECK:       # %bb.0:
1040 ; CHECK-NEXT:    vgetexpps %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x42,0xc0]
1041 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1042   %res = call <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 -1)
1043   ret <8 x float> %res
1044 }
1045 declare <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
1046
1047 declare <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>)
1048
1049 define <4 x i32>@test_int_x86_avx512_vpermi2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
     ; Unmasked call of vpermi2var.d.128 with operands in argument order
     ; (%x0, %x1, %x2). The expected code is a single
     ; vpermt2d %xmm2, %xmm1, %xmm0, so the result lands in xmm0 with no copy.
1050 ; CHECK-LABEL: test_int_x86_avx512_vpermi2var_d_128:
1051 ; CHECK:       # %bb.0:
1052 ; CHECK-NEXT:    vpermt2d %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x7e,0xc2]
1053 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1054   %1 = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
1055   ret <4 x i32> %1
1056 }
1057
1058 define <4 x i32>@test_int_x86_avx512_mask_vpermi2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
     ; Merge-masked vpermi2var.d.128: the intrinsic result is selected against
     ; %x1 under the first four elements of %x3 bitcast to <8 x i1>. The
     ; expected code emits vpermi2d into xmm1 (the register holding %x1) under
     ; %k1 and then copies xmm1 to xmm0 for the return.
1059 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_d_128:
1060 ; X86:       # %bb.0:
1061 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1062 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1063 ; X86-NEXT:    vpermi2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x76,0xca]
1064 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1065 ; X86-NEXT:    retl # encoding: [0xc3]
1066 ;
1067 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_d_128:
1068 ; X64:       # %bb.0:
1069 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1070 ; X64-NEXT:    vpermi2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x76,0xca]
1071 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1072 ; X64-NEXT:    retq # encoding: [0xc3]
1073   %1 = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
1074   %2 = bitcast i8 %x3 to <8 x i1>
1075   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1076   %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x1
1077   ret <4 x i32> %3
1078 }
1079
1080 define <4 x i32>@test_int_x86_avx512_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
     ; Unmasked call of vpermi2var.d.128 with the first two operands swapped
     ; relative to the argument order: (%x1, %x0, %x2). The expected code is a
     ; single vpermi2d %xmm2, %xmm1, %xmm0.
1081 ; CHECK-LABEL: test_int_x86_avx512_vpermt2var_d_128:
1082 ; CHECK:       # %bb.0:
1083 ; CHECK-NEXT:    vpermi2d %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x76,0xc2]
1084 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1085   %1 = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> %x2)
1086   ret <4 x i32> %1
1087 }
1088
1089 define <4 x i32>@test_int_x86_avx512_mask_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
     ; Merge-masked variant with swapped operands: vpermi2var.d.128 is called
     ; as (%x1, %x0, %x2) and the result is selected against %x1 under the
     ; first four elements of %x3 bitcast to <8 x i1>. The expected code emits
     ; vpermt2d into xmm1 under %k1 and then copies xmm1 to xmm0.
1090 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_d_128:
1091 ; X86:       # %bb.0:
1092 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1093 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1094 ; X86-NEXT:    vpermt2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7e,0xca]
1095 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1096 ; X86-NEXT:    retl # encoding: [0xc3]
1097 ;
1098 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_d_128:
1099 ; X64:       # %bb.0:
1100 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1101 ; X64-NEXT:    vpermt2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7e,0xca]
1102 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1103 ; X64-NEXT:    retq # encoding: [0xc3]
1104   %1 = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> %x2)
1105   %2 = bitcast i8 %x3 to <8 x i1>
1106   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1107   %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x1
1108   ret <4 x i32> %3
1109 }
1110
1111 define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
     ; Zero-masked variant with swapped operands: vpermi2var.d.128 is called
     ; as (%x1, %x0, %x2) and the result is selected against zeroinitializer
     ; under the first four elements of %x3 bitcast to <8 x i1>. The expected
     ; code is one vpermi2d %xmm2, %xmm1, %xmm0 with {%k1} {z} and no copy.
1112 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128:
1113 ; X86:       # %bb.0:
1114 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1115 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1116 ; X86-NEXT:    vpermi2d %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x76,0xc2]
1117 ; X86-NEXT:    retl # encoding: [0xc3]
1118 ;
1119 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128:
1120 ; X64:       # %bb.0:
1121 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1122 ; X64-NEXT:    vpermi2d %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x76,0xc2]
1123 ; X64-NEXT:    retq # encoding: [0xc3]
1124   %1 = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> %x2)
1125   %2 = bitcast i8 %x3 to <8 x i1>
1126   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1127   %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> zeroinitializer
1128   ret <4 x i32> %3
1129 }
1130
1131 declare <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>)
1132
1133 define <8 x i32>@test_int_x86_avx512_vpermi2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
     ; Unmasked call of vpermi2var.d.256 with operands in argument order
     ; (%x0, %x1, %x2). The expected code is a single
     ; vpermt2d %ymm2, %ymm1, %ymm0, so the result lands in ymm0 with no copy.
1134 ; CHECK-LABEL: test_int_x86_avx512_vpermi2var_d_256:
1135 ; CHECK:       # %bb.0:
1136 ; CHECK-NEXT:    vpermt2d %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x7e,0xc2]
1137 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1138   %1 = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
1139   ret <8 x i32> %1
1140 }
1141
1142 define <8 x i32>@test_int_x86_avx512_mask_vpermi2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
     ; Merge-masked vpermi2var.d.256: the intrinsic result is selected against
     ; %x1 under %x3 bitcast to <8 x i1>. The expected code emits vpermi2d into
     ; ymm1 (the register holding %x1) under %k1 and then copies ymm1 to ymm0.
1143 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_d_256:
1144 ; X86:       # %bb.0:
1145 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1146 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1147 ; X86-NEXT:    vpermi2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x76,0xca]
1148 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1149 ; X86-NEXT:    retl # encoding: [0xc3]
1150 ;
1151 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_d_256:
1152 ; X64:       # %bb.0:
1153 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1154 ; X64-NEXT:    vpermi2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x76,0xca]
1155 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1156 ; X64-NEXT:    retq # encoding: [0xc3]
1157   %1 = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
1158   %2 = bitcast i8 %x3 to <8 x i1>
1159   %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x1
1160   ret <8 x i32> %3
1161 }
1162
; Unmasked vpermt2var form, 256-bit dwords: vpermi2var.d.256 is called with
; %x1 and %x0 swapped and the result is returned directly. The shared checks
; expect a single vpermi2d.
; NOTE(review): "ask" in the function name looks like a typo; the matching
; qword tests are named test_int_x86_avx512_vpermt2var_q_128/_256. The name is
; left unchanged here because the label check below is keyed on it.
1163 define <8 x i32>@test_int_x86_avx512_ask_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
1164 ; CHECK-LABEL: test_int_x86_avx512_ask_vpermt2var_d_256:
1165 ; CHECK:       # %bb.0:
1166 ; CHECK-NEXT:    vpermi2d %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x76,0xc2]
1167 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1168   %1 = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> %x2)
1169   ret <8 x i32> %1
1170 }
1171
; Merge-masked vpermt2var, 256-bit dwords: the intrinsic is called with %x1
; and %x0 swapped, and the <8 x i1> view of %x3 selects per lane between the
; result and %x1 (here the first intrinsic operand). Expected output is
; vpermt2d under {%k1} into %ymm1, followed by a move into the return register.
1172 define <8 x i32>@test_int_x86_avx512_mask_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
1173 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_d_256:
1174 ; X86:       # %bb.0:
1175 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1176 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1177 ; X86-NEXT:    vpermt2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7e,0xca]
1178 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1179 ; X86-NEXT:    retl # encoding: [0xc3]
1180 ;
1181 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_d_256:
1182 ; X64:       # %bb.0:
1183 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1184 ; X64-NEXT:    vpermt2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7e,0xca]
1185 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1186 ; X64-NEXT:    retq # encoding: [0xc3]
1187   %1 = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> %x2)
1188   %2 = bitcast i8 %x3 to <8 x i1>
1189   %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x1
1190   ret <8 x i32> %3
1191 }
1192
; Zero-masked vpermt2var, 256-bit dwords: the intrinsic is called with %x1 and
; %x0 swapped, and the <8 x i1> view of %x3 selects per lane between the result
; and zero. Expected output is one vpermi2d with {%k1} {z} and no extra move.
1193 define <8 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
1194 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_256:
1195 ; X86:       # %bb.0:
1196 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1197 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1198 ; X86-NEXT:    vpermi2d %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x76,0xc2]
1199 ; X86-NEXT:    retl # encoding: [0xc3]
1200 ;
1201 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_256:
1202 ; X64:       # %bb.0:
1203 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1204 ; X64-NEXT:    vpermi2d %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x76,0xc2]
1205 ; X64-NEXT:    retq # encoding: [0xc3]
1206   %1 = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> %x2)
1207   %2 = bitcast i8 %x3 to <8 x i1>
1208   %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer
1209   ret <8 x i32> %3
1210 }
1211
1212 declare <2 x double> @llvm.x86.avx512.vpermi2var.pd.128(<2 x double>, <2 x i64>, <2 x double>)
1213
; Unmasked vpermi2var, 128-bit doubles: the second operand is a <2 x i64>
; vector while the other two are <2 x double>. The shared checks expect a
; single vpermt2pd and no mask register setup.
1214 define <2 x double>@test_int_x86_avx512_vpermi2var_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2) {
1215 ; CHECK-LABEL: test_int_x86_avx512_vpermi2var_pd_128:
1216 ; CHECK:       # %bb.0:
1217 ; CHECK-NEXT:    vpermt2pd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x7f,0xc2]
1218 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1219   %1 = call <2 x double> @llvm.x86.avx512.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2)
1220   ret <2 x double> %1
1221 }
1222
; Merge-masked vpermi2var, 128-bit doubles: the passthru value is the integer
; operand %x1 bitcast to <2 x double>, and lanes 0-1 of the <8 x i1> view of
; %x3 form the select mask. Expected output is vpermi2pd under {%k1} into
; %xmm1, followed by a move into the return register.
1223 define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
1224 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128:
1225 ; X86:       # %bb.0:
1226 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1227 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1228 ; X86-NEXT:    vpermi2pd %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x77,0xca]
1229 ; X86-NEXT:    vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
1230 ; X86-NEXT:    retl # encoding: [0xc3]
1231 ;
1232 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128:
1233 ; X64:       # %bb.0:
1234 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1235 ; X64-NEXT:    vpermi2pd %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x77,0xca]
1236 ; X64-NEXT:    vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
1237 ; X64-NEXT:    retq # encoding: [0xc3]
1238   %1 = call <2 x double> @llvm.x86.avx512.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2)
1239   %2 = bitcast <2 x i64> %x1 to <2 x double>
1240   %3 = bitcast i8 %x3 to <8 x i1>
1241   %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <2 x i32> <i32 0, i32 1>
1242   %4 = select <2 x i1> %extract, <2 x double> %1, <2 x double> %2
1243   ret <2 x double> %4
1244 }
1245
1246 declare <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>)
1247
; Unmasked vpermi2var, 256-bit doubles: the second operand is a <4 x i64>
; vector while the other two are <4 x double>. The shared checks expect a
; single vpermt2pd on ymm registers.
1248 define <4 x double>@test_int_x86_avx512_vpermi2var_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2) {
1249 ; CHECK-LABEL: test_int_x86_avx512_vpermi2var_pd_256:
1250 ; CHECK:       # %bb.0:
1251 ; CHECK-NEXT:    vpermt2pd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x7f,0xc2]
1252 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1253   %1 = call <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2)
1254   ret <4 x double> %1
1255 }
1256
; Merge-masked vpermi2var, 256-bit doubles: the passthru value is the integer
; operand %x1 bitcast to <4 x double>, and lanes 0-3 of the <8 x i1> view of
; %x3 form the select mask. Expected output is vpermi2pd under {%k1} into
; %ymm1, followed by a move into the return register.
1257 define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
1258 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256:
1259 ; X86:       # %bb.0:
1260 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1261 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1262 ; X86-NEXT:    vpermi2pd %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x77,0xca]
1263 ; X86-NEXT:    vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
1264 ; X86-NEXT:    retl # encoding: [0xc3]
1265 ;
1266 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256:
1267 ; X64:       # %bb.0:
1268 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1269 ; X64-NEXT:    vpermi2pd %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x77,0xca]
1270 ; X64-NEXT:    vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
1271 ; X64-NEXT:    retq # encoding: [0xc3]
1272   %1 = call <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2)
1273   %2 = bitcast <4 x i64> %x1 to <4 x double>
1274   %3 = bitcast i8 %x3 to <8 x i1>
1275   %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1276   %4 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %2
1277   ret <4 x double> %4
1278 }
1279
1280 declare <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float>, <4 x i32>, <4 x float>)
1281
; Unmasked vpermi2var, 128-bit floats: the second operand is a <4 x i32>
; vector while the other two are <4 x float>. The shared checks expect a
; single vpermt2ps and no mask register setup.
1282 define <4 x float>@test_int_x86_avx512_vpermi2var_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2) {
1283 ; CHECK-LABEL: test_int_x86_avx512_vpermi2var_ps_128:
1284 ; CHECK:       # %bb.0:
1285 ; CHECK-NEXT:    vpermt2ps %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x7f,0xc2]
1286 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1287   %1 = call <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2)
1288   ret <4 x float> %1
1289 }
1290
; Merge-masked vpermi2var, 128-bit floats: the passthru value is the integer
; operand %x1 bitcast to <4 x float>, and lanes 0-3 of the <8 x i1> view of
; %x3 form the select mask. Expected output is vpermi2ps under {%k1} into
; %xmm1, followed by a move into the return register.
1291 define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {
1292 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128:
1293 ; X86:       # %bb.0:
1294 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1295 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1296 ; X86-NEXT:    vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
1297 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1298 ; X86-NEXT:    retl # encoding: [0xc3]
1299 ;
1300 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128:
1301 ; X64:       # %bb.0:
1302 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1303 ; X64-NEXT:    vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
1304 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1305 ; X64-NEXT:    retq # encoding: [0xc3]
1306   %1 = call <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2)
1307   %2 = bitcast <4 x i32> %x1 to <4 x float>
1308   %3 = bitcast i8 %x3 to <8 x i1>
1309   %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1310   %4 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %2
1311   ret <4 x float> %4
1312 }
1313
; Variant of test_int_x86_avx512_mask_vpermi2var_ps_128 where the integer
; operand arrives as <2 x i64> and is bitcast to <4 x i32> before being used
; both as the intrinsic operand and (after a second bitcast) as the passthru.
; The expected instructions are the same as in the non-cast test, i.e. the
; extra bitcast must not prevent the merge-masked vpermi2ps from being formed.
1314 define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128_cast(<4 x float> %x0, <2 x i64> %x1, <4 x float> %x2, i8 %x3) {
1315 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128_cast:
1316 ; X86:       # %bb.0:
1317 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1318 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1319 ; X86-NEXT:    vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
1320 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1321 ; X86-NEXT:    retl # encoding: [0xc3]
1322 ;
1323 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128_cast:
1324 ; X64:       # %bb.0:
1325 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1326 ; X64-NEXT:    vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
1327 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1328 ; X64-NEXT:    retq # encoding: [0xc3]
1329   %x1cast = bitcast <2 x i64> %x1 to <4 x i32>
1330   %1 = call <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1cast, <4 x float> %x2)
1331   %2 = bitcast <4 x i32> %x1cast to <4 x float>
1332   %3 = bitcast i8 %x3 to <8 x i1>
1333   %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1334   %4 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %2
1335   ret <4 x float> %4
1336 }
1337
1338 declare <8 x float> @llvm.x86.avx512.vpermi2var.ps.256(<8 x float>, <8 x i32>, <8 x float>)
1339
; Unmasked vpermi2var, 256-bit floats: the second operand is a <8 x i32>
; vector while the other two are <8 x float>. The shared checks expect a
; single vpermt2ps on ymm registers.
1340 define <8 x float>@test_int_x86_avx512_vpermi2var_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2) {
1341 ; CHECK-LABEL: test_int_x86_avx512_vpermi2var_ps_256:
1342 ; CHECK:       # %bb.0:
1343 ; CHECK-NEXT:    vpermt2ps %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x7f,0xc2]
1344 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1345   %1 = call <8 x float> @llvm.x86.avx512.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2)
1346   ret <8 x float> %1
1347 }
1348
; Merge-masked vpermi2var, 256-bit floats: the passthru value is the integer
; operand %x1 bitcast to <8 x float>, and all eight lanes of the <8 x i1> view
; of %x3 are used as the select mask (no lane extraction is needed). Expected
; output is vpermi2ps under {%k1} into %ymm1, followed by a move into the
; return register.
1349 define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
1350 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256:
1351 ; X86:       # %bb.0:
1352 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1353 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1354 ; X86-NEXT:    vpermi2ps %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x77,0xca]
1355 ; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
1356 ; X86-NEXT:    retl # encoding: [0xc3]
1357 ;
1358 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256:
1359 ; X64:       # %bb.0:
1360 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1361 ; X64-NEXT:    vpermi2ps %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x77,0xca]
1362 ; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
1363 ; X64-NEXT:    retq # encoding: [0xc3]
1364   %1 = call <8 x float> @llvm.x86.avx512.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2)
1365   %2 = bitcast <8 x i32> %x1 to <8 x float>
1366   %3 = bitcast i8 %x3 to <8 x i1>
1367   %4 = select <8 x i1> %3, <8 x float> %1, <8 x float> %2
1368   ret <8 x float> %4
1369 }
1370
1371 declare <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>)
1372
; Unmasked vpermi2var, 128-bit qwords: the operands are passed in their natural
; order and the result is returned directly. The shared checks expect a single
; vpermt2q and no mask register setup.
1373 define <2 x i64>@test_int_x86_avx512_vpermi2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
1374 ; CHECK-LABEL: test_int_x86_avx512_vpermi2var_q_128:
1375 ; CHECK:       # %bb.0:
1376 ; CHECK-NEXT:    vpermt2q %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x7e,0xc2]
1377 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1378   %1 = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
1379   ret <2 x i64> %1
1380 }
1381
; Merge-masked vpermi2var, 128-bit qwords: lanes 0-1 of the <8 x i1> view of
; %x3 select between the permute result and %x1 (the second intrinsic
; operand). Expected output is vpermi2q under {%k1} into %xmm1, followed by a
; move into the return register.
1382 define <2 x i64>@test_int_x86_avx512_mask_vpermi2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
1383 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_q_128:
1384 ; X86:       # %bb.0:
1385 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1386 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1387 ; X86-NEXT:    vpermi2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x76,0xca]
1388 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1389 ; X86-NEXT:    retl # encoding: [0xc3]
1390 ;
1391 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_q_128:
1392 ; X64:       # %bb.0:
1393 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1394 ; X64-NEXT:    vpermi2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x76,0xca]
1395 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1396 ; X64-NEXT:    retq # encoding: [0xc3]
1397   %1 = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
1398   %2 = bitcast i8 %x3 to <8 x i1>
1399   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
1400   %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x1
1401   ret <2 x i64> %3
1402 }
1403
; Unmasked vpermt2var form, 128-bit qwords: vpermi2var.q.128 is called with
; %x1 and %x0 swapped and the result is returned directly. The shared checks
; expect a single vpermi2q.
1404 define <2 x i64>@test_int_x86_avx512_vpermt2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
1405 ; CHECK-LABEL: test_int_x86_avx512_vpermt2var_q_128:
1406 ; CHECK:       # %bb.0:
1407 ; CHECK-NEXT:    vpermi2q %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x76,0xc2]
1408 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1409   %1 = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> %x2)
1410   ret <2 x i64> %1
1411 }
1412
; Merge-masked vpermt2var, 128-bit qwords: the intrinsic is called with %x1
; and %x0 swapped, and lanes 0-1 of the <8 x i1> view of %x3 select between the
; result and %x1 (here the first intrinsic operand). Expected output is
; vpermt2q under {%k1} into %xmm1, followed by a move into the return register.
1413 define <2 x i64>@test_int_x86_avx512_mask_vpermt2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
1414 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_q_128:
1415 ; X86:       # %bb.0:
1416 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1417 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1418 ; X86-NEXT:    vpermt2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7e,0xca]
1419 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1420 ; X86-NEXT:    retl # encoding: [0xc3]
1421 ;
1422 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_q_128:
1423 ; X64:       # %bb.0:
1424 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1425 ; X64-NEXT:    vpermt2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7e,0xca]
1426 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1427 ; X64-NEXT:    retq # encoding: [0xc3]
1428   %1 = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> %x2)
1429   %2 = bitcast i8 %x3 to <8 x i1>
1430   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
1431   %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x1
1432   ret <2 x i64> %3
1433 }
1434
; Zero-masked vpermt2var, 128-bit qwords: the intrinsic is called with %x1 and
; %x0 swapped, and lanes 0-1 of the <8 x i1> view of %x3 select between the
; result and zero. Expected output is one vpermi2q with {%k1} {z} and no extra
; move.
1435 define <2 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
1436 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_128:
1437 ; X86:       # %bb.0:
1438 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1439 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1440 ; X86-NEXT:    vpermi2q %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x76,0xc2]
1441 ; X86-NEXT:    retl # encoding: [0xc3]
1442 ;
1443 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_128:
1444 ; X64:       # %bb.0:
1445 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1446 ; X64-NEXT:    vpermi2q %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x76,0xc2]
1447 ; X64-NEXT:    retq # encoding: [0xc3]
1448   %1 = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> %x2)
1449   %2 = bitcast i8 %x3 to <8 x i1>
1450   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
1451   %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> zeroinitializer
1452   ret <2 x i64> %3
1453 }
1454
1455 declare <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>)
1456
; Unmasked vpermi2var, 256-bit qwords: the operands are passed in their natural
; order and the result is returned directly. The shared checks expect a single
; vpermt2q on ymm registers.
1457 define <4 x i64>@test_int_x86_avx512_vpermi2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
1458 ; CHECK-LABEL: test_int_x86_avx512_vpermi2var_q_256:
1459 ; CHECK:       # %bb.0:
1460 ; CHECK-NEXT:    vpermt2q %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x7e,0xc2]
1461 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1462   %1 = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
1463   ret <4 x i64> %1
1464 }
1465
; Merge-masked vpermi2var, 256-bit qwords: lanes 0-3 of the <8 x i1> view of
; %x3 select between the permute result and %x1 (the second intrinsic
; operand). Expected output is vpermi2q under {%k1} into %ymm1, followed by a
; move into the return register.
1466 define <4 x i64>@test_int_x86_avx512_mask_vpermi2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
1467 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_q_256:
1468 ; X86:       # %bb.0:
1469 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1470 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1471 ; X86-NEXT:    vpermi2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x76,0xca]
1472 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1473 ; X86-NEXT:    retl # encoding: [0xc3]
1474 ;
1475 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_q_256:
1476 ; X64:       # %bb.0:
1477 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1478 ; X64-NEXT:    vpermi2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x76,0xca]
1479 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1480 ; X64-NEXT:    retq # encoding: [0xc3]
1481   %1 = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
1482   %2 = bitcast i8 %x3 to <8 x i1>
1483   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1484   %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x1
1485   ret <4 x i64> %3
1486 }
1487
; Unmasked vpermt2var form, 256-bit qwords: vpermi2var.q.256 is called with
; %x1 and %x0 swapped and the result is returned directly. The shared checks
; expect a single vpermi2q on ymm registers.
1488 define <4 x i64>@test_int_x86_avx512_vpermt2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
1489 ; CHECK-LABEL: test_int_x86_avx512_vpermt2var_q_256:
1490 ; CHECK:       # %bb.0:
1491 ; CHECK-NEXT:    vpermi2q %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x76,0xc2]
1492 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1493   %1 = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> %x2)
1494   ret <4 x i64> %1
1495 }
1496
; Merge-masked vpermt2var, 256-bit qwords: the intrinsic is called with %x1
; and %x0 swapped, and lanes 0-3 of the <8 x i1> view of %x3 select between the
; result and %x1 (here the first intrinsic operand). Expected output is
; vpermt2q under {%k1} into %ymm1, followed by a move into the return register.
1497 define <4 x i64>@test_int_x86_avx512_mask_vpermt2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
1498 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_q_256:
1499 ; X86:       # %bb.0:
1500 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1501 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1502 ; X86-NEXT:    vpermt2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7e,0xca]
1503 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1504 ; X86-NEXT:    retl # encoding: [0xc3]
1505 ;
1506 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_q_256:
1507 ; X64:       # %bb.0:
1508 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1509 ; X64-NEXT:    vpermt2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7e,0xca]
1510 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1511 ; X64-NEXT:    retq # encoding: [0xc3]
1512   %1 = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> %x2)
1513   %2 = bitcast i8 %x3 to <8 x i1>
1514   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1515   %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x1
1516   ret <4 x i64> %3
1517 }
1518
; Zero-masked vpermt2var, 256-bit qwords: the intrinsic is called with %x1 and
; %x0 swapped, and lanes 0-3 of the <8 x i1> view of %x3 select between the
; result and zero. Expected output is one vpermi2q with {%k1} {z} and no extra
; move.
1519 define <4 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
1520 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_256:
1521 ; X86:       # %bb.0:
1522 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1523 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1524 ; X86-NEXT:    vpermi2q %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x76,0xc2]
1525 ; X86-NEXT:    retl # encoding: [0xc3]
1526 ;
1527 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_256:
1528 ; X64:       # %bb.0:
1529 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1530 ; X64-NEXT:    vpermi2q %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x76,0xc2]
1531 ; X64-NEXT:    retq # encoding: [0xc3]
1532   %1 = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> %x2)
1533   %2 = bitcast i8 %x3 to <8 x i1>
1534   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1535   %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> zeroinitializer
1536   ret <4 x i64> %3
1537 }
1538
1539 declare <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
1540
; mask.scalef.pd.128 with a constant all-ones mask (i8 -1): %x2 is still passed
; as the third operand, but the expected vscalefpd is unmasked and writes
; straight to %xmm0 without referencing %xmm2.
1541 define <2 x double>@test_int_x86_avx512_scalef_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2) {
1542 ; CHECK-LABEL: test_int_x86_avx512_scalef_pd_128:
1543 ; CHECK:       # %bb.0:
1544 ; CHECK-NEXT:    vscalefpd %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x2c,0xc1]
1545 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1546   %res = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
1547   ret <2 x double> %res
1548 }
1549
; mask.scalef.pd.128 with the runtime mask %x3 and %x2 as the third operand.
; Expected output is vscalefpd under {%k1} written into %xmm2 (the register
; holding %x2), followed by a move into the return register.
1550 define <2 x double>@test_int_x86_avx512_mask_scalef_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
1551 ; X86-LABEL: test_int_x86_avx512_mask_scalef_pd_128:
1552 ; X86:       # %bb.0:
1553 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1554 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1555 ; X86-NEXT:    vscalefpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x2c,0xd1]
1556 ; X86-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
1557 ; X86-NEXT:    retl # encoding: [0xc3]
1558 ;
1559 ; X64-LABEL: test_int_x86_avx512_mask_scalef_pd_128:
1560 ; X64:       # %bb.0:
1561 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1562 ; X64-NEXT:    vscalefpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x2c,0xd1]
1563 ; X64-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
1564 ; X64-NEXT:    retq # encoding: [0xc3]
1565   %res = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
1566   ret <2 x double> %res
1567 }
1568
1569 declare <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
1570
; mask.scalef.pd.256 with a constant all-ones mask (i8 -1): %x2 is still passed
; as the third operand, but the expected vscalefpd is unmasked and writes
; straight to %ymm0 without referencing %ymm2.
1571 define <4 x double>@test_int_x86_avx512_scalef_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2) {
1572 ; CHECK-LABEL: test_int_x86_avx512_scalef_pd_256:
1573 ; CHECK:       # %bb.0:
1574 ; CHECK-NEXT:    vscalefpd %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x2c,0xc1]
1575 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1576   %res = call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
1577   ret <4 x double> %res
1578 }
1579
; mask.scalef.pd.256 with the runtime mask %x3 and %x2 as the third operand.
; Expected output is vscalefpd under {%k1} written into %ymm2 (the register
; holding %x2), followed by a move into the return register.
1580 define <4 x double>@test_int_x86_avx512_mask_scalef_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
1581 ; X86-LABEL: test_int_x86_avx512_mask_scalef_pd_256:
1582 ; X86:       # %bb.0:
1583 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1584 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1585 ; X86-NEXT:    vscalefpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x2c,0xd1]
1586 ; X86-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
1587 ; X86-NEXT:    retl # encoding: [0xc3]
1588 ;
1589 ; X64-LABEL: test_int_x86_avx512_mask_scalef_pd_256:
1590 ; X64:       # %bb.0:
1591 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1592 ; X64-NEXT:    vscalefpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x2c,0xd1]
1593 ; X64-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
1594 ; X64-NEXT:    retq # encoding: [0xc3]
1595   %res = call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
1596   ret <4 x double> %res
1597 }
1598
1599 declare <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
1600
; mask.scalef.ps.128 with a constant all-ones mask (i8 -1): %x2 is still passed
; as the third operand, but the expected vscalefps is unmasked and writes
; straight to %xmm0 without referencing %xmm2.
1601 define <4 x float>@test_int_x86_avx512_scalef_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2) {
1602 ; CHECK-LABEL: test_int_x86_avx512_scalef_ps_128:
1603 ; CHECK:       # %bb.0:
1604 ; CHECK-NEXT:    vscalefps %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x2c,0xc1]
1605 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1606   %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
1607   ret <4 x float> %res
1608 }
1609
; mask.scalef.ps.128 with the runtime mask %x3 and %x2 as the third operand.
; Expected output is vscalefps under {%k1} written into %xmm2 (the register
; holding %x2), followed by a move into the return register.
1610 define <4 x float>@test_int_x86_avx512_mask_scalef_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
1611 ; X86-LABEL: test_int_x86_avx512_mask_scalef_ps_128:
1612 ; X86:       # %bb.0:
1613 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1614 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1615 ; X86-NEXT:    vscalefps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2c,0xd1]
1616 ; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
1617 ; X86-NEXT:    retl # encoding: [0xc3]
1618 ;
1619 ; X64-LABEL: test_int_x86_avx512_mask_scalef_ps_128:
1620 ; X64:       # %bb.0:
1621 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1622 ; X64-NEXT:    vscalefps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2c,0xd1]
1623 ; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
1624 ; X64-NEXT:    retq # encoding: [0xc3]
1625   %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
1626   ret <4 x float> %res
1627 }
1628
1629 declare <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
1630
; mask.scalef.ps.256 with a constant all-ones mask (i8 -1): %x2 is still passed
; as the third operand, but the expected vscalefps is unmasked and writes
; straight to %ymm0 without referencing %ymm2.
1631 define <8 x float>@test_int_x86_avx512_scalef_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2) {
1632 ; CHECK-LABEL: test_int_x86_avx512_scalef_ps_256:
1633 ; CHECK:       # %bb.0:
1634 ; CHECK-NEXT:    vscalefps %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x2c,0xc1]
1635 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1636   %res = call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
1637   ret <8 x float> %res
1638 }
1639
; mask.scalef.ps.256 with the runtime mask %x3 and %x2 as the third operand.
; Expected output is vscalefps under {%k1} written into %ymm2 (the register
; holding %x2), followed by a move into the return register.
1640 define <8 x float>@test_int_x86_avx512_mask_scalef_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
1641 ; X86-LABEL: test_int_x86_avx512_mask_scalef_ps_256:
1642 ; X86:       # %bb.0:
1643 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1644 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1645 ; X86-NEXT:    vscalefps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2c,0xd1]
1646 ; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
1647 ; X86-NEXT:    retl # encoding: [0xc3]
1648 ;
1649 ; X64-LABEL: test_int_x86_avx512_mask_scalef_ps_256:
1650 ; X64:       # %bb.0:
1651 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1652 ; X64-NEXT:    vscalefps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2c,0xd1]
1653 ; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
1654 ; X64-NEXT:    retq # encoding: [0xc3]
1655   %res = call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
1656   ret <8 x float> %res
1657 }
1658
1659 declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64>, <16 x i8>, i8)
1660
; Exercises mask.pmov.qb.128 (register destination) in three forms within one
; function: all-ones mask, runtime mask %x2 with passthru %x1, and runtime
; mask %x2 with a zero passthru. The three results are added together so that
; each call contributes to the return value. Expected output is an unmasked
; vpmovqb, a {%k1} merge-masked vpmovqb into %xmm1, and a {%k1} {z} vpmovqb,
; combined with two vpaddb instructions.
1661 define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
1662 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qb_128:
1663 ; X86:       # %bb.0:
1664 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1665 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1666 ; X86-NEXT:    vpmovqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x32,0xc2]
1667 ; X86-NEXT:    vpmovqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x32,0xc1]
1668 ; X86-NEXT:    vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
1669 ; X86-NEXT:    vpmovqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x32,0xc0]
1670 ; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1671 ; X86-NEXT:    retl # encoding: [0xc3]
1672 ;
1673 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qb_128:
1674 ; X64:       # %bb.0:
1675 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1676 ; X64-NEXT:    vpmovqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x32,0xc2]
1677 ; X64-NEXT:    vpmovqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x32,0xc1]
1678 ; X64-NEXT:    vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
1679 ; X64-NEXT:    vpmovqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x32,0xc0]
1680 ; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1681 ; X64-NEXT:    retq # encoding: [0xc3]
1682     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1)
1683     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2)
1684     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
1685     %res3 = add <16 x i8> %res0, %res1
1686     %res4 = add <16 x i8> %res3, %res2
1687     ret <16 x i8> %res4
1688 }
1689
1690 declare void @llvm.x86.avx512.mask.pmov.qb.mem.128(ptr %ptr, <2 x i64>, i8) ; operand roles as exercised below: destination pointer, source vector, i8 mask
1691
1692 define void @test_int_x86_avx512_mask_pmov_qb_mem_128(ptr %ptr, <2 x i64> %x1, i8 %x2) { ; codegen test: store-form intrinsic called with a constant all-ones mask and with the %x2 mask
1693 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_128:
1694 ; X86:       # %bb.0:
1695 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1696 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1697 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1698 ; X86-NEXT:    vpmovqb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x32,0x00]
1699 ; X86-NEXT:    vpmovqb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x32,0x00]
1700 ; X86-NEXT:    retl # encoding: [0xc3]
1701 ;
1702 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_128:
1703 ; X64:       # %bb.0:
1704 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1705 ; X64-NEXT:    vpmovqb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x32,0x07]
1706 ; X64-NEXT:    vpmovqb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x32,0x07]
1707 ; X64-NEXT:    retq # encoding: [0xc3]
1708     call void @llvm.x86.avx512.mask.pmov.qb.mem.128(ptr %ptr, <2 x i64> %x1, i8 -1) ; all-ones mask; checks expect an unmasked vpmovqb store
1709     call void @llvm.x86.avx512.mask.pmov.qb.mem.128(ptr %ptr, <2 x i64> %x1, i8 %x2) ; %x2 mask; checks expect the {%k1}-masked vpmovqb store to the same address
1710     ret void
1711 }
1712
1713 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64>, <16 x i8>, i8) ; operand roles as exercised below: source vector, pass-through vector, i8 mask
1714
1715 define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) { ; codegen test: one intrinsic, three mask/pass-through combinations, results summed
1716 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qb_128:
1717 ; X86:       # %bb.0:
1718 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1719 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1720 ; X86-NEXT:    vpmovsqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x22,0xc2]
1721 ; X86-NEXT:    vpmovsqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x22,0xc1]
1722 ; X86-NEXT:    vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
1723 ; X86-NEXT:    vpmovsqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x22,0xc0]
1724 ; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1725 ; X86-NEXT:    retl # encoding: [0xc3]
1726 ;
1727 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qb_128:
1728 ; X64:       # %bb.0:
1729 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1730 ; X64-NEXT:    vpmovsqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x22,0xc2]
1731 ; X64-NEXT:    vpmovsqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x22,0xc1]
1732 ; X64-NEXT:    vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
1733 ; X64-NEXT:    vpmovsqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x22,0xc0]
1734 ; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1735 ; X64-NEXT:    retq # encoding: [0xc3]
1736     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1) ; constant all-ones mask; checks expect the plain (unmasked) vpmovsqb
1737     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) ; %x2 mask with %x1 pass-through; checks expect the {%k1} merge form
1738     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) ; %x2 mask with zero pass-through; checks expect the {%k1} {z} form
1739     %res3 = add <16 x i8> %res0, %res1 ; the three results are added together and the sum is returned
1740     %res4 = add <16 x i8> %res3, %res2
1741     ret <16 x i8> %res4
1742 }
1743
1744 declare void @llvm.x86.avx512.mask.pmovs.qb.mem.128(ptr %ptr, <2 x i64>, i8) ; operand roles as exercised below: destination pointer, source vector, i8 mask
1745
1746 define void @test_int_x86_avx512_mask_pmovs_qb_mem_128(ptr %ptr, <2 x i64> %x1, i8 %x2) { ; codegen test: store-form intrinsic called with a constant all-ones mask and with the %x2 mask
1747 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_128:
1748 ; X86:       # %bb.0:
1749 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1750 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1751 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1752 ; X86-NEXT:    vpmovsqb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x22,0x00]
1753 ; X86-NEXT:    vpmovsqb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x22,0x00]
1754 ; X86-NEXT:    retl # encoding: [0xc3]
1755 ;
1756 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_128:
1757 ; X64:       # %bb.0:
1758 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1759 ; X64-NEXT:    vpmovsqb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x22,0x07]
1760 ; X64-NEXT:    vpmovsqb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x22,0x07]
1761 ; X64-NEXT:    retq # encoding: [0xc3]
1762     call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(ptr %ptr, <2 x i64> %x1, i8 -1) ; all-ones mask; checks expect an unmasked vpmovsqb store
1763     call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(ptr %ptr, <2 x i64> %x1, i8 %x2) ; %x2 mask; checks expect the {%k1}-masked vpmovsqb store to the same address
1764     ret void
1765 }
1766
1767 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64>, <16 x i8>, i8) ; operand roles as exercised below: source vector, pass-through vector, i8 mask
1768
1769 define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) { ; codegen test: one intrinsic, three mask/pass-through combinations, results summed
1770 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qb_128:
1771 ; X86:       # %bb.0:
1772 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1773 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1774 ; X86-NEXT:    vpmovusqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x12,0xc2]
1775 ; X86-NEXT:    vpmovusqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x12,0xc1]
1776 ; X86-NEXT:    vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
1777 ; X86-NEXT:    vpmovusqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x12,0xc0]
1778 ; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1779 ; X86-NEXT:    retl # encoding: [0xc3]
1780 ;
1781 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qb_128:
1782 ; X64:       # %bb.0:
1783 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1784 ; X64-NEXT:    vpmovusqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x12,0xc2]
1785 ; X64-NEXT:    vpmovusqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x12,0xc1]
1786 ; X64-NEXT:    vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
1787 ; X64-NEXT:    vpmovusqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x12,0xc0]
1788 ; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1789 ; X64-NEXT:    retq # encoding: [0xc3]
1790     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1) ; constant all-ones mask; checks expect the plain (unmasked) vpmovusqb
1791     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) ; %x2 mask with %x1 pass-through; checks expect the {%k1} merge form
1792     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) ; %x2 mask with zero pass-through; checks expect the {%k1} {z} form
1793     %res3 = add <16 x i8> %res0, %res1 ; the three results are added together and the sum is returned
1794     %res4 = add <16 x i8> %res3, %res2
1795     ret <16 x i8> %res4
1796 }
1797
1798 declare void @llvm.x86.avx512.mask.pmovus.qb.mem.128(ptr %ptr, <2 x i64>, i8) ; operand roles as exercised below: destination pointer, source vector, i8 mask
1799
1800 define void @test_int_x86_avx512_mask_pmovus_qb_mem_128(ptr %ptr, <2 x i64> %x1, i8 %x2) { ; codegen test: store-form intrinsic called with a constant all-ones mask and with the %x2 mask
1801 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_128:
1802 ; X86:       # %bb.0:
1803 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1804 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1805 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1806 ; X86-NEXT:    vpmovusqb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x12,0x00]
1807 ; X86-NEXT:    vpmovusqb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x12,0x00]
1808 ; X86-NEXT:    retl # encoding: [0xc3]
1809 ;
1810 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_128:
1811 ; X64:       # %bb.0:
1812 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1813 ; X64-NEXT:    vpmovusqb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x12,0x07]
1814 ; X64-NEXT:    vpmovusqb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x12,0x07]
1815 ; X64-NEXT:    retq # encoding: [0xc3]
1816     call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(ptr %ptr, <2 x i64> %x1, i8 -1) ; all-ones mask; checks expect an unmasked vpmovusqb store
1817     call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(ptr %ptr, <2 x i64> %x1, i8 %x2) ; %x2 mask; checks expect the {%k1}-masked vpmovusqb store to the same address
1818     ret void
1819 }
1820
1821 declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64>, <16 x i8>, i8) ; operand roles as exercised below: source vector, pass-through vector, i8 mask
1822
1823 define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) { ; codegen test: one intrinsic, three mask/pass-through combinations, results summed; checks also expect vzeroupper before the return
1824 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qb_256:
1825 ; X86:       # %bb.0:
1826 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1827 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1828 ; X86-NEXT:    vpmovqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x32,0xc2]
1829 ; X86-NEXT:    vpmovqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x32,0xc1]
1830 ; X86-NEXT:    vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
1831 ; X86-NEXT:    vpmovqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x32,0xc0]
1832 ; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1833 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1834 ; X86-NEXT:    retl # encoding: [0xc3]
1835 ;
1836 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qb_256:
1837 ; X64:       # %bb.0:
1838 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1839 ; X64-NEXT:    vpmovqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x32,0xc2]
1840 ; X64-NEXT:    vpmovqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x32,0xc1]
1841 ; X64-NEXT:    vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
1842 ; X64-NEXT:    vpmovqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x32,0xc0]
1843 ; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1844 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1845 ; X64-NEXT:    retq # encoding: [0xc3]
1846     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1) ; constant all-ones mask; checks expect the plain (unmasked) vpmovqb
1847     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) ; %x2 mask with %x1 pass-through; checks expect the {%k1} merge form
1848     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) ; %x2 mask with zero pass-through; checks expect the {%k1} {z} form
1849     %res3 = add <16 x i8> %res0, %res1 ; the three results are added together and the sum is returned
1850     %res4 = add <16 x i8> %res3, %res2
1851     ret <16 x i8> %res4
1852 }
1853
1854 declare void @llvm.x86.avx512.mask.pmov.qb.mem.256(ptr %ptr, <4 x i64>, i8) ; operand roles as exercised below: destination pointer, source vector, i8 mask
1855
1856 define void @test_int_x86_avx512_mask_pmov_qb_mem_256(ptr %ptr, <4 x i64> %x1, i8 %x2) { ; codegen test: store-form intrinsic called with a constant all-ones mask and with the %x2 mask; checks also expect vzeroupper before the return
1857 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_256:
1858 ; X86:       # %bb.0:
1859 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1860 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1861 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1862 ; X86-NEXT:    vpmovqb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x32,0x00]
1863 ; X86-NEXT:    vpmovqb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x32,0x00]
1864 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1865 ; X86-NEXT:    retl # encoding: [0xc3]
1866 ;
1867 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_256:
1868 ; X64:       # %bb.0:
1869 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1870 ; X64-NEXT:    vpmovqb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x32,0x07]
1871 ; X64-NEXT:    vpmovqb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x32,0x07]
1872 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1873 ; X64-NEXT:    retq # encoding: [0xc3]
1874     call void @llvm.x86.avx512.mask.pmov.qb.mem.256(ptr %ptr, <4 x i64> %x1, i8 -1) ; all-ones mask; checks expect an unmasked vpmovqb store
1875     call void @llvm.x86.avx512.mask.pmov.qb.mem.256(ptr %ptr, <4 x i64> %x1, i8 %x2) ; %x2 mask; checks expect the {%k1}-masked vpmovqb store to the same address
1876     ret void
1877 }
1878
1879 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64>, <16 x i8>, i8) ; operand roles as exercised below: source vector, pass-through vector, i8 mask
1880
1881 define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) { ; codegen test: one intrinsic, three mask/pass-through combinations, results summed; checks also expect vzeroupper before the return
1882 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qb_256:
1883 ; X86:       # %bb.0:
1884 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1885 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1886 ; X86-NEXT:    vpmovsqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x22,0xc2]
1887 ; X86-NEXT:    vpmovsqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x22,0xc1]
1888 ; X86-NEXT:    vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
1889 ; X86-NEXT:    vpmovsqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x22,0xc0]
1890 ; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1891 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1892 ; X86-NEXT:    retl # encoding: [0xc3]
1893 ;
1894 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qb_256:
1895 ; X64:       # %bb.0:
1896 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1897 ; X64-NEXT:    vpmovsqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x22,0xc2]
1898 ; X64-NEXT:    vpmovsqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x22,0xc1]
1899 ; X64-NEXT:    vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
1900 ; X64-NEXT:    vpmovsqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x22,0xc0]
1901 ; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1902 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1903 ; X64-NEXT:    retq # encoding: [0xc3]
1904     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1) ; constant all-ones mask; checks expect the plain (unmasked) vpmovsqb
1905     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) ; %x2 mask with %x1 pass-through; checks expect the {%k1} merge form
1906     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) ; %x2 mask with zero pass-through; checks expect the {%k1} {z} form
1907     %res3 = add <16 x i8> %res0, %res1 ; the three results are added together and the sum is returned
1908     %res4 = add <16 x i8> %res3, %res2
1909     ret <16 x i8> %res4
1910 }
1911
1912 declare void @llvm.x86.avx512.mask.pmovs.qb.mem.256(ptr %ptr, <4 x i64>, i8) ; operand roles as exercised below: destination pointer, source vector, i8 mask
1913
1914 define void @test_int_x86_avx512_mask_pmovs_qb_mem_256(ptr %ptr, <4 x i64> %x1, i8 %x2) { ; codegen test: store-form intrinsic called with a constant all-ones mask and with the %x2 mask; checks also expect vzeroupper before the return
1915 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_256:
1916 ; X86:       # %bb.0:
1917 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1918 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1919 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1920 ; X86-NEXT:    vpmovsqb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x22,0x00]
1921 ; X86-NEXT:    vpmovsqb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x22,0x00]
1922 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1923 ; X86-NEXT:    retl # encoding: [0xc3]
1924 ;
1925 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_256:
1926 ; X64:       # %bb.0:
1927 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1928 ; X64-NEXT:    vpmovsqb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x22,0x07]
1929 ; X64-NEXT:    vpmovsqb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x22,0x07]
1930 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1931 ; X64-NEXT:    retq # encoding: [0xc3]
1932     call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(ptr %ptr, <4 x i64> %x1, i8 -1) ; all-ones mask; checks expect an unmasked vpmovsqb store
1933     call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(ptr %ptr, <4 x i64> %x1, i8 %x2) ; %x2 mask; checks expect the {%k1}-masked vpmovsqb store to the same address
1934     ret void
1935 }
1936
1937 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64>, <16 x i8>, i8) ; operand roles as exercised below: source vector, pass-through vector, i8 mask
1938
1939 define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) { ; codegen test: one intrinsic, three mask/pass-through combinations, results summed; checks also expect vzeroupper before the return
1940 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qb_256:
1941 ; X86:       # %bb.0:
1942 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1943 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1944 ; X86-NEXT:    vpmovusqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x12,0xc2]
1945 ; X86-NEXT:    vpmovusqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x12,0xc1]
1946 ; X86-NEXT:    vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
1947 ; X86-NEXT:    vpmovusqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x12,0xc0]
1948 ; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1949 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1950 ; X86-NEXT:    retl # encoding: [0xc3]
1951 ;
1952 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qb_256:
1953 ; X64:       # %bb.0:
1954 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1955 ; X64-NEXT:    vpmovusqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x12,0xc2]
1956 ; X64-NEXT:    vpmovusqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x12,0xc1]
1957 ; X64-NEXT:    vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
1958 ; X64-NEXT:    vpmovusqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x12,0xc0]
1959 ; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1960 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1961 ; X64-NEXT:    retq # encoding: [0xc3]
1962     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1) ; constant all-ones mask; checks expect the plain (unmasked) vpmovusqb
1963     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) ; %x2 mask with %x1 pass-through; checks expect the {%k1} merge form
1964     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) ; %x2 mask with zero pass-through; checks expect the {%k1} {z} form
1965     %res3 = add <16 x i8> %res0, %res1 ; the three results are added together and the sum is returned
1966     %res4 = add <16 x i8> %res3, %res2
1967     ret <16 x i8> %res4
1968 }
1969
1970 declare void @llvm.x86.avx512.mask.pmovus.qb.mem.256(ptr %ptr, <4 x i64>, i8) ; operand roles as exercised below: destination pointer, source vector, i8 mask
1971
1972 define void @test_int_x86_avx512_mask_pmovus_qb_mem_256(ptr %ptr, <4 x i64> %x1, i8 %x2) { ; codegen test: store-form intrinsic called with a constant all-ones mask and with the %x2 mask; checks also expect vzeroupper before the return
1973 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_256:
1974 ; X86:       # %bb.0:
1975 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1976 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1977 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1978 ; X86-NEXT:    vpmovusqb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x12,0x00]
1979 ; X86-NEXT:    vpmovusqb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x12,0x00]
1980 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1981 ; X86-NEXT:    retl # encoding: [0xc3]
1982 ;
1983 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_256:
1984 ; X64:       # %bb.0:
1985 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1986 ; X64-NEXT:    vpmovusqb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x12,0x07]
1987 ; X64-NEXT:    vpmovusqb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x12,0x07]
1988 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1989 ; X64-NEXT:    retq # encoding: [0xc3]
1990     call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(ptr %ptr, <4 x i64> %x1, i8 -1) ; all-ones mask; checks expect an unmasked vpmovusqb store
1991     call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(ptr %ptr, <4 x i64> %x1, i8 %x2) ; %x2 mask; checks expect the {%k1}-masked vpmovusqb store to the same address
1992     ret void
1993 }
1994
1995 declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64>, <8 x i16>, i8) ; operand roles as exercised below: source vector, pass-through vector, i8 mask
1996
1997 define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) { ; codegen test: one intrinsic, three mask/pass-through combinations, results summed
1998 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qw_128:
1999 ; X86:       # %bb.0:
2000 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2001 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2002 ; X86-NEXT:    vpmovqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x34,0xc2]
2003 ; X86-NEXT:    vpmovqw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x34,0xc1]
2004 ; X86-NEXT:    vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
2005 ; X86-NEXT:    vpmovqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x34,0xc0]
2006 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2007 ; X86-NEXT:    retl # encoding: [0xc3]
2008 ;
2009 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qw_128:
2010 ; X64:       # %bb.0:
2011 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2012 ; X64-NEXT:    vpmovqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x34,0xc2]
2013 ; X64-NEXT:    vpmovqw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x34,0xc1]
2014 ; X64-NEXT:    vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
2015 ; X64-NEXT:    vpmovqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x34,0xc0]
2016 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2017 ; X64-NEXT:    retq # encoding: [0xc3]
2018     %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1) ; constant all-ones mask; checks expect the plain (unmasked) vpmovqw
2019     %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) ; %x2 mask with %x1 pass-through; checks expect the {%k1} merge form
2020     %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) ; %x2 mask with zero pass-through; checks expect the {%k1} {z} form
2021     %res3 = add <8 x i16> %res0, %res1 ; the three results are added together and the sum is returned
2022     %res4 = add <8 x i16> %res3, %res2
2023     ret <8 x i16> %res4
2024 }
2025
2026 declare void @llvm.x86.avx512.mask.pmov.qw.mem.128(ptr %ptr, <2 x i64>, i8) ; operand roles as exercised below: destination pointer, source vector, i8 mask
2027
2028 define void @test_int_x86_avx512_mask_pmov_qw_mem_128(ptr %ptr, <2 x i64> %x1, i8 %x2) { ; codegen test: store-form intrinsic called with a constant all-ones mask and with the %x2 mask
2029 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_128:
2030 ; X86:       # %bb.0:
2031 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2032 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2033 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2034 ; X86-NEXT:    vpmovqw %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x34,0x00]
2035 ; X86-NEXT:    vpmovqw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x34,0x00]
2036 ; X86-NEXT:    retl # encoding: [0xc3]
2037 ;
2038 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_128:
2039 ; X64:       # %bb.0:
2040 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2041 ; X64-NEXT:    vpmovqw %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x34,0x07]
2042 ; X64-NEXT:    vpmovqw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x34,0x07]
2043 ; X64-NEXT:    retq # encoding: [0xc3]
2044     call void @llvm.x86.avx512.mask.pmov.qw.mem.128(ptr %ptr, <2 x i64> %x1, i8 -1) ; all-ones mask; checks expect an unmasked vpmovqw store
2045     call void @llvm.x86.avx512.mask.pmov.qw.mem.128(ptr %ptr, <2 x i64> %x1, i8 %x2) ; %x2 mask; checks expect the {%k1}-masked vpmovqw store to the same address
2046     ret void
2047 }
2048
2049 declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64>, <8 x i16>, i8) ; operand roles as exercised below: source vector, pass-through vector, i8 mask
2050
2051 define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) { ; codegen test: one intrinsic, three mask/pass-through combinations, results summed
2052 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qw_128:
2053 ; X86:       # %bb.0:
2054 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2055 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2056 ; X86-NEXT:    vpmovsqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x24,0xc2]
2057 ; X86-NEXT:    vpmovsqw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x24,0xc1]
2058 ; X86-NEXT:    vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
2059 ; X86-NEXT:    vpmovsqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x24,0xc0]
2060 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2061 ; X86-NEXT:    retl # encoding: [0xc3]
2062 ;
2063 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qw_128:
2064 ; X64:       # %bb.0:
2065 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2066 ; X64-NEXT:    vpmovsqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x24,0xc2]
2067 ; X64-NEXT:    vpmovsqw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x24,0xc1]
2068 ; X64-NEXT:    vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
2069 ; X64-NEXT:    vpmovsqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x24,0xc0]
2070 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2071 ; X64-NEXT:    retq # encoding: [0xc3]
2072     %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1) ; constant all-ones mask; checks expect the plain (unmasked) vpmovsqw
2073     %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) ; %x2 mask with %x1 pass-through; checks expect the {%k1} merge form
2074     %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) ; %x2 mask with zero pass-through; checks expect the {%k1} {z} form
2075     %res3 = add <8 x i16> %res0, %res1 ; the three results are added together and the sum is returned
2076     %res4 = add <8 x i16> %res3, %res2
2077     ret <8 x i16> %res4
2078 }
2079
2080 declare void @llvm.x86.avx512.mask.pmovs.qw.mem.128(ptr %ptr, <2 x i64>, i8) ; operand roles as exercised below: destination pointer, source vector, i8 mask
2081
2082 define void @test_int_x86_avx512_mask_pmovs_qw_mem_128(ptr %ptr, <2 x i64> %x1, i8 %x2) { ; codegen test: store-form intrinsic called with a constant all-ones mask and with the %x2 mask
2083 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_128:
2084 ; X86:       # %bb.0:
2085 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2086 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2087 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2088 ; X86-NEXT:    vpmovsqw %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x24,0x00]
2089 ; X86-NEXT:    vpmovsqw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x24,0x00]
2090 ; X86-NEXT:    retl # encoding: [0xc3]
2091 ;
2092 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_128:
2093 ; X64:       # %bb.0:
2094 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2095 ; X64-NEXT:    vpmovsqw %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x24,0x07]
2096 ; X64-NEXT:    vpmovsqw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x24,0x07]
2097 ; X64-NEXT:    retq # encoding: [0xc3]
2098     call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(ptr %ptr, <2 x i64> %x1, i8 -1) ; all-ones mask; checks expect an unmasked vpmovsqw store
2099     call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(ptr %ptr, <2 x i64> %x1, i8 %x2) ; %x2 mask; checks expect the {%k1}-masked vpmovsqw store to the same address
2100     ret void
2101 }
2102
2103 declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64>, <8 x i16>, i8) ; operand roles as exercised below: source vector, pass-through vector, i8 mask
2104
2105 define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) { ; codegen test: one intrinsic, three mask/pass-through combinations, results summed
2106 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qw_128:
2107 ; X86:       # %bb.0:
2108 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2109 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2110 ; X86-NEXT:    vpmovusqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x14,0xc2]
2111 ; X86-NEXT:    vpmovusqw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x14,0xc1]
2112 ; X86-NEXT:    vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
2113 ; X86-NEXT:    vpmovusqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x14,0xc0]
2114 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2115 ; X86-NEXT:    retl # encoding: [0xc3]
2116 ;
2117 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qw_128:
2118 ; X64:       # %bb.0:
2119 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2120 ; X64-NEXT:    vpmovusqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x14,0xc2]
2121 ; X64-NEXT:    vpmovusqw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x14,0xc1]
2122 ; X64-NEXT:    vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
2123 ; X64-NEXT:    vpmovusqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x14,0xc0]
2124 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2125 ; X64-NEXT:    retq # encoding: [0xc3]
2126     %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1) ; constant all-ones mask; checks expect the plain (unmasked) vpmovusqw
2127     %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) ; %x2 mask with %x1 pass-through; checks expect the {%k1} merge form
2128     %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) ; %x2 mask with zero pass-through; checks expect the {%k1} {z} form
2129     %res3 = add <8 x i16> %res0, %res1 ; the three results are added together and the sum is returned
2130     %res4 = add <8 x i16> %res3, %res2
2131     ret <8 x i16> %res4
2132 }
2133
2134 declare void @llvm.x86.avx512.mask.pmovus.qw.mem.128(ptr %ptr, <2 x i64>, i8) ; operand roles as exercised below: destination pointer, source vector, i8 mask
2135
2136 define void @test_int_x86_avx512_mask_pmovus_qw_mem_128(ptr %ptr, <2 x i64> %x1, i8 %x2) { ; codegen test: store-form intrinsic called with a constant all-ones mask and with the %x2 mask
2137 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_128:
2138 ; X86:       # %bb.0:
2139 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2140 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2141 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2142 ; X86-NEXT:    vpmovusqw %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x14,0x00]
2143 ; X86-NEXT:    vpmovusqw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x14,0x00]
2144 ; X86-NEXT:    retl # encoding: [0xc3]
2145 ;
2146 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_128:
2147 ; X64:       # %bb.0:
2148 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2149 ; X64-NEXT:    vpmovusqw %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x14,0x07]
2150 ; X64-NEXT:    vpmovusqw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x14,0x07]
2151 ; X64-NEXT:    retq # encoding: [0xc3]
2152     call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(ptr %ptr, <2 x i64> %x1, i8 -1) ; all-ones mask; checks expect an unmasked vpmovusqw store
2153     call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(ptr %ptr, <2 x i64> %x1, i8 %x2) ; %x2 mask; checks expect the {%k1}-masked vpmovusqw store to the same address
2154     ret void
2155 }
2156
2157 declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64>, <8 x i16>, i8) ; operand roles as exercised below: source vector, pass-through vector, i8 mask
2158
2159 define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) { ; codegen test: one intrinsic, three mask/pass-through combinations, results summed; checks also expect vzeroupper before the return
2160 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qw_256:
2161 ; X86:       # %bb.0:
2162 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2163 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2164 ; X86-NEXT:    vpmovqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x34,0xc2]
2165 ; X86-NEXT:    vpmovqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x34,0xc1]
2166 ; X86-NEXT:    vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
2167 ; X86-NEXT:    vpmovqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x34,0xc0]
2168 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2169 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2170 ; X86-NEXT:    retl # encoding: [0xc3]
2171 ;
2172 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qw_256:
2173 ; X64:       # %bb.0:
2174 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2175 ; X64-NEXT:    vpmovqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x34,0xc2]
2176 ; X64-NEXT:    vpmovqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x34,0xc1]
2177 ; X64-NEXT:    vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
2178 ; X64-NEXT:    vpmovqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x34,0xc0]
2179 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2180 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2181 ; X64-NEXT:    retq # encoding: [0xc3]
2182     %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1) ; constant all-ones mask; checks expect the plain (unmasked) vpmovqw
2183     %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) ; %x2 mask with %x1 pass-through; checks expect the {%k1} merge form
2184     %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) ; %x2 mask with zero pass-through; checks expect the {%k1} {z} form
2185     %res3 = add <8 x i16> %res0, %res1 ; the three results are added together and the sum is returned
2186     %res4 = add <8 x i16> %res3, %res2
2187     ret <8 x i16> %res4
2188 }
2189
2190 declare void @llvm.x86.avx512.mask.pmov.qw.mem.256(ptr %ptr, <4 x i64>, i8) ; operand roles as exercised below: destination pointer, source vector, i8 mask
2191
2192 define void @test_int_x86_avx512_mask_pmov_qw_mem_256(ptr %ptr, <4 x i64> %x1, i8 %x2) { ; codegen test: store-form intrinsic called with a constant all-ones mask and with the %x2 mask; checks also expect vzeroupper before the return
2193 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_256:
2194 ; X86:       # %bb.0:
2195 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2196 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2197 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2198 ; X86-NEXT:    vpmovqw %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x34,0x00]
2199 ; X86-NEXT:    vpmovqw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x34,0x00]
2200 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2201 ; X86-NEXT:    retl # encoding: [0xc3]
2202 ;
2203 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_256:
2204 ; X64:       # %bb.0:
2205 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2206 ; X64-NEXT:    vpmovqw %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x34,0x07]
2207 ; X64-NEXT:    vpmovqw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x34,0x07]
2208 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2209 ; X64-NEXT:    retq # encoding: [0xc3]
2210     call void @llvm.x86.avx512.mask.pmov.qw.mem.256(ptr %ptr, <4 x i64> %x1, i8 -1) ; all-ones mask; checks expect an unmasked vpmovqw store
2211     call void @llvm.x86.avx512.mask.pmov.qw.mem.256(ptr %ptr, <4 x i64> %x1, i8 %x2) ; %x2 mask; checks expect the {%k1}-masked vpmovqw store to the same address
2212     ret void
2213 }
2214
2215 declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64>, <8 x i16>, i8)
2216
; Register-destination test for @llvm.x86.avx512.mask.pmovs.qw.256. Three calls
; share the source %x0; their results are added together and returned:
;   %res0 - second operand %x1, mask i8 -1 -> unmasked vpmovsqw into %xmm2
;   %res1 - second operand %x1, mask %x2   -> vpmovsqw into %xmm1 under {%k1}
;   %res2 - zeroinitializer,    mask %x2   -> vpmovsqw into %xmm0 under {%k1} {z}
; The two IR adds are checked as vpaddw, followed by vzeroupper.
2217 define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) {
2218 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qw_256:
2219 ; X86:       # %bb.0:
2220 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2221 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2222 ; X86-NEXT:    vpmovsqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x24,0xc2]
2223 ; X86-NEXT:    vpmovsqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x24,0xc1]
2224 ; X86-NEXT:    vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
2225 ; X86-NEXT:    vpmovsqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x24,0xc0]
2226 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2227 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2228 ; X86-NEXT:    retl # encoding: [0xc3]
2229 ;
2230 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qw_256:
2231 ; X64:       # %bb.0:
2232 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2233 ; X64-NEXT:    vpmovsqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x24,0xc2]
2234 ; X64-NEXT:    vpmovsqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x24,0xc1]
2235 ; X64-NEXT:    vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
2236 ; X64-NEXT:    vpmovsqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x24,0xc0]
2237 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2238 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2239 ; X64-NEXT:    retq # encoding: [0xc3]
2240     %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1)
2241     %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2)
2242     %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
2243     %res3 = add <8 x i16> %res0, %res1
2244     %res4 = add <8 x i16> %res3, %res2
2245     ret <8 x i16> %res4
2246 }
2247
2248 declare void @llvm.x86.avx512.mask.pmovs.qw.mem.256(ptr %ptr, <4 x i64>, i8)
2249
; Store-to-memory test for @llvm.x86.avx512.mask.pmovs.qw.mem.256. The intrinsic
; is called with mask i8 -1 and then with %x2, using the same %ptr and %x1.
; Expected output on both targets: an unmasked vpmovsqw store, a {%k1}
; vpmovsqw store to the same address, then vzeroupper.
2250 define void @test_int_x86_avx512_mask_pmovs_qw_mem_256(ptr %ptr, <4 x i64> %x1, i8 %x2) {
2251 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_256:
2252 ; X86:       # %bb.0:
2253 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2254 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2255 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2256 ; X86-NEXT:    vpmovsqw %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x24,0x00]
2257 ; X86-NEXT:    vpmovsqw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x24,0x00]
2258 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2259 ; X86-NEXT:    retl # encoding: [0xc3]
2260 ;
2261 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_256:
2262 ; X64:       # %bb.0:
2263 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2264 ; X64-NEXT:    vpmovsqw %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x24,0x07]
2265 ; X64-NEXT:    vpmovsqw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x24,0x07]
2266 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2267 ; X64-NEXT:    retq # encoding: [0xc3]
2268     call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(ptr %ptr, <4 x i64> %x1, i8 -1)
2269     call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(ptr %ptr, <4 x i64> %x1, i8 %x2)
2270     ret void
2271 }
2272
2273 declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64>, <8 x i16>, i8)
2274
; Register-destination test for @llvm.x86.avx512.mask.pmovus.qw.256. Three calls
; share the source %x0; their results are added together and returned:
;   %res0 - second operand %x1, mask i8 -1 -> unmasked vpmovusqw into %xmm2
;   %res1 - second operand %x1, mask %x2   -> vpmovusqw into %xmm1 under {%k1}
;   %res2 - zeroinitializer,    mask %x2   -> vpmovusqw into %xmm0 under {%k1} {z}
; The two IR adds are checked as vpaddw, followed by vzeroupper.
2275 define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) {
2276 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qw_256:
2277 ; X86:       # %bb.0:
2278 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2279 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2280 ; X86-NEXT:    vpmovusqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x14,0xc2]
2281 ; X86-NEXT:    vpmovusqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x14,0xc1]
2282 ; X86-NEXT:    vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
2283 ; X86-NEXT:    vpmovusqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x14,0xc0]
2284 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2285 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2286 ; X86-NEXT:    retl # encoding: [0xc3]
2287 ;
2288 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qw_256:
2289 ; X64:       # %bb.0:
2290 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2291 ; X64-NEXT:    vpmovusqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x14,0xc2]
2292 ; X64-NEXT:    vpmovusqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x14,0xc1]
2293 ; X64-NEXT:    vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
2294 ; X64-NEXT:    vpmovusqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x14,0xc0]
2295 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2296 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2297 ; X64-NEXT:    retq # encoding: [0xc3]
2298     %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1)
2299     %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2)
2300     %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
2301     %res3 = add <8 x i16> %res0, %res1
2302     %res4 = add <8 x i16> %res3, %res2
2303     ret <8 x i16> %res4
2304 }
2305
2306 declare void @llvm.x86.avx512.mask.pmovus.qw.mem.256(ptr %ptr, <4 x i64>, i8)
2307
; Store-to-memory test for @llvm.x86.avx512.mask.pmovus.qw.mem.256. The
; intrinsic is called with mask i8 -1 and then with %x2, using the same %ptr
; and %x1. Expected output on both targets: an unmasked vpmovusqw store, a
; {%k1} vpmovusqw store to the same address, then vzeroupper.
2308 define void @test_int_x86_avx512_mask_pmovus_qw_mem_256(ptr %ptr, <4 x i64> %x1, i8 %x2) {
2309 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_256:
2310 ; X86:       # %bb.0:
2311 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2312 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2313 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2314 ; X86-NEXT:    vpmovusqw %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x14,0x00]
2315 ; X86-NEXT:    vpmovusqw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x14,0x00]
2316 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2317 ; X86-NEXT:    retl # encoding: [0xc3]
2318 ;
2319 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_256:
2320 ; X64:       # %bb.0:
2321 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2322 ; X64-NEXT:    vpmovusqw %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x14,0x07]
2323 ; X64-NEXT:    vpmovusqw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x14,0x07]
2324 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2325 ; X64-NEXT:    retq # encoding: [0xc3]
2326     call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(ptr %ptr, <4 x i64> %x1, i8 -1)
2327     call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(ptr %ptr, <4 x i64> %x1, i8 %x2)
2328     ret void
2329 }
2330
2331 declare <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64>, <4 x i32>, i8)
2332
; Register-destination test for @llvm.x86.avx512.mask.pmov.qd.128. Three calls
; share the source %x0; their results are added together and returned:
;   %res0 - second operand %x1, mask i8 -1 -> unmasked vpmovqd into %xmm2
;   %res1 - second operand %x1, mask %x2   -> vpmovqd into %xmm1 under {%k1}
;   %res2 - zeroinitializer,    mask %x2   -> vpmovqd into %xmm0 under {%k1} {z}
; The two IR adds are checked as vpaddd carrying the "EVEX TO VEX Compression"
; annotation. Only xmm registers appear and no vzeroupper is expected.
2333 define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) {
2334 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qd_128:
2335 ; X86:       # %bb.0:
2336 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2337 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2338 ; X86-NEXT:    vpmovqd %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x35,0xc2]
2339 ; X86-NEXT:    vpmovqd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x35,0xc1]
2340 ; X86-NEXT:    vpaddd %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc9]
2341 ; X86-NEXT:    vpmovqd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x35,0xc0]
2342 ; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
2343 ; X86-NEXT:    retl # encoding: [0xc3]
2344 ;
2345 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qd_128:
2346 ; X64:       # %bb.0:
2347 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2348 ; X64-NEXT:    vpmovqd %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x35,0xc2]
2349 ; X64-NEXT:    vpmovqd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x35,0xc1]
2350 ; X64-NEXT:    vpaddd %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc9]
2351 ; X64-NEXT:    vpmovqd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x35,0xc0]
2352 ; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
2353 ; X64-NEXT:    retq # encoding: [0xc3]
2354     %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
2355     %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2)
2356     %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
2357     %res3 = add <4 x i32> %res0, %res1
2358     %res4 = add <4 x i32> %res3, %res2
2359     ret <4 x i32> %res4
2360 }
2361
2362 declare void @llvm.x86.avx512.mask.pmov.qd.mem.128(ptr %ptr, <2 x i64>, i8)
2363
; Store-to-memory test for @llvm.x86.avx512.mask.pmov.qd.mem.128. The intrinsic
; is called with mask i8 -1 and then with %x2, using the same %ptr and %x1.
; Expected output on both targets: an unmasked vpmovqd store from %xmm0
; followed by a {%k1} vpmovqd store to the same address; no vzeroupper.
2364 define void @test_int_x86_avx512_mask_pmov_qd_mem_128(ptr %ptr, <2 x i64> %x1, i8 %x2) {
2365 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_128:
2366 ; X86:       # %bb.0:
2367 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2368 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2369 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2370 ; X86-NEXT:    vpmovqd %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x35,0x00]
2371 ; X86-NEXT:    vpmovqd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x35,0x00]
2372 ; X86-NEXT:    retl # encoding: [0xc3]
2373 ;
2374 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_128:
2375 ; X64:       # %bb.0:
2376 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2377 ; X64-NEXT:    vpmovqd %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x35,0x07]
2378 ; X64-NEXT:    vpmovqd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x35,0x07]
2379 ; X64-NEXT:    retq # encoding: [0xc3]
2380     call void @llvm.x86.avx512.mask.pmov.qd.mem.128(ptr %ptr, <2 x i64> %x1, i8 -1)
2381     call void @llvm.x86.avx512.mask.pmov.qd.mem.128(ptr %ptr, <2 x i64> %x1, i8 %x2)
2382     ret void
2383 }
2384
2385 declare <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64>, <4 x i32>, i8)
2386
; Register-destination test for @llvm.x86.avx512.mask.pmovs.qd.128. Three calls
; share the source %x0; their results are added together and returned:
;   %res0 - second operand %x1, mask i8 -1 -> unmasked vpmovsqd into %xmm2
;   %res1 - second operand %x1, mask %x2   -> vpmovsqd into %xmm1 under {%k1}
;   %res2 - zeroinitializer,    mask %x2   -> vpmovsqd into %xmm0 under {%k1} {z}
; The two IR adds are checked as vpaddd carrying the "EVEX TO VEX Compression"
; annotation. Only xmm registers appear and no vzeroupper is expected.
2387 define <4 x i32>@test_int_x86_avx512_mask_pmovs_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) {
2388 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qd_128:
2389 ; X86:       # %bb.0:
2390 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2391 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2392 ; X86-NEXT:    vpmovsqd %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x25,0xc2]
2393 ; X86-NEXT:    vpmovsqd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x25,0xc1]
2394 ; X86-NEXT:    vpaddd %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc9]
2395 ; X86-NEXT:    vpmovsqd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x25,0xc0]
2396 ; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
2397 ; X86-NEXT:    retl # encoding: [0xc3]
2398 ;
2399 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qd_128:
2400 ; X64:       # %bb.0:
2401 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2402 ; X64-NEXT:    vpmovsqd %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x25,0xc2]
2403 ; X64-NEXT:    vpmovsqd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x25,0xc1]
2404 ; X64-NEXT:    vpaddd %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc9]
2405 ; X64-NEXT:    vpmovsqd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x25,0xc0]
2406 ; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
2407 ; X64-NEXT:    retq # encoding: [0xc3]
2408     %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
2409     %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2)
2410     %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
2411     %res3 = add <4 x i32> %res0, %res1
2412     %res4 = add <4 x i32> %res3, %res2
2413     ret <4 x i32> %res4
2414 }
2415
2416 declare void @llvm.x86.avx512.mask.pmovs.qd.mem.128(ptr %ptr, <2 x i64>, i8)
2417
; Store-to-memory test for @llvm.x86.avx512.mask.pmovs.qd.mem.128. The intrinsic
; is called with mask i8 -1 and then with %x2, using the same %ptr and %x1.
; Expected output on both targets: an unmasked vpmovsqd store from %xmm0
; followed by a {%k1} vpmovsqd store to the same address; no vzeroupper.
2418 define void @test_int_x86_avx512_mask_pmovs_qd_mem_128(ptr %ptr, <2 x i64> %x1, i8 %x2) {
2419 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_128:
2420 ; X86:       # %bb.0:
2421 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2422 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2423 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2424 ; X86-NEXT:    vpmovsqd %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x25,0x00]
2425 ; X86-NEXT:    vpmovsqd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x25,0x00]
2426 ; X86-NEXT:    retl # encoding: [0xc3]
2427 ;
2428 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_128:
2429 ; X64:       # %bb.0:
2430 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2431 ; X64-NEXT:    vpmovsqd %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x25,0x07]
2432 ; X64-NEXT:    vpmovsqd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x25,0x07]
2433 ; X64-NEXT:    retq # encoding: [0xc3]
2434     call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(ptr %ptr, <2 x i64> %x1, i8 -1)
2435     call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(ptr %ptr, <2 x i64> %x1, i8 %x2)
2436     ret void
2437 }
2438
2439 declare <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64>, <4 x i32>, i8)
2440
; Register-destination test for @llvm.x86.avx512.mask.pmovus.qd.128. Three calls
; share the source %x0; their results are added together and returned:
;   %res0 - second operand %x1, mask i8 -1 -> unmasked vpmovusqd into %xmm2
;   %res1 - second operand %x1, mask %x2   -> vpmovusqd into %xmm1 under {%k1}
;   %res2 - zeroinitializer,    mask %x2   -> vpmovusqd into %xmm0 under {%k1} {z}
; The two IR adds are checked as vpaddd carrying the "EVEX TO VEX Compression"
; annotation. Only xmm registers appear and no vzeroupper is expected.
2441 define <4 x i32>@test_int_x86_avx512_mask_pmovus_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) {
2442 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qd_128:
2443 ; X86:       # %bb.0:
2444 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2445 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2446 ; X86-NEXT:    vpmovusqd %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x15,0xc2]
2447 ; X86-NEXT:    vpmovusqd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x15,0xc1]
2448 ; X86-NEXT:    vpaddd %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc9]
2449 ; X86-NEXT:    vpmovusqd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x15,0xc0]
2450 ; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
2451 ; X86-NEXT:    retl # encoding: [0xc3]
2452 ;
2453 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qd_128:
2454 ; X64:       # %bb.0:
2455 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2456 ; X64-NEXT:    vpmovusqd %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x15,0xc2]
2457 ; X64-NEXT:    vpmovusqd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x15,0xc1]
2458 ; X64-NEXT:    vpaddd %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc9]
2459 ; X64-NEXT:    vpmovusqd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x15,0xc0]
2460 ; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
2461 ; X64-NEXT:    retq # encoding: [0xc3]
2462     %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
2463     %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2)
2464     %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
2465     %res3 = add <4 x i32> %res0, %res1
2466     %res4 = add <4 x i32> %res3, %res2
2467     ret <4 x i32> %res4
2468 }
2469
2470 declare void @llvm.x86.avx512.mask.pmovus.qd.mem.128(ptr %ptr, <2 x i64>, i8)
2471
; Store-to-memory test for @llvm.x86.avx512.mask.pmovus.qd.mem.128. The
; intrinsic is called with mask i8 -1 and then with %x2, using the same %ptr
; and %x1. Expected output on both targets: an unmasked vpmovusqd store from
; %xmm0 followed by a {%k1} vpmovusqd store to the same address; no vzeroupper.
2472 define void @test_int_x86_avx512_mask_pmovus_qd_mem_128(ptr %ptr, <2 x i64> %x1, i8 %x2) {
2473 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_128:
2474 ; X86:       # %bb.0:
2475 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2476 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2477 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2478 ; X86-NEXT:    vpmovusqd %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x15,0x00]
2479 ; X86-NEXT:    vpmovusqd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x15,0x00]
2480 ; X86-NEXT:    retl # encoding: [0xc3]
2481 ;
2482 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_128:
2483 ; X64:       # %bb.0:
2484 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2485 ; X64-NEXT:    vpmovusqd %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x15,0x07]
2486 ; X64-NEXT:    vpmovusqd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x15,0x07]
2487 ; X64-NEXT:    retq # encoding: [0xc3]
2488     call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(ptr %ptr, <2 x i64> %x1, i8 -1)
2489     call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(ptr %ptr, <2 x i64> %x1, i8 %x2)
2490     ret void
2491 }
2492
; Unmasked case written as generic IR: a plain trunc from <4 x i64> to
; <4 x i32> with no intrinsic call. Both targets share one set of checks,
; which expect a single vpmovqd %ymm0, %xmm0 followed by vzeroupper.
2493 define <4 x i32>@test_int_x86_avx512_pmov_qd_256(<4 x i64> %x0) {
2494 ; CHECK-LABEL: test_int_x86_avx512_pmov_qd_256:
2495 ; CHECK:       # %bb.0:
2496 ; CHECK-NEXT:    vpmovqd %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x35,0xc0]
2497 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2498 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2499   %1 = trunc <4 x i64> %x0 to <4 x i32>
2500   ret <4 x i32> %1
2501 }
2502
; Merge-masked case written as generic IR: %x0 is truncated to <4 x i32>, the
; i8 mask %x2 is bitcast to <8 x i1> and its lanes 0-3 are extracted with a
; shufflevector, and a select picks the truncated lane or the matching lane of
; %x1. The checks expect this to become one vpmovqd into %xmm1 under {%k1},
; then a vmovdqa of %xmm1 into the return register %xmm0.
2503 define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
2504 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qd_256:
2505 ; X86:       # %bb.0:
2506 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2507 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2508 ; X86-NEXT:    vpmovqd %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x35,0xc1]
2509 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2510 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2511 ; X86-NEXT:    retl # encoding: [0xc3]
2512 ;
2513 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qd_256:
2514 ; X64:       # %bb.0:
2515 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2516 ; X64-NEXT:    vpmovqd %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x35,0xc1]
2517 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2518 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2519 ; X64-NEXT:    retq # encoding: [0xc3]
2520   %1 = trunc <4 x i64> %x0 to <4 x i32>
2521   %2 = bitcast i8 %x2 to <8 x i1>
2522   %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2523   %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x1
2524   ret <4 x i32> %3
2525 }
2526
; Zero-masked case written as generic IR: same trunc / bitcast / shufflevector
; sequence as the merge-masked form, but the select falls back to
; zeroinitializer instead of a second vector argument. The checks expect one
; vpmovqd %ymm0, %xmm0 under {%k1} {z}, followed by vzeroupper.
2527 define <4 x i32>@test_int_x86_avx512_maskz_pmov_qd_256(<4 x i64> %x0, i8 %x2) {
2528 ; X86-LABEL: test_int_x86_avx512_maskz_pmov_qd_256:
2529 ; X86:       # %bb.0:
2530 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2531 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2532 ; X86-NEXT:    vpmovqd %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x35,0xc0]
2533 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2534 ; X86-NEXT:    retl # encoding: [0xc3]
2535 ;
2536 ; X64-LABEL: test_int_x86_avx512_maskz_pmov_qd_256:
2537 ; X64:       # %bb.0:
2538 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2539 ; X64-NEXT:    vpmovqd %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x35,0xc0]
2540 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2541 ; X64-NEXT:    retq # encoding: [0xc3]
2542   %1 = trunc <4 x i64> %x0 to <4 x i32>
2543   %2 = bitcast i8 %x2 to <8 x i1>
2544   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2545   %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> zeroinitializer
2546   ret <4 x i32> %3
2547 }
2548
2549 declare void @llvm.x86.avx512.mask.pmov.qd.mem.256(ptr %ptr, <4 x i64>, i8)
2550
; Store-to-memory test for @llvm.x86.avx512.mask.pmov.qd.mem.256. The intrinsic
; is called with mask i8 -1 and then with %x2, using the same %ptr and %x1.
; Expected output on both targets: an unmasked vpmovqd store from %ymm0, a
; {%k1} vpmovqd store to the same address, then vzeroupper.
2551 define void @test_int_x86_avx512_mask_pmov_qd_mem_256(ptr %ptr, <4 x i64> %x1, i8 %x2) {
2552 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_256:
2553 ; X86:       # %bb.0:
2554 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2555 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2556 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2557 ; X86-NEXT:    vpmovqd %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x35,0x00]
2558 ; X86-NEXT:    vpmovqd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x35,0x00]
2559 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2560 ; X86-NEXT:    retl # encoding: [0xc3]
2561 ;
2562 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_256:
2563 ; X64:       # %bb.0:
2564 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2565 ; X64-NEXT:    vpmovqd %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x35,0x07]
2566 ; X64-NEXT:    vpmovqd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x35,0x07]
2567 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2568 ; X64-NEXT:    retq # encoding: [0xc3]
2569     call void @llvm.x86.avx512.mask.pmov.qd.mem.256(ptr %ptr, <4 x i64> %x1, i8 -1)
2570     call void @llvm.x86.avx512.mask.pmov.qd.mem.256(ptr %ptr, <4 x i64> %x1, i8 %x2)
2571     ret void
2572 }
2573
2574 declare <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64>, <4 x i32>, i8)
2575
; Unmasked case for @llvm.x86.avx512.mask.pmovs.qd.256: the intrinsic is called
; with mask i8 -1 and %x1 as its second operand. Both targets share one set of
; checks, which expect a single vpmovsqd %ymm0, %xmm0 with no mask register
; set up, followed by vzeroupper.
2576 define <4 x i32>@test_int_x86_avx512_pmovs_qd_256(<4 x i64> %x0, <4 x i32> %x1) {
2577 ; CHECK-LABEL: test_int_x86_avx512_pmovs_qd_256:
2578 ; CHECK:       # %bb.0:
2579 ; CHECK-NEXT:    vpmovsqd %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x25,0xc0]
2580 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2581 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2582   %res = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1)
2583   ret <4 x i32> %res
2584 }
2585
; Merge-masked case for @llvm.x86.avx512.mask.pmovs.qd.256: a single call with
; %x1 as the second operand and the variable mask %x2. The checks expect
; vpmovsqd into %xmm1 under {%k1}, then a vmovdqa of %xmm1 into the return
; register %xmm0, then vzeroupper.
2586 define <4 x i32>@test_int_x86_avx512_mask_pmovs_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
2587 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qd_256:
2588 ; X86:       # %bb.0:
2589 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2590 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2591 ; X86-NEXT:    vpmovsqd %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x25,0xc1]
2592 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2593 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2594 ; X86-NEXT:    retl # encoding: [0xc3]
2595 ;
2596 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qd_256:
2597 ; X64:       # %bb.0:
2598 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2599 ; X64-NEXT:    vpmovsqd %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x25,0xc1]
2600 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2601 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2602 ; X64-NEXT:    retq # encoding: [0xc3]
2603   %res = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2)
2604   ret <4 x i32> %res
2605 }
2606
; Zero-masked case for @llvm.x86.avx512.mask.pmovs.qd.256: a single call with
; zeroinitializer as the second operand and the variable mask %x2. The checks
; expect vpmovsqd %ymm0, %xmm0 under {%k1} {z}, followed by vzeroupper.
2607 define <4 x i32>@test_int_x86_avx512_maskz_pmovs_qd_256(<4 x i64> %x0, i8 %x2) {
2608 ; X86-LABEL: test_int_x86_avx512_maskz_pmovs_qd_256:
2609 ; X86:       # %bb.0:
2610 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2611 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2612 ; X86-NEXT:    vpmovsqd %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x25,0xc0]
2613 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2614 ; X86-NEXT:    retl # encoding: [0xc3]
2615 ;
2616 ; X64-LABEL: test_int_x86_avx512_maskz_pmovs_qd_256:
2617 ; X64:       # %bb.0:
2618 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2619 ; X64-NEXT:    vpmovsqd %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x25,0xc0]
2620 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2621 ; X64-NEXT:    retq # encoding: [0xc3]
2622   %res = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
2623   ret <4 x i32> %res
2624 }
2625
2626 declare void @llvm.x86.avx512.mask.pmovs.qd.mem.256(ptr %ptr, <4 x i64>, i8)
2627
; Store-to-memory test for @llvm.x86.avx512.mask.pmovs.qd.mem.256. The intrinsic
; is called with mask i8 -1 and then with %x2, using the same %ptr and %x1.
; Expected output on both targets: an unmasked vpmovsqd store from %ymm0, a
; {%k1} vpmovsqd store to the same address, then vzeroupper.
2628 define void @test_int_x86_avx512_mask_pmovs_qd_mem_256(ptr %ptr, <4 x i64> %x1, i8 %x2) {
2629 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_256:
2630 ; X86:       # %bb.0:
2631 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2632 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2633 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2634 ; X86-NEXT:    vpmovsqd %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x25,0x00]
2635 ; X86-NEXT:    vpmovsqd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x25,0x00]
2636 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2637 ; X86-NEXT:    retl # encoding: [0xc3]
2638 ;
2639 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_256:
2640 ; X64:       # %bb.0:
2641 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2642 ; X64-NEXT:    vpmovsqd %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x25,0x07]
2643 ; X64-NEXT:    vpmovsqd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x25,0x07]
2644 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2645 ; X64-NEXT:    retq # encoding: [0xc3]
2646     call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(ptr %ptr, <4 x i64> %x1, i8 -1)
2647     call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(ptr %ptr, <4 x i64> %x1, i8 %x2)
2648     ret void
2649 }
2650
2651 declare <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64>, <4 x i32>, i8)
2652
; Unmasked case for @llvm.x86.avx512.mask.pmovus.qd.256: the intrinsic is called
; with mask i8 -1 and %x1 as its second operand. Both targets share one set of
; checks, which expect a single vpmovusqd %ymm0, %xmm0 with no mask register
; set up, followed by vzeroupper.
2653 define <4 x i32>@test_int_x86_avx512_pmovus_qd_256(<4 x i64> %x0, <4 x i32> %x1) {
2654 ; CHECK-LABEL: test_int_x86_avx512_pmovus_qd_256:
2655 ; CHECK:       # %bb.0:
2656 ; CHECK-NEXT:    vpmovusqd %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x15,0xc0]
2657 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2658 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2659   %res = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1)
2660   ret <4 x i32> %res
2661 }
2662
; Merge-masked case for @llvm.x86.avx512.mask.pmovus.qd.256: a single call with
; %x1 as the second operand and the variable mask %x2. The checks expect
; vpmovusqd into %xmm1 under {%k1}, then a vmovdqa of %xmm1 into the return
; register %xmm0, then vzeroupper.
2663 define <4 x i32>@test_int_x86_avx512_mask_pmovus_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
2664 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qd_256:
2665 ; X86:       # %bb.0:
2666 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2667 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2668 ; X86-NEXT:    vpmovusqd %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x15,0xc1]
2669 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2670 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2671 ; X86-NEXT:    retl # encoding: [0xc3]
2672 ;
2673 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qd_256:
2674 ; X64:       # %bb.0:
2675 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2676 ; X64-NEXT:    vpmovusqd %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x15,0xc1]
2677 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2678 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2679 ; X64-NEXT:    retq # encoding: [0xc3]
2680   %res = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2)
2681   ret <4 x i32> %res
2682 }
2683
; Zero-masked case for @llvm.x86.avx512.mask.pmovus.qd.256: a single call with
; zeroinitializer as the second operand and the variable mask %x2. The checks
; expect vpmovusqd %ymm0, %xmm0 under {%k1} {z}, followed by vzeroupper.
2684 define <4 x i32>@test_int_x86_avx512_maskz_pmovus_qd_256(<4 x i64> %x0, i8 %x2) {
2685 ; X86-LABEL: test_int_x86_avx512_maskz_pmovus_qd_256:
2686 ; X86:       # %bb.0:
2687 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2688 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2689 ; X86-NEXT:    vpmovusqd %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x15,0xc0]
2690 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2691 ; X86-NEXT:    retl # encoding: [0xc3]
2692 ;
2693 ; X64-LABEL: test_int_x86_avx512_maskz_pmovus_qd_256:
2694 ; X64:       # %bb.0:
2695 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2696 ; X64-NEXT:    vpmovusqd %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x15,0xc0]
2697 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2698 ; X64-NEXT:    retq # encoding: [0xc3]
2699   %res = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
2700   ret <4 x i32> %res
2701 }
2702
2703 declare void @llvm.x86.avx512.mask.pmovus.qd.mem.256(ptr %ptr, <4 x i64>, i8)
2704
; Store-to-memory test for @llvm.x86.avx512.mask.pmovus.qd.mem.256. The
; intrinsic is called with mask i8 -1 and then with %x2, using the same %ptr
; and %x1. Expected output on both targets: an unmasked vpmovusqd store from
; %ymm0, a {%k1} vpmovusqd store to the same address, then vzeroupper.
2705 define void @test_int_x86_avx512_mask_pmovus_qd_mem_256(ptr %ptr, <4 x i64> %x1, i8 %x2) {
2706 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_256:
2707 ; X86:       # %bb.0:
2708 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2709 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2710 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2711 ; X86-NEXT:    vpmovusqd %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x15,0x00]
2712 ; X86-NEXT:    vpmovusqd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x15,0x00]
2713 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2714 ; X86-NEXT:    retl # encoding: [0xc3]
2715 ;
2716 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_256:
2717 ; X64:       # %bb.0:
2718 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2719 ; X64-NEXT:    vpmovusqd %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x15,0x07]
2720 ; X64-NEXT:    vpmovusqd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x15,0x07]
2721 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2722 ; X64-NEXT:    retq # encoding: [0xc3]
2723     call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(ptr %ptr, <4 x i64> %x1, i8 -1)
2724     call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(ptr %ptr, <4 x i64> %x1, i8 %x2)
2725     ret void
2726 }
2727
2728 declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32>, <16 x i8>, i8)
2729
; Register-destination test for @llvm.x86.avx512.mask.pmov.db.128. Three calls
; share the source %x0; their results are added together and returned:
;   %res0 - second operand %x1, mask i8 -1 -> unmasked vpmovdb into %xmm2
;   %res1 - second operand %x1, mask %x2   -> vpmovdb into %xmm1 under {%k1}
;   %res2 - zeroinitializer,    mask %x2   -> vpmovdb into %xmm0 under {%k1} {z}
; The two IR adds are checked as vpaddb. Only xmm registers appear and no
; vzeroupper is expected.
2730 define <16 x i8>@test_int_x86_avx512_mask_pmov_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
2731 ; X86-LABEL: test_int_x86_avx512_mask_pmov_db_128:
2732 ; X86:       # %bb.0:
2733 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2734 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2735 ; X86-NEXT:    vpmovdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x31,0xc2]
2736 ; X86-NEXT:    vpmovdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x31,0xc1]
2737 ; X86-NEXT:    vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
2738 ; X86-NEXT:    vpmovdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x31,0xc0]
2739 ; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2740 ; X86-NEXT:    retl # encoding: [0xc3]
2741 ;
2742 ; X64-LABEL: test_int_x86_avx512_mask_pmov_db_128:
2743 ; X64:       # %bb.0:
2744 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2745 ; X64-NEXT:    vpmovdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x31,0xc2]
2746 ; X64-NEXT:    vpmovdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x31,0xc1]
2747 ; X64-NEXT:    vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
2748 ; X64-NEXT:    vpmovdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x31,0xc0]
2749 ; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2750 ; X64-NEXT:    retq # encoding: [0xc3]
2751     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1)
2752     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2)
2753     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
2754     %res3 = add <16 x i8> %res0, %res1
2755     %res4 = add <16 x i8> %res3, %res2
2756     ret <16 x i8> %res4
2757 }
2758
2759 declare void @llvm.x86.avx512.mask.pmov.db.mem.128(ptr %ptr, <4 x i32>, i8)
2760 ; Store-form test: two calls on the same pointer and data that differ only in the mask operand.
2761 define void @test_int_x86_avx512_mask_pmov_db_mem_128(ptr %ptr, <4 x i32> %x1, i8 %x2) {
2762 ; X86-LABEL: test_int_x86_avx512_mask_pmov_db_mem_128:
2763 ; X86:       # %bb.0:
2764 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2765 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2766 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2767 ; X86-NEXT:    vpmovdb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x31,0x00]
2768 ; X86-NEXT:    vpmovdb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x31,0x00]
2769 ; X86-NEXT:    retl # encoding: [0xc3]
2770 ;
2771 ; X64-LABEL: test_int_x86_avx512_mask_pmov_db_mem_128:
2772 ; X64:       # %bb.0:
2773 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2774 ; X64-NEXT:    vpmovdb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x31,0x07]
2775 ; X64-NEXT:    vpmovdb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x31,0x07]
2776 ; X64-NEXT:    retq # encoding: [0xc3]
2777     call void @llvm.x86.avx512.mask.pmov.db.mem.128(ptr %ptr, <4 x i32> %x1, i8 -1) ; all-ones mask: checks expect the unmasked vpmovdb store
2778     call void @llvm.x86.avx512.mask.pmov.db.mem.128(ptr %ptr, <4 x i32> %x1, i8 %x2) ; variable mask %x2: checks expect the {%k1}-masked store
2779     ret void
2780 }
2781
2782 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32>, <16 x i8>, i8)
2783 ; Register-form test: calls the intrinsic with an all-ones mask, with %x1 as pass-through, and with a zero pass-through, then sums the three results.
2784 define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
2785 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_db_128:
2786 ; X86:       # %bb.0:
2787 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2788 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2789 ; X86-NEXT:    vpmovsdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x21,0xc2]
2790 ; X86-NEXT:    vpmovsdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x21,0xc1]
2791 ; X86-NEXT:    vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
2792 ; X86-NEXT:    vpmovsdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x21,0xc0]
2793 ; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2794 ; X86-NEXT:    retl # encoding: [0xc3]
2795 ;
2796 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_db_128:
2797 ; X64:       # %bb.0:
2798 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2799 ; X64-NEXT:    vpmovsdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x21,0xc2]
2800 ; X64-NEXT:    vpmovsdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x21,0xc1]
2801 ; X64-NEXT:    vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
2802 ; X64-NEXT:    vpmovsdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x21,0xc0]
2803 ; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2804 ; X64-NEXT:    retq # encoding: [0xc3]
2805     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1) ; all-ones mask: checks expect the unmasked vpmovsdb
2806     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) ; %x1 as pass-through: checks expect the {%k1} form writing over %x1's register
2807     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2) ; zero pass-through: checks expect the {%k1} {z} form
2808     %res3 = add <16 x i8> %res0, %res1
2809     %res4 = add <16 x i8> %res3, %res2
2810     ret <16 x i8> %res4
2811 }
2812
2813 declare void @llvm.x86.avx512.mask.pmovs.db.mem.128(ptr %ptr, <4 x i32>, i8)
2814 ; Store-form test: two calls on the same pointer and data that differ only in the mask operand.
2815 define void @test_int_x86_avx512_mask_pmovs_db_mem_128(ptr %ptr, <4 x i32> %x1, i8 %x2) {
2816 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_128:
2817 ; X86:       # %bb.0:
2818 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2819 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2820 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2821 ; X86-NEXT:    vpmovsdb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x21,0x00]
2822 ; X86-NEXT:    vpmovsdb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x21,0x00]
2823 ; X86-NEXT:    retl # encoding: [0xc3]
2824 ;
2825 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_128:
2826 ; X64:       # %bb.0:
2827 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2828 ; X64-NEXT:    vpmovsdb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x21,0x07]
2829 ; X64-NEXT:    vpmovsdb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x21,0x07]
2830 ; X64-NEXT:    retq # encoding: [0xc3]
2831     call void @llvm.x86.avx512.mask.pmovs.db.mem.128(ptr %ptr, <4 x i32> %x1, i8 -1) ; all-ones mask: checks expect the unmasked vpmovsdb store
2832     call void @llvm.x86.avx512.mask.pmovs.db.mem.128(ptr %ptr, <4 x i32> %x1, i8 %x2) ; variable mask %x2: checks expect the {%k1}-masked store
2833     ret void
2834 }
2835
2836 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32>, <16 x i8>, i8)
2837 ; Register-form test: calls the intrinsic with an all-ones mask, with %x1 as pass-through, and with a zero pass-through, then sums the three results.
2838 define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
2839 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_db_128:
2840 ; X86:       # %bb.0:
2841 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2842 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2843 ; X86-NEXT:    vpmovusdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x11,0xc2]
2844 ; X86-NEXT:    vpmovusdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x11,0xc1]
2845 ; X86-NEXT:    vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
2846 ; X86-NEXT:    vpmovusdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x11,0xc0]
2847 ; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2848 ; X86-NEXT:    retl # encoding: [0xc3]
2849 ;
2850 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_db_128:
2851 ; X64:       # %bb.0:
2852 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2853 ; X64-NEXT:    vpmovusdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x11,0xc2]
2854 ; X64-NEXT:    vpmovusdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x11,0xc1]
2855 ; X64-NEXT:    vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
2856 ; X64-NEXT:    vpmovusdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x11,0xc0]
2857 ; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2858 ; X64-NEXT:    retq # encoding: [0xc3]
2859     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1) ; all-ones mask: checks expect the unmasked vpmovusdb
2860     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) ; %x1 as pass-through: checks expect the {%k1} form writing over %x1's register
2861     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2) ; zero pass-through: checks expect the {%k1} {z} form
2862     %res3 = add <16 x i8> %res0, %res1
2863     %res4 = add <16 x i8> %res3, %res2
2864     ret <16 x i8> %res4
2865 }
2866
2867 declare void @llvm.x86.avx512.mask.pmovus.db.mem.128(ptr %ptr, <4 x i32>, i8)
2868 ; Store-form test: two calls on the same pointer and data that differ only in the mask operand.
2869 define void @test_int_x86_avx512_mask_pmovus_db_mem_128(ptr %ptr, <4 x i32> %x1, i8 %x2) {
2870 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_128:
2871 ; X86:       # %bb.0:
2872 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2873 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2874 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2875 ; X86-NEXT:    vpmovusdb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x11,0x00]
2876 ; X86-NEXT:    vpmovusdb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x11,0x00]
2877 ; X86-NEXT:    retl # encoding: [0xc3]
2878 ;
2879 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_128:
2880 ; X64:       # %bb.0:
2881 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2882 ; X64-NEXT:    vpmovusdb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x11,0x07]
2883 ; X64-NEXT:    vpmovusdb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x11,0x07]
2884 ; X64-NEXT:    retq # encoding: [0xc3]
2885     call void @llvm.x86.avx512.mask.pmovus.db.mem.128(ptr %ptr, <4 x i32> %x1, i8 -1) ; all-ones mask: checks expect the unmasked vpmovusdb store
2886     call void @llvm.x86.avx512.mask.pmovus.db.mem.128(ptr %ptr, <4 x i32> %x1, i8 %x2) ; variable mask %x2: checks expect the {%k1}-masked store
2887     ret void
2888 }
2889
2890 declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32>, <16 x i8>, i8)
2891 ; Register-form test on a ymm source: all-ones mask, %x1 pass-through and zero pass-through calls, summed; the checks also expect a vzeroupper before the return.
2892 define <16 x i8>@test_int_x86_avx512_mask_pmov_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
2893 ; X86-LABEL: test_int_x86_avx512_mask_pmov_db_256:
2894 ; X86:       # %bb.0:
2895 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2896 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2897 ; X86-NEXT:    vpmovdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x31,0xc2]
2898 ; X86-NEXT:    vpmovdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x31,0xc1]
2899 ; X86-NEXT:    vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
2900 ; X86-NEXT:    vpmovdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x31,0xc0]
2901 ; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2902 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2903 ; X86-NEXT:    retl # encoding: [0xc3]
2904 ;
2905 ; X64-LABEL: test_int_x86_avx512_mask_pmov_db_256:
2906 ; X64:       # %bb.0:
2907 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2908 ; X64-NEXT:    vpmovdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x31,0xc2]
2909 ; X64-NEXT:    vpmovdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x31,0xc1]
2910 ; X64-NEXT:    vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
2911 ; X64-NEXT:    vpmovdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x31,0xc0]
2912 ; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2913 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2914 ; X64-NEXT:    retq # encoding: [0xc3]
2915     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1) ; all-ones mask: checks expect the unmasked vpmovdb
2916     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) ; %x1 as pass-through: checks expect the {%k1} form writing over %x1's register
2917     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2) ; zero pass-through: checks expect the {%k1} {z} form
2918     %res3 = add <16 x i8> %res0, %res1
2919     %res4 = add <16 x i8> %res3, %res2
2920     ret <16 x i8> %res4
2921 }
2922
2923 declare void @llvm.x86.avx512.mask.pmov.db.mem.256(ptr %ptr, <8 x i32>, i8)
2924 ; Store-form test: two calls on the same pointer and data that differ only in the mask operand; the checks also expect a vzeroupper before the return.
2925 define void @test_int_x86_avx512_mask_pmov_db_mem_256(ptr %ptr, <8 x i32> %x1, i8 %x2) {
2926 ; X86-LABEL: test_int_x86_avx512_mask_pmov_db_mem_256:
2927 ; X86:       # %bb.0:
2928 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2929 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2930 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2931 ; X86-NEXT:    vpmovdb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x31,0x00]
2932 ; X86-NEXT:    vpmovdb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x31,0x00]
2933 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2934 ; X86-NEXT:    retl # encoding: [0xc3]
2935 ;
2936 ; X64-LABEL: test_int_x86_avx512_mask_pmov_db_mem_256:
2937 ; X64:       # %bb.0:
2938 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2939 ; X64-NEXT:    vpmovdb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x31,0x07]
2940 ; X64-NEXT:    vpmovdb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x31,0x07]
2941 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2942 ; X64-NEXT:    retq # encoding: [0xc3]
2943     call void @llvm.x86.avx512.mask.pmov.db.mem.256(ptr %ptr, <8 x i32> %x1, i8 -1) ; all-ones mask: checks expect the unmasked vpmovdb store
2944     call void @llvm.x86.avx512.mask.pmov.db.mem.256(ptr %ptr, <8 x i32> %x1, i8 %x2) ; variable mask %x2: checks expect the {%k1}-masked store
2945     ret void
2946 }
2947
2948 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32>, <16 x i8>, i8)
2949 ; Register-form test on a ymm source: all-ones mask, %x1 pass-through and zero pass-through calls, summed; the checks also expect a vzeroupper before the return.
2950 define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
2951 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_db_256:
2952 ; X86:       # %bb.0:
2953 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2954 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2955 ; X86-NEXT:    vpmovsdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x21,0xc2]
2956 ; X86-NEXT:    vpmovsdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x21,0xc1]
2957 ; X86-NEXT:    vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
2958 ; X86-NEXT:    vpmovsdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x21,0xc0]
2959 ; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2960 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2961 ; X86-NEXT:    retl # encoding: [0xc3]
2962 ;
2963 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_db_256:
2964 ; X64:       # %bb.0:
2965 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2966 ; X64-NEXT:    vpmovsdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x21,0xc2]
2967 ; X64-NEXT:    vpmovsdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x21,0xc1]
2968 ; X64-NEXT:    vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
2969 ; X64-NEXT:    vpmovsdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x21,0xc0]
2970 ; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2971 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2972 ; X64-NEXT:    retq # encoding: [0xc3]
2973     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1) ; all-ones mask: checks expect the unmasked vpmovsdb
2974     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) ; %x1 as pass-through: checks expect the {%k1} form writing over %x1's register
2975     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2) ; zero pass-through: checks expect the {%k1} {z} form
2976     %res3 = add <16 x i8> %res0, %res1
2977     %res4 = add <16 x i8> %res3, %res2
2978     ret <16 x i8> %res4
2979 }
2980
2981 declare void @llvm.x86.avx512.mask.pmovs.db.mem.256(ptr %ptr, <8 x i32>, i8)
2982 ; Store-form test: two calls on the same pointer and data that differ only in the mask operand; the checks also expect a vzeroupper before the return.
2983 define void @test_int_x86_avx512_mask_pmovs_db_mem_256(ptr %ptr, <8 x i32> %x1, i8 %x2) {
2984 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_256:
2985 ; X86:       # %bb.0:
2986 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2987 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2988 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2989 ; X86-NEXT:    vpmovsdb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x21,0x00]
2990 ; X86-NEXT:    vpmovsdb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x21,0x00]
2991 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2992 ; X86-NEXT:    retl # encoding: [0xc3]
2993 ;
2994 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_256:
2995 ; X64:       # %bb.0:
2996 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2997 ; X64-NEXT:    vpmovsdb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x21,0x07]
2998 ; X64-NEXT:    vpmovsdb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x21,0x07]
2999 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3000 ; X64-NEXT:    retq # encoding: [0xc3]
3001     call void @llvm.x86.avx512.mask.pmovs.db.mem.256(ptr %ptr, <8 x i32> %x1, i8 -1) ; all-ones mask: checks expect the unmasked vpmovsdb store
3002     call void @llvm.x86.avx512.mask.pmovs.db.mem.256(ptr %ptr, <8 x i32> %x1, i8 %x2) ; variable mask %x2: checks expect the {%k1}-masked store
3003     ret void
3004 }
3005
3006 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32>, <16 x i8>, i8)
3007 ; Register-form test on a ymm source: all-ones mask, %x1 pass-through and zero pass-through calls, summed; the checks also expect a vzeroupper before the return.
3008 define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
3009 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_db_256:
3010 ; X86:       # %bb.0:
3011 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3012 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3013 ; X86-NEXT:    vpmovusdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x11,0xc2]
3014 ; X86-NEXT:    vpmovusdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x11,0xc1]
3015 ; X86-NEXT:    vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
3016 ; X86-NEXT:    vpmovusdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x11,0xc0]
3017 ; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
3018 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3019 ; X86-NEXT:    retl # encoding: [0xc3]
3020 ;
3021 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_db_256:
3022 ; X64:       # %bb.0:
3023 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3024 ; X64-NEXT:    vpmovusdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x11,0xc2]
3025 ; X64-NEXT:    vpmovusdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x11,0xc1]
3026 ; X64-NEXT:    vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
3027 ; X64-NEXT:    vpmovusdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x11,0xc0]
3028 ; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
3029 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3030 ; X64-NEXT:    retq # encoding: [0xc3]
3031     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1) ; all-ones mask: checks expect the unmasked vpmovusdb
3032     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) ; %x1 as pass-through: checks expect the {%k1} form writing over %x1's register
3033     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2) ; zero pass-through: checks expect the {%k1} {z} form
3034     %res3 = add <16 x i8> %res0, %res1
3035     %res4 = add <16 x i8> %res3, %res2
3036     ret <16 x i8> %res4
3037 }
3038
3039 declare void @llvm.x86.avx512.mask.pmovus.db.mem.256(ptr %ptr, <8 x i32>, i8)
3040 ; Store-form test: two calls on the same pointer and data that differ only in the mask operand; the checks also expect a vzeroupper before the return.
3041 define void @test_int_x86_avx512_mask_pmovus_db_mem_256(ptr %ptr, <8 x i32> %x1, i8 %x2) {
3042 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_256:
3043 ; X86:       # %bb.0:
3044 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3045 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3046 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3047 ; X86-NEXT:    vpmovusdb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x11,0x00]
3048 ; X86-NEXT:    vpmovusdb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x11,0x00]
3049 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3050 ; X86-NEXT:    retl # encoding: [0xc3]
3051 ;
3052 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_256:
3053 ; X64:       # %bb.0:
3054 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3055 ; X64-NEXT:    vpmovusdb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x11,0x07]
3056 ; X64-NEXT:    vpmovusdb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x11,0x07]
3057 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3058 ; X64-NEXT:    retq # encoding: [0xc3]
3059     call void @llvm.x86.avx512.mask.pmovus.db.mem.256(ptr %ptr, <8 x i32> %x1, i8 -1) ; all-ones mask: checks expect the unmasked vpmovusdb store
3060     call void @llvm.x86.avx512.mask.pmovus.db.mem.256(ptr %ptr, <8 x i32> %x1, i8 %x2) ; variable mask %x2: checks expect the {%k1}-masked store
3061     ret void
3062 }
3063
3064 declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32>, <8 x i16>, i8)
3065 ; Register-form test: calls the intrinsic with an all-ones mask, with %x1 as pass-through, and with a zero pass-through, then sums the three results.
3066 define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) {
3067 ; X86-LABEL: test_int_x86_avx512_mask_pmov_dw_128:
3068 ; X86:       # %bb.0:
3069 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3070 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3071 ; X86-NEXT:    vpmovdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x33,0xc2]
3072 ; X86-NEXT:    vpmovdw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x33,0xc1]
3073 ; X86-NEXT:    vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
3074 ; X86-NEXT:    vpmovdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x33,0xc0]
3075 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3076 ; X86-NEXT:    retl # encoding: [0xc3]
3077 ;
3078 ; X64-LABEL: test_int_x86_avx512_mask_pmov_dw_128:
3079 ; X64:       # %bb.0:
3080 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3081 ; X64-NEXT:    vpmovdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x33,0xc2]
3082 ; X64-NEXT:    vpmovdw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x33,0xc1]
3083 ; X64-NEXT:    vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
3084 ; X64-NEXT:    vpmovdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x33,0xc0]
3085 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3086 ; X64-NEXT:    retq # encoding: [0xc3]
3087     %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1) ; all-ones mask: checks expect the unmasked vpmovdw
3088     %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) ; %x1 as pass-through: checks expect the {%k1} form writing over %x1's register
3089     %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2) ; zero pass-through: checks expect the {%k1} {z} form
3090     %res3 = add <8 x i16> %res0, %res1
3091     %res4 = add <8 x i16> %res3, %res2
3092     ret <8 x i16> %res4
3093 }
3094
3095 declare void @llvm.x86.avx512.mask.pmov.dw.mem.128(ptr %ptr, <4 x i32>, i8)
3096 ; Store-form test: two calls on the same pointer and data that differ only in the mask operand.
3097 define void @test_int_x86_avx512_mask_pmov_dw_mem_128(ptr %ptr, <4 x i32> %x1, i8 %x2) {
3098 ; X86-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_128:
3099 ; X86:       # %bb.0:
3100 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3101 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3102 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3103 ; X86-NEXT:    vpmovdw %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x33,0x00]
3104 ; X86-NEXT:    vpmovdw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x33,0x00]
3105 ; X86-NEXT:    retl # encoding: [0xc3]
3106 ;
3107 ; X64-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_128:
3108 ; X64:       # %bb.0:
3109 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3110 ; X64-NEXT:    vpmovdw %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x33,0x07]
3111 ; X64-NEXT:    vpmovdw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x33,0x07]
3112 ; X64-NEXT:    retq # encoding: [0xc3]
3113     call void @llvm.x86.avx512.mask.pmov.dw.mem.128(ptr %ptr, <4 x i32> %x1, i8 -1) ; all-ones mask: checks expect the unmasked vpmovdw store
3114     call void @llvm.x86.avx512.mask.pmov.dw.mem.128(ptr %ptr, <4 x i32> %x1, i8 %x2) ; variable mask %x2: checks expect the {%k1}-masked store
3115     ret void
3116 }
3117
3118 declare <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32>, <8 x i16>, i8)
3119 ; Register-form test: calls the intrinsic with an all-ones mask, with %x1 as pass-through, and with a zero pass-through, then sums the three results.
3120 define <8 x i16>@test_int_x86_avx512_mask_pmovs_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) {
3121 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_dw_128:
3122 ; X86:       # %bb.0:
3123 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3124 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3125 ; X86-NEXT:    vpmovsdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x23,0xc2]
3126 ; X86-NEXT:    vpmovsdw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x23,0xc1]
3127 ; X86-NEXT:    vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
3128 ; X86-NEXT:    vpmovsdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x23,0xc0]
3129 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3130 ; X86-NEXT:    retl # encoding: [0xc3]
3131 ;
3132 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_dw_128:
3133 ; X64:       # %bb.0:
3134 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3135 ; X64-NEXT:    vpmovsdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x23,0xc2]
3136 ; X64-NEXT:    vpmovsdw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x23,0xc1]
3137 ; X64-NEXT:    vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
3138 ; X64-NEXT:    vpmovsdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x23,0xc0]
3139 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3140 ; X64-NEXT:    retq # encoding: [0xc3]
3141     %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1) ; all-ones mask: checks expect the unmasked vpmovsdw
3142     %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) ; %x1 as pass-through: checks expect the {%k1} form writing over %x1's register
3143     %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2) ; zero pass-through: checks expect the {%k1} {z} form
3144     %res3 = add <8 x i16> %res0, %res1
3145     %res4 = add <8 x i16> %res3, %res2
3146     ret <8 x i16> %res4
3147 }
3148
3149 declare void @llvm.x86.avx512.mask.pmovs.dw.mem.128(ptr %ptr, <4 x i32>, i8)
3150 ; Store-form test: two calls on the same pointer and data that differ only in the mask operand.
3151 define void @test_int_x86_avx512_mask_pmovs_dw_mem_128(ptr %ptr, <4 x i32> %x1, i8 %x2) {
3152 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_128:
3153 ; X86:       # %bb.0:
3154 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3155 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3156 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3157 ; X86-NEXT:    vpmovsdw %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x23,0x00]
3158 ; X86-NEXT:    vpmovsdw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x23,0x00]
3159 ; X86-NEXT:    retl # encoding: [0xc3]
3160 ;
3161 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_128:
3162 ; X64:       # %bb.0:
3163 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3164 ; X64-NEXT:    vpmovsdw %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x23,0x07]
3165 ; X64-NEXT:    vpmovsdw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x23,0x07]
3166 ; X64-NEXT:    retq # encoding: [0xc3]
3167     call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(ptr %ptr, <4 x i32> %x1, i8 -1) ; all-ones mask: checks expect the unmasked vpmovsdw store
3168     call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(ptr %ptr, <4 x i32> %x1, i8 %x2) ; variable mask %x2: checks expect the {%k1}-masked store
3169     ret void
3170 }
3171
3172 declare <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32>, <8 x i16>, i8)
3173 ; Register-form test: calls the intrinsic with an all-ones mask, with %x1 as pass-through, and with a zero pass-through, then sums the three results.
3174 define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) {
3175 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_dw_128:
3176 ; X86:       # %bb.0:
3177 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3178 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3179 ; X86-NEXT:    vpmovusdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x13,0xc2]
3180 ; X86-NEXT:    vpmovusdw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x13,0xc1]
3181 ; X86-NEXT:    vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
3182 ; X86-NEXT:    vpmovusdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x13,0xc0]
3183 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3184 ; X86-NEXT:    retl # encoding: [0xc3]
3185 ;
3186 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_dw_128:
3187 ; X64:       # %bb.0:
3188 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3189 ; X64-NEXT:    vpmovusdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x13,0xc2]
3190 ; X64-NEXT:    vpmovusdw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x13,0xc1]
3191 ; X64-NEXT:    vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
3192 ; X64-NEXT:    vpmovusdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x13,0xc0]
3193 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3194 ; X64-NEXT:    retq # encoding: [0xc3]
3195     %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1) ; all-ones mask: checks expect the unmasked vpmovusdw
3196     %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) ; %x1 as pass-through: checks expect the {%k1} form writing over %x1's register
3197     %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2) ; zero pass-through: checks expect the {%k1} {z} form
3198     %res3 = add <8 x i16> %res0, %res1
3199     %res4 = add <8 x i16> %res3, %res2
3200     ret <8 x i16> %res4
3201 }
3202
3203 declare void @llvm.x86.avx512.mask.pmovus.dw.mem.128(ptr %ptr, <4 x i32>, i8)
3204 ; Store-form test: two calls on the same pointer and data that differ only in the mask operand.
3205 define void @test_int_x86_avx512_mask_pmovus_dw_mem_128(ptr %ptr, <4 x i32> %x1, i8 %x2) {
3206 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_128:
3207 ; X86:       # %bb.0:
3208 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3209 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3210 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3211 ; X86-NEXT:    vpmovusdw %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x13,0x00]
3212 ; X86-NEXT:    vpmovusdw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x13,0x00]
3213 ; X86-NEXT:    retl # encoding: [0xc3]
3214 ;
3215 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_128:
3216 ; X64:       # %bb.0:
3217 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3218 ; X64-NEXT:    vpmovusdw %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x13,0x07]
3219 ; X64-NEXT:    vpmovusdw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x13,0x07]
3220 ; X64-NEXT:    retq # encoding: [0xc3]
3221     call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(ptr %ptr, <4 x i32> %x1, i8 -1) ; all-ones mask: checks expect the unmasked vpmovusdw store
3222     call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(ptr %ptr, <4 x i32> %x1, i8 %x2) ; variable mask %x2: checks expect the {%k1}-masked store
3223     ret void
3224 }
3225
3226 declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32>, <8 x i16>, i8)
3227 ; Register-form test on a ymm source: all-ones mask, %x1 pass-through and zero pass-through calls, summed; the checks also expect a vzeroupper before the return.
3228 define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) {
3229 ; X86-LABEL: test_int_x86_avx512_mask_pmov_dw_256:
3230 ; X86:       # %bb.0:
3231 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3232 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3233 ; X86-NEXT:    vpmovdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x33,0xc2]
3234 ; X86-NEXT:    vpmovdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x33,0xc1]
3235 ; X86-NEXT:    vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
3236 ; X86-NEXT:    vpmovdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x33,0xc0]
3237 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3238 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3239 ; X86-NEXT:    retl # encoding: [0xc3]
3240 ;
3241 ; X64-LABEL: test_int_x86_avx512_mask_pmov_dw_256:
3242 ; X64:       # %bb.0:
3243 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3244 ; X64-NEXT:    vpmovdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x33,0xc2]
3245 ; X64-NEXT:    vpmovdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x33,0xc1]
3246 ; X64-NEXT:    vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
3247 ; X64-NEXT:    vpmovdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x33,0xc0]
3248 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3249 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3250 ; X64-NEXT:    retq # encoding: [0xc3]
3251     %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1) ; all-ones mask: checks expect the unmasked vpmovdw
3252     %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) ; %x1 as pass-through: checks expect the {%k1} form writing over %x1's register
3253     %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2) ; zero pass-through: checks expect the {%k1} {z} form
3254     %res3 = add <8 x i16> %res0, %res1
3255     %res4 = add <8 x i16> %res3, %res2
3256     ret <8 x i16> %res4
3257 }
3258
3259 declare void @llvm.x86.avx512.mask.pmov.dw.mem.256(ptr %ptr, <8 x i32>, i8)
3260
; Store form of the dword-to-word move: llvm.x86.avx512.mask.pmov.dw.mem.256
; is called twice on the same pointer, first with an all-ones mask and then
; with the variable mask %x2. The expected output is an unmasked vpmovdw
; store followed by a {%k1}-masked vpmovdw store to the same address.
3261 define void @test_int_x86_avx512_mask_pmov_dw_mem_256(ptr %ptr, <8 x i32> %x1, i8 %x2) {
3262 ; X86-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_256:
3263 ; X86:       # %bb.0:
3264 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3265 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3266 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3267 ; X86-NEXT:    vpmovdw %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x33,0x00]
3268 ; X86-NEXT:    vpmovdw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x33,0x00]
3269 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3270 ; X86-NEXT:    retl # encoding: [0xc3]
3271 ;
3272 ; X64-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_256:
3273 ; X64:       # %bb.0:
3274 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3275 ; X64-NEXT:    vpmovdw %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x33,0x07]
3276 ; X64-NEXT:    vpmovdw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x33,0x07]
3277 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3278 ; X64-NEXT:    retq # encoding: [0xc3]
; First store uses mask -1, second store uses the variable mask %x2.
3279     call void @llvm.x86.avx512.mask.pmov.dw.mem.256(ptr %ptr, <8 x i32> %x1, i8 -1)
3280     call void @llvm.x86.avx512.mask.pmov.dw.mem.256(ptr %ptr, <8 x i32> %x1, i8 %x2)
3281     ret void
3282 }
3283
3284 declare <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32>, <8 x i16>, i8)
3285
; Register form of llvm.x86.avx512.mask.pmovs.dw.256 (<8 x i32> to
; <8 x i16>, selected as vpmovsdw). The intrinsic is called with an all-ones
; mask, with %x2 merging into %x1, and with %x2 over a zero passthru; the
; three results are added together and returned. The expected output is one
; unmasked, one merge-masked and one zero-masked vpmovsdw plus two vpaddw.
3286 define <8 x i16>@test_int_x86_avx512_mask_pmovs_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) {
3287 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_dw_256:
3288 ; X86:       # %bb.0:
3289 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3290 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3291 ; X86-NEXT:    vpmovsdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x23,0xc2]
3292 ; X86-NEXT:    vpmovsdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x23,0xc1]
3293 ; X86-NEXT:    vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
3294 ; X86-NEXT:    vpmovsdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x23,0xc0]
3295 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3296 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3297 ; X86-NEXT:    retl # encoding: [0xc3]
3298 ;
3299 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_dw_256:
3300 ; X64:       # %bb.0:
3301 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3302 ; X64-NEXT:    vpmovsdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x23,0xc2]
3303 ; X64-NEXT:    vpmovsdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x23,0xc1]
3304 ; X64-NEXT:    vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
3305 ; X64-NEXT:    vpmovsdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x23,0xc0]
3306 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3307 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3308 ; X64-NEXT:    retq # encoding: [0xc3]
; res0: mask -1; res1: mask %x2 merging into %x1; res2: mask %x2, zero passthru.
3309     %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)
3310     %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2)
3311     %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
3312     %res3 = add <8 x i16> %res0, %res1
3313     %res4 = add <8 x i16> %res3, %res2
3314     ret <8 x i16> %res4
3315 }
3316
3317 declare void @llvm.x86.avx512.mask.pmovs.dw.mem.256(ptr %ptr, <8 x i32>, i8)
3318
; Store form of vpmovsdw: llvm.x86.avx512.mask.pmovs.dw.mem.256 is called
; twice on the same pointer, first with an all-ones mask and then with the
; variable mask %x2. The expected output is an unmasked store followed by a
; {%k1}-masked store to the same address.
3319 define void @test_int_x86_avx512_mask_pmovs_dw_mem_256(ptr %ptr, <8 x i32> %x1, i8 %x2) {
3320 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_256:
3321 ; X86:       # %bb.0:
3322 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3323 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3324 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3325 ; X86-NEXT:    vpmovsdw %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x23,0x00]
3326 ; X86-NEXT:    vpmovsdw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x23,0x00]
3327 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3328 ; X86-NEXT:    retl # encoding: [0xc3]
3329 ;
3330 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_256:
3331 ; X64:       # %bb.0:
3332 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3333 ; X64-NEXT:    vpmovsdw %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x23,0x07]
3334 ; X64-NEXT:    vpmovsdw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x23,0x07]
3335 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3336 ; X64-NEXT:    retq # encoding: [0xc3]
; First store uses mask -1, second store uses the variable mask %x2.
3337     call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(ptr %ptr, <8 x i32> %x1, i8 -1)
3338     call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(ptr %ptr, <8 x i32> %x1, i8 %x2)
3339     ret void
3340 }
3341
3342 declare <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32>, <8 x i16>, i8)
3343
; Register form of llvm.x86.avx512.mask.pmovus.dw.256 (<8 x i32> to
; <8 x i16>, selected as vpmovusdw). The intrinsic is called with an all-ones
; mask, with %x2 merging into %x1, and with %x2 over a zero passthru; the
; three results are added together and returned. The expected output is one
; unmasked, one merge-masked and one zero-masked vpmovusdw plus two vpaddw.
3344 define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) {
3345 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_dw_256:
3346 ; X86:       # %bb.0:
3347 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3348 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3349 ; X86-NEXT:    vpmovusdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x13,0xc2]
3350 ; X86-NEXT:    vpmovusdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x13,0xc1]
3351 ; X86-NEXT:    vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
3352 ; X86-NEXT:    vpmovusdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x13,0xc0]
3353 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3354 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3355 ; X86-NEXT:    retl # encoding: [0xc3]
3356 ;
3357 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_dw_256:
3358 ; X64:       # %bb.0:
3359 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3360 ; X64-NEXT:    vpmovusdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x13,0xc2]
3361 ; X64-NEXT:    vpmovusdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x13,0xc1]
3362 ; X64-NEXT:    vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
3363 ; X64-NEXT:    vpmovusdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x13,0xc0]
3364 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3365 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3366 ; X64-NEXT:    retq # encoding: [0xc3]
; res0: mask -1; res1: mask %x2 merging into %x1; res2: mask %x2, zero passthru.
3367     %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)
3368     %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2)
3369     %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
3370     %res3 = add <8 x i16> %res0, %res1
3371     %res4 = add <8 x i16> %res3, %res2
3372     ret <8 x i16> %res4
3373 }
3374
3375 declare void @llvm.x86.avx512.mask.pmovus.dw.mem.256(ptr %ptr, <8 x i32>, i8)
3376
; Store form of vpmovusdw: llvm.x86.avx512.mask.pmovus.dw.mem.256 is called
; twice on the same pointer, first with an all-ones mask and then with the
; variable mask %x2. The expected output is an unmasked store followed by a
; {%k1}-masked store to the same address.
3377 define void @test_int_x86_avx512_mask_pmovus_dw_mem_256(ptr %ptr, <8 x i32> %x1, i8 %x2) {
3378 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_256:
3379 ; X86:       # %bb.0:
3380 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3381 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3382 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3383 ; X86-NEXT:    vpmovusdw %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x13,0x00]
3384 ; X86-NEXT:    vpmovusdw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x13,0x00]
3385 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3386 ; X86-NEXT:    retl # encoding: [0xc3]
3387 ;
3388 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_256:
3389 ; X64:       # %bb.0:
3390 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3391 ; X64-NEXT:    vpmovusdw %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x13,0x07]
3392 ; X64-NEXT:    vpmovusdw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x13,0x07]
3393 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3394 ; X64-NEXT:    retq # encoding: [0xc3]
; First store uses mask -1, second store uses the variable mask %x2.
3395     call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(ptr %ptr, <8 x i32> %x1, i8 -1)
3396     call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(ptr %ptr, <8 x i32> %x1, i8 %x2)
3397     ret void
3398 }
3399
3400 declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double>, <4 x i32>, i8)
3401
; Covers both mask forms of llvm.x86.avx512.mask.cvtpd2dq.128 in a single
; function: one call with the variable mask %x2 merging into %x1, and one
; with an all-ones mask. The two results are added and returned. The expected
; output is a {%k1}-masked vcvtpd2dq, an unmasked vcvtpd2dq and a vpaddd; the
; last two carry the "EVEX TO VEX Compression" annotation.
3402 define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
3403 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_128:
3404 ; X86:       # %bb.0:
3405 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3406 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3407 ; X86-NEXT:    vcvtpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0xe6,0xc8]
3408 ; X86-NEXT:    vcvtpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
3409 ; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3410 ; X86-NEXT:    retl # encoding: [0xc3]
3411 ;
3412 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_128:
3413 ; X64:       # %bb.0:
3414 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3415 ; X64-NEXT:    vcvtpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0xe6,0xc8]
3416 ; X64-NEXT:    vcvtpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
3417 ; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3418 ; X64-NEXT:    retq # encoding: [0xc3]
; %res: variable mask %x2 merging into %x1; %res1: all-ones mask.
3419   %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
3420   %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
3421   %res2 = add <4 x i32> %res, %res1
3422   ret <4 x i32> %res2
3423 }
3424
; All-ones-mask llvm.x86.avx512.mask.cvtpd2dq.128 followed by a shuffle that
; keeps result lanes 0-1 and takes lanes 2-3 from a zero vector. The expected
; output is a lone vcvtpd2dq, i.e. no separate instruction for the shuffle.
; NOTE(review): presumably this relies on the 128-bit convert already zeroing
; result lanes 2-3 -- confirm against the instruction reference.
3425 define <4 x i32>@test_int_x86_avx512_cvt_pd2dq_128_zext(<2 x double> %x0, <4 x i32> %x1) {
3426 ; CHECK-LABEL: test_int_x86_avx512_cvt_pd2dq_128_zext:
3427 ; CHECK:       # %bb.0:
3428 ; CHECK-NEXT:    vcvtpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
3429 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3430   %res2 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
; Shuffle indices 4 and 5 select elements of the zeroinitializer operand.
3431   %res3 = shufflevector <4 x i32> %res2, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3432   ret <4 x i32> %res3
3433 }
3434
; Variable-mask llvm.x86.avx512.mask.cvtpd2dq.128 (merging into %x1) followed
; by a shuffle that keeps result lanes 0-1 and takes lanes 2-3 from a zero
; vector. The expected output is the {%k1}-masked vcvtpd2dq plus a register
; copy, with no extra instruction for the shuffle.
; NOTE(review): presumably this relies on the 128-bit convert already zeroing
; result lanes 2-3 -- confirm against the instruction reference.
3435 define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_128_zext(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
3436 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_128_zext:
3437 ; X86:       # %bb.0:
3438 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3439 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3440 ; X86-NEXT:    vcvtpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0xe6,0xc8]
3441 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3442 ; X86-NEXT:    retl # encoding: [0xc3]
3443 ;
3444 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_128_zext:
3445 ; X64:       # %bb.0:
3446 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3447 ; X64-NEXT:    vcvtpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0xe6,0xc8]
3448 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3449 ; X64-NEXT:    retq # encoding: [0xc3]
3450   %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
; Shuffle indices 4 and 5 select elements of the zeroinitializer operand.
3451   %res1 = shufflevector <4 x i32> %res, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3452   ret <4 x i32> %res1
3453 }
3454
3455 declare <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double>, <4 x float>, i8)
3456
; Unmasked case: llvm.x86.avx512.mask.cvtpd2ps called with an all-ones mask.
; The expected output is a single vcvtpd2ps annotated "EVEX TO VEX
; Compression". One shared check prefix covers both triples; only the return
; mnemonic differs, and it is matched by a regex.
3457 define <4 x float>@test_int_x86_avx512_cvt_pd2ps(<2 x double> %x0, <4 x float> %x1) {
3458 ; CHECK-LABEL: test_int_x86_avx512_cvt_pd2ps:
3459 ; CHECK:       # %bb.0:
3460 ; CHECK-NEXT:    vcvtpd2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0]
3461 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3462   %res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double> %x0, <4 x float> %x1, i8 -1)
3463   ret <4 x float> %res
3464 }
3465
; Merge-masked case: llvm.x86.avx512.mask.cvtpd2ps called with the variable
; mask %x2 and passthru %x1. The expected output loads the mask into %k1,
; converts into %xmm1 under {%k1}, then copies %xmm1 to %xmm0.
3466 define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps(<2 x double> %x0, <4 x float> %x1, i8 %x2) {
3467 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2ps:
3468 ; X86:       # %bb.0:
3469 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3470 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3471 ; X86-NEXT:    vcvtpd2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x5a,0xc8]
3472 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3473 ; X86-NEXT:    retl # encoding: [0xc3]
3474 ;
3475 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2ps:
3476 ; X64:       # %bb.0:
3477 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3478 ; X64-NEXT:    vcvtpd2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x5a,0xc8]
3479 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3480 ; X64-NEXT:    retq # encoding: [0xc3]
3481   %res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double> %x0, <4 x float> %x1, i8 %x2)
3482   ret <4 x float> %res
3483 }
3484
; All-ones-mask llvm.x86.avx512.mask.cvtpd2ps followed by a shuffle that keeps
; result lanes 0-1 and takes lanes 2-3 from a zero vector. The expected output
; is a lone vcvtpd2ps, i.e. no separate instruction for the shuffle.
; NOTE(review): presumably this relies on the convert already zeroing result
; lanes 2-3 -- confirm against the instruction reference.
3485 define <4 x float>@test_int_x86_avx512_cvt_pd2ps_zext(<2 x double> %x0, <4 x float> %x1) {
3486 ; CHECK-LABEL: test_int_x86_avx512_cvt_pd2ps_zext:
3487 ; CHECK:       # %bb.0:
3488 ; CHECK-NEXT:    vcvtpd2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0]
3489 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3490   %res2 = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double> %x0, <4 x float> %x1, i8 -1)
; Shuffle indices 4 and 5 select elements of the zeroinitializer operand.
3491   %res3 = shufflevector <4 x float> %res2, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3492   ret <4 x float> %res3
3493 }
3494
; Variable-mask llvm.x86.avx512.mask.cvtpd2ps (merging into %x1) followed by a
; shuffle that keeps result lanes 0-1 and takes lanes 2-3 from a zero vector.
; The expected output is the {%k1}-masked vcvtpd2ps plus a register copy,
; with no extra instruction for the shuffle.
; NOTE(review): presumably this relies on the convert already zeroing result
; lanes 2-3 -- confirm against the instruction reference.
3495 define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps_zext(<2 x double> %x0, <4 x float> %x1, i8 %x2) {
3496 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_zext:
3497 ; X86:       # %bb.0:
3498 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3499 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3500 ; X86-NEXT:    vcvtpd2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x5a,0xc8]
3501 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3502 ; X86-NEXT:    retl # encoding: [0xc3]
3503 ;
3504 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_zext:
3505 ; X64:       # %bb.0:
3506 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3507 ; X64-NEXT:    vcvtpd2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x5a,0xc8]
3508 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3509 ; X64-NEXT:    retq # encoding: [0xc3]
3510   %res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double> %x0, <4 x float> %x1, i8 %x2)
; Shuffle indices 4 and 5 select elements of the zeroinitializer operand.
3511   %res1 = shufflevector <4 x float> %res, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3512   ret <4 x float> %res1
3513 }
3514
3515 declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double>, <4 x i32>, i8)
3516
; Unmasked case: llvm.x86.avx512.mask.cvtpd2udq.128 called with an all-ones
; mask. The expected output is a single vcvtpd2udq with a 0x62-prefixed
; encoding and no "EVEX TO VEX Compression" annotation.
3517 define <4 x i32>@test_int_x86_avx512_cvt_pd2udq_128(<2 x double> %x0, <4 x i32> %x1) {
3518 ; CHECK-LABEL: test_int_x86_avx512_cvt_pd2udq_128:
3519 ; CHECK:       # %bb.0:
3520 ; CHECK-NEXT:    vcvtpd2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x79,0xc0]
3521 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3522   %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
3523   ret <4 x i32> %res
3524 }
3525
; Merge-masked case: llvm.x86.avx512.mask.cvtpd2udq.128 called with the
; variable mask %x2 and passthru %x1. The expected output loads the mask into
; %k1, converts into %xmm1 under {%k1}, then copies %xmm1 to %xmm0.
3526 define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
3527 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_128:
3528 ; X86:       # %bb.0:
3529 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3530 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3531 ; X86-NEXT:    vcvtpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x79,0xc8]
3532 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3533 ; X86-NEXT:    retl # encoding: [0xc3]
3534 ;
3535 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_128:
3536 ; X64:       # %bb.0:
3537 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3538 ; X64-NEXT:    vcvtpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x79,0xc8]
3539 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3540 ; X64-NEXT:    retq # encoding: [0xc3]
3541   %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
3542   ret <4 x i32> %res
3543 }
3544
; All-ones-mask llvm.x86.avx512.mask.cvtpd2udq.128 followed by a shuffle that
; keeps result lanes 0-1 and takes lanes 2-3 from a zero vector. The expected
; output is a lone vcvtpd2udq, i.e. no separate instruction for the shuffle.
; NOTE(review): presumably this relies on the 128-bit convert already zeroing
; result lanes 2-3 -- confirm against the instruction reference.
3545 define <4 x i32>@test_int_x86_avx512_cvt_pd2udq_128_zext(<2 x double> %x0, <4 x i32> %x1) {
3546 ; CHECK-LABEL: test_int_x86_avx512_cvt_pd2udq_128_zext:
3547 ; CHECK:       # %bb.0:
3548 ; CHECK-NEXT:    vcvtpd2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x79,0xc0]
3549 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3550   %res2 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
; Shuffle indices 4 and 5 select elements of the zeroinitializer operand.
3551   %res3 = shufflevector <4 x i32> %res2, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3552   ret <4 x i32> %res3
3553 }
3554
; Variable-mask llvm.x86.avx512.mask.cvtpd2udq.128 (merging into %x1) followed
; by a shuffle that keeps result lanes 0-1 and takes lanes 2-3 from a zero
; vector. The expected output is the {%k1}-masked vcvtpd2udq plus a register
; copy, with no extra instruction for the shuffle.
; NOTE(review): presumably this relies on the 128-bit convert already zeroing
; result lanes 2-3 -- confirm against the instruction reference.
3555 define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_128_zext(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
3556 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_128_zext:
3557 ; X86:       # %bb.0:
3558 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3559 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3560 ; X86-NEXT:    vcvtpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x79,0xc8]
3561 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3562 ; X86-NEXT:    retl # encoding: [0xc3]
3563 ;
3564 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_128_zext:
3565 ; X64:       # %bb.0:
3566 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3567 ; X64-NEXT:    vcvtpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x79,0xc8]
3568 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3569 ; X64-NEXT:    retq # encoding: [0xc3]
3570   %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
; Shuffle indices 4 and 5 select elements of the zeroinitializer operand.
3571   %res1 = shufflevector <4 x i32> %res, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3572   ret <4 x i32> %res1
3573 }
3574
3575 declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double>, <4 x i32>, i8)
3576
; Unmasked case with a 256-bit source: llvm.x86.avx512.mask.cvtpd2udq.256
; called with an all-ones mask. The expected output is a single vcvtpd2udq
; from %ymm0 to %xmm0, followed by vzeroupper before the return.
3577 define <4 x i32>@test_int_x86_avx512_cvt_pd2udq_256(<4 x double> %x0, <4 x i32> %x1) {
3578 ; CHECK-LABEL: test_int_x86_avx512_cvt_pd2udq_256:
3579 ; CHECK:       # %bb.0:
3580 ; CHECK-NEXT:    vcvtpd2udq %ymm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x28,0x79,0xc0]
3581 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3582 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3583   %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
3584   ret <4 x i32> %res
3585 }
3586
; Merge-masked case with a 256-bit source: llvm.x86.avx512.mask.cvtpd2udq.256
; called with the variable mask %x2 and passthru %x1. The expected output
; converts %ymm0 into %xmm1 under {%k1}, copies %xmm1 to %xmm0, and emits
; vzeroupper before the return.
3587 define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
3588 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_256:
3589 ; X86:       # %bb.0:
3590 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3591 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3592 ; X86-NEXT:    vcvtpd2udq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x79,0xc8]
3593 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3594 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3595 ; X86-NEXT:    retl # encoding: [0xc3]
3596 ;
3597 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_256:
3598 ; X64:       # %bb.0:
3599 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3600 ; X64-NEXT:    vcvtpd2udq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x79,0xc8]
3601 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3602 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3603 ; X64-NEXT:    retq # encoding: [0xc3]
3604   %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
3605   ret <4 x i32> %res
3606 }
3607
3608 declare <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float>, <4 x i32>, i8)
3609
; Unmasked case: llvm.x86.avx512.mask.cvtps2dq.128 called with an all-ones
; mask. The expected output is a single vcvtps2dq on %xmm0 annotated "EVEX TO
; VEX Compression".
3610 define <4 x i32>@test_int_x86_avx512_cvt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1) {
3611 ; CHECK-LABEL: test_int_x86_avx512_cvt_ps2dq_128:
3612 ; CHECK:       # %bb.0:
3613 ; CHECK-NEXT:    vcvtps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5b,0xc0]
3614 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3615   %res = call <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
3616   ret <4 x i32> %res
3617 }
3618
; Merge-masked case: llvm.x86.avx512.mask.cvtps2dq.128 called with the
; variable mask %x2 and passthru %x1. The expected output loads the mask into
; %k1, converts into %xmm1 under {%k1}, then copies %xmm1 to %xmm0.
3619 define <4 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
3620 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_128:
3621 ; X86:       # %bb.0:
3622 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3623 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3624 ; X86-NEXT:    vcvtps2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x5b,0xc8]
3625 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3626 ; X86-NEXT:    retl # encoding: [0xc3]
3627 ;
3628 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_128:
3629 ; X64:       # %bb.0:
3630 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3631 ; X64-NEXT:    vcvtps2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x5b,0xc8]
3632 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3633 ; X64-NEXT:    retq # encoding: [0xc3]
3634   %res = call <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
3635   ret <4 x i32> %res
3636 }
3637
3638 declare <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float>, <8 x i32>, i8)
3639
; Unmasked 256-bit case: llvm.x86.avx512.mask.cvtps2dq.256 called with an
; all-ones mask. The expected output is a single vcvtps2dq on %ymm0 annotated
; "EVEX TO VEX Compression"; the <8 x i32> result is returned in %ymm0 and the
; checks expect no vzeroupper.
3640 define <8 x i32>@test_int_x86_avx512_cvt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1) {
3641 ; CHECK-LABEL: test_int_x86_avx512_cvt_ps2dq_256:
3642 ; CHECK:       # %bb.0:
3643 ; CHECK-NEXT:    vcvtps2dq %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5b,0xc0]
3644 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3645   %res = call <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
3646   ret <8 x i32> %res
3647 }
3648
; Merge-masked 256-bit case: llvm.x86.avx512.mask.cvtps2dq.256 called with the
; variable mask %x2 and passthru %x1. The expected output loads the mask into
; %k1, converts into %ymm1 under {%k1}, then copies %ymm1 to %ymm0.
3649 define <8 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
3650 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_256:
3651 ; X86:       # %bb.0:
3652 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3653 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3654 ; X86-NEXT:    vcvtps2dq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x5b,0xc8]
3655 ; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
3656 ; X86-NEXT:    retl # encoding: [0xc3]
3657 ;
3658 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_256:
3659 ; X64:       # %bb.0:
3660 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3661 ; X64-NEXT:    vcvtps2dq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x5b,0xc8]
3662 ; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
3663 ; X64-NEXT:    retq # encoding: [0xc3]
3664   %res = call <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
3665   ret <8 x i32> %res
3666 }
3667
3668 declare <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float>, <4 x i32>, i8)
3669
; Unmasked case: llvm.x86.avx512.mask.cvtps2udq.128 called with an all-ones
; mask. The expected output is a single vcvtps2udq with a 0x62-prefixed
; encoding and no "EVEX TO VEX Compression" annotation.
3670 define <4 x i32>@test_int_x86_avx512_cvt_ps2udq_128(<4 x float> %x0, <4 x i32> %x1) {
3671 ; CHECK-LABEL: test_int_x86_avx512_cvt_ps2udq_128:
3672 ; CHECK:       # %bb.0:
3673 ; CHECK-NEXT:    vcvtps2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x08,0x79,0xc0]
3674 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3675   %res = call <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
3676   ret <4 x i32> %res
3677 }
3678
; Merge-masked case: llvm.x86.avx512.mask.cvtps2udq.128 called with the
; variable mask %x2 and passthru %x1. The expected output loads the mask into
; %k1, converts into %xmm1 under {%k1}, then copies %xmm1 to %xmm0.
3679 define <4 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
3680 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_128:
3681 ; X86:       # %bb.0:
3682 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3683 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3684 ; X86-NEXT:    vcvtps2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x79,0xc8]
3685 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3686 ; X86-NEXT:    retl # encoding: [0xc3]
3687 ;
3688 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_128:
3689 ; X64:       # %bb.0:
3690 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3691 ; X64-NEXT:    vcvtps2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x79,0xc8]
3692 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3693 ; X64-NEXT:    retq # encoding: [0xc3]
3694   %res = call <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
3695   ret <4 x i32> %res
3696 }
3697
3698 declare <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float>, <8 x i32>, i8)
3699
; Unmasked 256-bit case: llvm.x86.avx512.mask.cvtps2udq.256 called with an
; all-ones mask. The expected output is a single vcvtps2udq on %ymm0 with a
; 0x62-prefixed encoding; the <8 x i32> result is returned in %ymm0 and the
; checks expect no vzeroupper.
3700 define <8 x i32>@test_int_x86_avx512_cvt_ps2udq_256(<8 x float> %x0, <8 x i32> %x1) {
3701 ; CHECK-LABEL: test_int_x86_avx512_cvt_ps2udq_256:
3702 ; CHECK:       # %bb.0:
3703 ; CHECK-NEXT:    vcvtps2udq %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x28,0x79,0xc0]
3704 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3705   %res = call <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
3706   ret <8 x i32> %res
3707 }
3708
; Merge-masked 256-bit case: llvm.x86.avx512.mask.cvtps2udq.256 called with
; the variable mask %x2 and passthru %x1. The expected output loads the mask
; into %k1, converts into %ymm1 under {%k1}, then copies %ymm1 to %ymm0.
3709 define <8 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
3710 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_256:
3711 ; X86:       # %bb.0:
3712 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3713 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3714 ; X86-NEXT:    vcvtps2udq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x79,0xc8]
3715 ; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
3716 ; X86-NEXT:    retl # encoding: [0xc3]
3717 ;
3718 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_256:
3719 ; X64:       # %bb.0:
3720 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3721 ; X64-NEXT:    vcvtps2udq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x79,0xc8]
3722 ; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
3723 ; X64-NEXT:    retq # encoding: [0xc3]
3724   %res = call <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
3725   ret <8 x i32> %res
3726 }
3727
3728 declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double>, <4 x i32>, i8)
3729
; Unmasked case: llvm.x86.avx512.mask.cvttpd2dq.128 called with an all-ones
; mask. The expected output is a single vcvttpd2dq annotated "EVEX TO VEX
; Compression".
; NOTE(review): the function name contains "_ask_", whereas other unmasked
; tests in this file (e.g. test_int_x86_avx512_cvtt_pd2udq_128) have no such
; component. This looks like a leftover fragment of "mask". If it is renamed,
; the label check line below must be renamed to match.
3730 define <4 x i32>@test_int_x86_avx512_ask_cvtt_pd2dq_128(<2 x double> %x0, <4 x i32> %x1) {
3731 ; CHECK-LABEL: test_int_x86_avx512_ask_cvtt_pd2dq_128:
3732 ; CHECK:       # %bb.0:
3733 ; CHECK-NEXT:    vcvttpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0]
3734 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3735   %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
3736   ret <4 x i32> %res
3737 }
3738
; Merge-masked case: llvm.x86.avx512.mask.cvttpd2dq.128 called with the
; variable mask %x2 and passthru %x1. The expected output loads the mask into
; %k1, converts into %xmm1 under {%k1}, then copies %xmm1 to %xmm0.
3739 define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
3740 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_128:
3741 ; X86:       # %bb.0:
3742 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3743 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3744 ; X86-NEXT:    vcvttpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe6,0xc8]
3745 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3746 ; X86-NEXT:    retl # encoding: [0xc3]
3747 ;
3748 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_128:
3749 ; X64:       # %bb.0:
3750 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3751 ; X64-NEXT:    vcvttpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe6,0xc8]
3752 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3753 ; X64-NEXT:    retq # encoding: [0xc3]
3754   %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
3755   ret <4 x i32> %res
3756 }
3757
; All-ones-mask llvm.x86.avx512.mask.cvttpd2dq.128 followed by a shuffle that
; keeps result lanes 0-1 and takes lanes 2-3 from a zero vector. The expected
; output is a lone vcvttpd2dq, i.e. no separate instruction for the shuffle.
; NOTE(review): presumably this relies on the 128-bit convert already zeroing
; result lanes 2-3 -- confirm against the instruction reference.
3758 define <4 x i32>@test_int_x86_avx512_cvtt_pd2dq_128_zext(<2 x double> %x0, <4 x i32> %x1) {
3759 ; CHECK-LABEL: test_int_x86_avx512_cvtt_pd2dq_128_zext:
3760 ; CHECK:       # %bb.0:
3761 ; CHECK-NEXT:    vcvttpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0]
3762 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3763   %res2 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
; Shuffle indices 4 and 5 select elements of the zeroinitializer operand.
3764   %res3 = shufflevector <4 x i32> %res2, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3765   ret <4 x i32> %res3
3766 }
3767
; Variable-mask llvm.x86.avx512.mask.cvttpd2dq.128 (merging into %x1) followed
; by a shuffle that keeps result lanes 0-1 and takes lanes 2-3 from a zero
; vector. The expected output is the {%k1}-masked vcvttpd2dq plus a register
; copy, with no extra instruction for the shuffle.
; NOTE(review): presumably this relies on the 128-bit convert already zeroing
; result lanes 2-3 -- confirm against the instruction reference.
3768 define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_128_zext(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
3769 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_128_zext:
3770 ; X86:       # %bb.0:
3771 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3772 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3773 ; X86-NEXT:    vcvttpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe6,0xc8]
3774 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3775 ; X86-NEXT:    retl # encoding: [0xc3]
3776 ;
3777 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_128_zext:
3778 ; X64:       # %bb.0:
3779 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3780 ; X64-NEXT:    vcvttpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe6,0xc8]
3781 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3782 ; X64-NEXT:    retq # encoding: [0xc3]
3783   %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
; Shuffle indices 4 and 5 select elements of the zeroinitializer operand.
3784   %res1 = shufflevector <4 x i32> %res, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3785   ret <4 x i32> %res1
3786 }
3787
3788 declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double>, <4 x i32>, i8)
3789
; Unmasked case: llvm.x86.avx512.mask.cvttpd2udq.128 called with an all-ones
; mask. The expected output is a single vcvttpd2udq with a 0x62-prefixed
; encoding and no "EVEX TO VEX Compression" annotation.
3790 define <4 x i32>@test_int_x86_avx512_cvtt_pd2udq_128(<2 x double> %x0, <4 x i32> %x1) {
3791 ; CHECK-LABEL: test_int_x86_avx512_cvtt_pd2udq_128:
3792 ; CHECK:       # %bb.0:
3793 ; CHECK-NEXT:    vcvttpd2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x78,0xc0]
3794 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3795   %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
3796   ret <4 x i32> %res
3797 }
3798
; Merge-masked case of the cvttpd2udq.128 intrinsic. The i8 mask %x2 is moved
; into %k1 (read from the stack on i686, from %edi on x86_64); the convert is
; expected to write %xmm1 under that mask, and %xmm1 is then copied to %xmm0.
3799 define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
3800 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_128:
3801 ; X86:       # %bb.0:
3802 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3803 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3804 ; X86-NEXT:    vcvttpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x78,0xc8]
3805 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3806 ; X86-NEXT:    retl # encoding: [0xc3]
3807 ;
3808 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_128:
3809 ; X64:       # %bb.0:
3810 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3811 ; X64-NEXT:    vcvttpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x78,0xc8]
3812 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3813 ; X64-NEXT:    retq # encoding: [0xc3]
3814   %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
3815   ret <4 x i32> %res
3816 }
3817
; Unmasked cvttpd2udq.128 (mask i8 -1) whose result is shuffled with a zero
; vector (indices 0, 1, 4, 5), so lanes 2-3 of the returned value are zero.
; The expected output is still only the single convert instruction; no extra
; code is expected for the shufflevector.
3818 define <4 x i32>@test_int_x86_avx512_cvtt_pd2udq_128_zext(<2 x double> %x0, <4 x i32> %x1) {
3819 ; CHECK-LABEL: test_int_x86_avx512_cvtt_pd2udq_128_zext:
3820 ; CHECK:       # %bb.0:
3821 ; CHECK-NEXT:    vcvttpd2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x78,0xc0]
3822 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3823   %res2 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
3824   %res3 = shufflevector <4 x i32> %res2, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3825   ret <4 x i32> %res3
3826 }
3827
; Merge-masked cvttpd2udq.128 whose result is shuffled with a zero vector
; (indices 0, 1, 4, 5). The expected output matches the plain masked test:
; mask into %k1, masked convert into %xmm1, copy to %xmm0, with no separate
; instruction for the shufflevector.
3828 define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_128_zext(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
3829 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_128_zext:
3830 ; X86:       # %bb.0:
3831 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3832 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3833 ; X86-NEXT:    vcvttpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x78,0xc8]
3834 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3835 ; X86-NEXT:    retl # encoding: [0xc3]
3836 ;
3837 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_128_zext:
3838 ; X64:       # %bb.0:
3839 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3840 ; X64-NEXT:    vcvttpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x78,0xc8]
3841 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3842 ; X64-NEXT:    retq # encoding: [0xc3]
3843   %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
3844   %res1 = shufflevector <4 x i32> %res, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3845   ret <4 x i32> %res1
3846 }
3847
3848 declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double>, <4 x i32>, i8)
3849
; Unmasked case of the cvttpd2udq.256 intrinsic (mask i8 -1): a <4 x double>
; source yields a <4 x i32> result. The expected output is one vcvttpd2udq
; from %ymm0 to %xmm0, then vzeroupper before the return.
3850 define <4 x i32>@test_int_x86_avx512_cvtt_pd2udq_256(<4 x double> %x0, <4 x i32> %x1) {
3851 ; CHECK-LABEL: test_int_x86_avx512_cvtt_pd2udq_256:
3852 ; CHECK:       # %bb.0:
3853 ; CHECK-NEXT:    vcvttpd2udq %ymm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x28,0x78,0xc0]
3854 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3855 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3856   %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
3857   ret <4 x i32> %res
3858 }
3859
; Merge-masked case of the cvttpd2udq.256 intrinsic. The i8 mask %x2 is moved
; into %k1; the convert reads %ymm0 and is expected to write %xmm1 under that
; mask. %xmm1 is then copied to %xmm0 and vzeroupper precedes the return.
3860 define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
3861 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_256:
3862 ; X86:       # %bb.0:
3863 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3864 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3865 ; X86-NEXT:    vcvttpd2udq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x78,0xc8]
3866 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3867 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3868 ; X86-NEXT:    retl # encoding: [0xc3]
3869 ;
3870 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_256:
3871 ; X64:       # %bb.0:
3872 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3873 ; X64-NEXT:    vcvttpd2udq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x78,0xc8]
3874 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3875 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3876 ; X64-NEXT:    retq # encoding: [0xc3]
3877   %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
3878   ret <4 x i32> %res
3879 }
3880
3881 declare <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float>, <4 x i32>, i8)
3882
; Unmasked case of the cvttps2udq.128 intrinsic (mask i8 -1). Both run lines
; share one set of assertions, which expect a single unmasked vcvttps2udq on
; %xmm0 followed by the return.
3883 define <4 x i32>@test_int_x86_avx512_cvtt_ps2udq_128(<4 x float> %x0, <4 x i32> %x1) {
3884 ; CHECK-LABEL: test_int_x86_avx512_cvtt_ps2udq_128:
3885 ; CHECK:       # %bb.0:
3886 ; CHECK-NEXT:    vcvttps2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x08,0x78,0xc0]
3887 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3888   %res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
3889   ret <4 x i32> %res
3890 }
3891
; Merge-masked case of the cvttps2udq.128 intrinsic. The i8 mask %x2 is moved
; into %k1 (read from the stack on i686, from %edi on x86_64); the convert is
; expected to write %xmm1 under that mask, and %xmm1 is then copied to %xmm0.
3892 define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
3893 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_128:
3894 ; X86:       # %bb.0:
3895 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3896 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3897 ; X86-NEXT:    vcvttps2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x78,0xc8]
3898 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3899 ; X86-NEXT:    retl # encoding: [0xc3]
3900 ;
3901 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_128:
3902 ; X64:       # %bb.0:
3903 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3904 ; X64-NEXT:    vcvttps2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x78,0xc8]
3905 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3906 ; X64-NEXT:    retq # encoding: [0xc3]
3907   %res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
3908   ret <4 x i32> %res
3909 }
3910
3911 declare <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float>, <8 x i32>, i8)
3912
; Unmasked case of the cvttps2udq.256 intrinsic (mask i8 -1). Both run lines
; share one set of assertions, which expect a single unmasked vcvttps2udq on
; %ymm0 followed by the return.
3913 define <8 x i32>@test_int_x86_avx512_cvtt_ps2udq_256(<8 x float> %x0, <8 x i32> %x1) {
3914 ; CHECK-LABEL: test_int_x86_avx512_cvtt_ps2udq_256:
3915 ; CHECK:       # %bb.0:
3916 ; CHECK-NEXT:    vcvttps2udq %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x28,0x78,0xc0]
3917 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3918   %res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
3919   ret <8 x i32> %res
3920 }
3921
; Merge-masked case of the cvttps2udq.256 intrinsic. The i8 mask %x2 is moved
; into %k1; the convert is expected to write %ymm1 under that mask, and %ymm1
; is then copied to %ymm0 for the return.
3922 define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
3923 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_256:
3924 ; X86:       # %bb.0:
3925 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3926 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3927 ; X86-NEXT:    vcvttps2udq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x78,0xc8]
3928 ; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
3929 ; X86-NEXT:    retl # encoding: [0xc3]
3930 ;
3931 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_256:
3932 ; X64:       # %bb.0:
3933 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3934 ; X64-NEXT:    vcvttps2udq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x78,0xc8]
3935 ; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
3936 ; X64-NEXT:    retq # encoding: [0xc3]
3937   %res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
3938   ret <8 x i32> %res
3939 }
3940
3941 declare <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double>, i32, <2 x double>, i8)
3942
; Calls rndscale.pd.128 twice on %x0 and adds the results: once with
; immediate 4 under mask %x3, once with immediate 88 and an all-ones mask.
; Expected output: a merge-masked vrndscalepd $4 into %xmm1, an unmasked
; vrndscalepd $88 on %xmm0, then vaddpd.
3943 define <2 x double>@test_int_x86_avx512_mask_rndscale_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
3944 ; X86-LABEL: test_int_x86_avx512_mask_rndscale_pd_128:
3945 ; X86:       # %bb.0:
3946 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3947 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3948 ; X86-NEXT:    vrndscalepd $4, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x09,0xc8,0x04]
3949 ; X86-NEXT:    vrndscalepd $88, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x09,0xc0,0x58]
3950 ; X86-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
3951 ; X86-NEXT:    retl # encoding: [0xc3]
3952 ;
3953 ; X64-LABEL: test_int_x86_avx512_mask_rndscale_pd_128:
3954 ; X64:       # %bb.0:
3955 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3956 ; X64-NEXT:    vrndscalepd $4, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x09,0xc8,0x04]
3957 ; X64-NEXT:    vrndscalepd $88, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x09,0xc0,0x58]
3958 ; X64-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
3959 ; X64-NEXT:    retq # encoding: [0xc3]
3960   %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %x0, i32 4, <2 x double> %x2, i8 %x3)
3961   %res1 = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %x0, i32 88, <2 x double> %x2, i8 -1)
3962   %res2 = fadd <2 x double> %res, %res1
3963   ret <2 x double> %res2
3964 }
3965
3966 declare <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double>, i32, <4 x double>, i8)
3967
; 256-bit counterpart of the rndscale.pd test: immediate 4 under mask %x3 and
; immediate 88 with an all-ones mask, both on %x0, results added. Expected
; output: a merge-masked vrndscalepd $4 into %ymm1, an unmasked
; vrndscalepd $88 on %ymm0, then vaddpd.
3968 define <4 x double>@test_int_x86_avx512_mask_rndscale_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
3969 ; X86-LABEL: test_int_x86_avx512_mask_rndscale_pd_256:
3970 ; X86:       # %bb.0:
3971 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3972 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3973 ; X86-NEXT:    vrndscalepd $4, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x09,0xc8,0x04]
3974 ; X86-NEXT:    vrndscalepd $88, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x09,0xc0,0x58]
3975 ; X86-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
3976 ; X86-NEXT:    retl # encoding: [0xc3]
3977 ;
3978 ; X64-LABEL: test_int_x86_avx512_mask_rndscale_pd_256:
3979 ; X64:       # %bb.0:
3980 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3981 ; X64-NEXT:    vrndscalepd $4, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x09,0xc8,0x04]
3982 ; X64-NEXT:    vrndscalepd $88, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x09,0xc0,0x58]
3983 ; X64-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
3984 ; X64-NEXT:    retq # encoding: [0xc3]
3985   %res = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %x0, i32 4, <4 x double> %x2, i8 %x3)
3986   %res1 = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %x0, i32 88, <4 x double> %x2, i8 -1)
3987   %res2 = fadd <4 x double> %res, %res1
3988   ret <4 x double> %res2
3989 }
3990
3991 declare <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float>, i32, <4 x float>, i8)
3992
; Calls rndscale.ps.128 twice on %x0 and adds the results: immediate 88 under
; mask %x3, and immediate 4 with an all-ones mask. The masked call is expected
; as vrndscaleps $88 {%k1}; the unmasked immediate-4 call is expected as
; vroundps $4, which the assertions mark as EVEX-to-VEX compressed.
3993 define <4 x float>@test_int_x86_avx512_mask_rndscale_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
3994 ; X86-LABEL: test_int_x86_avx512_mask_rndscale_ps_128:
3995 ; X86:       # %bb.0:
3996 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3997 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3998 ; X86-NEXT:    vrndscaleps $88, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x08,0xc8,0x58]
3999 ; X86-NEXT:    vroundps $4, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x08,0xc0,0x04]
4000 ; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
4001 ; X86-NEXT:    retl # encoding: [0xc3]
4002 ;
4003 ; X64-LABEL: test_int_x86_avx512_mask_rndscale_ps_128:
4004 ; X64:       # %bb.0:
4005 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4006 ; X64-NEXT:    vrndscaleps $88, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x08,0xc8,0x58]
4007 ; X64-NEXT:    vroundps $4, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x08,0xc0,0x04]
4008 ; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
4009 ; X64-NEXT:    retq # encoding: [0xc3]
4010   %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %x0, i32 88, <4 x float> %x2, i8 %x3)
4011   %res1 = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %x0, i32 4, <4 x float> %x2, i8 -1)
4012   %res2 = fadd <4 x float> %res, %res1
4013   ret <4 x float> %res2
4014 }
4015
4016 declare <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float>, i32, <8 x float>, i8)
4017
; Calls rndscale.ps.256 twice on %x0 and adds the results: immediate 5 under
; mask %x3, and immediate 66 with an all-ones mask. Expected output: a
; merge-masked vrndscaleps $5 into %ymm1, an unmasked vrndscaleps $66 on
; %ymm0, then vaddps.
4018 define <8 x float>@test_int_x86_avx512_mask_rndscale_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
4019 ; X86-LABEL: test_int_x86_avx512_mask_rndscale_ps_256:
4020 ; X86:       # %bb.0:
4021 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4022 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4023 ; X86-NEXT:    vrndscaleps $5, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x08,0xc8,0x05]
4024 ; X86-NEXT:    vrndscaleps $66, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x08,0xc0,0x42]
4025 ; X86-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
4026 ; X86-NEXT:    retl # encoding: [0xc3]
4027 ;
4028 ; X64-LABEL: test_int_x86_avx512_mask_rndscale_ps_256:
4029 ; X64:       # %bb.0:
4030 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4031 ; X64-NEXT:    vrndscaleps $5, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x08,0xc8,0x05]
4032 ; X64-NEXT:    vrndscaleps $66, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x08,0xc0,0x42]
4033 ; X64-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
4034 ; X64-NEXT:    retq # encoding: [0xc3]
4035   %res = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %x0, i32 5, <8 x float> %x2, i8 %x3)
4036   %res1 = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %x0, i32 66, <8 x float> %x2, i8 -1)
4037   %res2 = fadd <8 x float> %res, %res1
4038   ret <8 x float> %res2
4039 }
4040
4041 declare <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double>, i32, <2 x double>, i8)
4042
; Exercises three forms of getmant.pd.128 on %x0 and sums the results:
;   - immediate 11, mask %x3, passthru %x2       -> merge-masked {%k1}
;   - immediate 12, mask %x3, zero passthru      -> zero-masked {%k1} {z}
;   - immediate 13, all-ones mask                -> unmasked
; The arrows give the vgetmantpd form the assertions expect for each call.
4043 define <2 x double>@test_int_x86_avx512_mask_getmant_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
4044 ; X86-LABEL: test_int_x86_avx512_mask_getmant_pd_128:
4045 ; X86:       # %bb.0:
4046 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4047 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4048 ; X86-NEXT:    vgetmantpd $11, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x26,0xc8,0x0b]
4049 ; X86-NEXT:    vgetmantpd $12, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x26,0xd0,0x0c]
4050 ; X86-NEXT:    vgetmantpd $13, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x26,0xc0,0x0d]
4051 ; X86-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
4052 ; X86-NEXT:    vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
4053 ; X86-NEXT:    retl # encoding: [0xc3]
4054 ;
4055 ; X64-LABEL: test_int_x86_avx512_mask_getmant_pd_128:
4056 ; X64:       # %bb.0:
4057 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4058 ; X64-NEXT:    vgetmantpd $11, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x26,0xc8,0x0b]
4059 ; X64-NEXT:    vgetmantpd $12, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x26,0xd0,0x0c]
4060 ; X64-NEXT:    vgetmantpd $13, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x26,0xc0,0x0d]
4061 ; X64-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
4062 ; X64-NEXT:    vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
4063 ; X64-NEXT:    retq # encoding: [0xc3]
4064   %res = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 11, <2 x double> %x2, i8 %x3)
4065   %res2 = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 12, <2 x double> zeroinitializer, i8 %x3)
4066   %res1 = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 13, <2 x double> %x2, i8 -1)
4067   %res3 = fadd <2 x double> %res, %res1
4068   %res4 = fadd <2 x double> %res2, %res3
4069   ret <2 x double> %res4
4070 }
4071
4072 declare <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double>, i32, <4 x double>, i8)
4073
; Calls getmant.pd.256 twice on %x0 and adds the results: immediate 11 under
; mask %x3, and immediate 12 with an all-ones mask. Expected output: a
; merge-masked vgetmantpd $11 into %ymm1, an unmasked vgetmantpd $12 on
; %ymm0, then vaddpd.
4074 define <4 x double>@test_int_x86_avx512_mask_getmant_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
4075 ; X86-LABEL: test_int_x86_avx512_mask_getmant_pd_256:
4076 ; X86:       # %bb.0:
4077 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4078 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4079 ; X86-NEXT:    vgetmantpd $11, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x26,0xc8,0x0b]
4080 ; X86-NEXT:    vgetmantpd $12, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x26,0xc0,0x0c]
4081 ; X86-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
4082 ; X86-NEXT:    retl # encoding: [0xc3]
4083 ;
4084 ; X64-LABEL: test_int_x86_avx512_mask_getmant_pd_256:
4085 ; X64:       # %bb.0:
4086 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4087 ; X64-NEXT:    vgetmantpd $11, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x26,0xc8,0x0b]
4088 ; X64-NEXT:    vgetmantpd $12, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x26,0xc0,0x0c]
4089 ; X64-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
4090 ; X64-NEXT:    retq # encoding: [0xc3]
4091   %res = call <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double> %x0, i32 11, <4 x double> %x2, i8 %x3)
4092   %res1 = call <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double> %x0, i32 12, <4 x double> %x2, i8 -1)
4093   %res2 = fadd <4 x double> %res, %res1
4094   ret <4 x double> %res2
4095 }
4096
4097 declare <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float>, i32, <4 x float>, i8)
4098
; Calls getmant.ps.128 twice on %x0 and adds the results: immediate 11 under
; mask %x3, and immediate 12 with an all-ones mask. Expected output: a
; merge-masked vgetmantps $11 into %xmm1, an unmasked vgetmantps $12 on
; %xmm0, then vaddps.
4099 define <4 x float>@test_int_x86_avx512_mask_getmant_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
4100 ; X86-LABEL: test_int_x86_avx512_mask_getmant_ps_128:
4101 ; X86:       # %bb.0:
4102 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4103 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4104 ; X86-NEXT:    vgetmantps $11, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x26,0xc8,0x0b]
4105 ; X86-NEXT:    vgetmantps $12, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x26,0xc0,0x0c]
4106 ; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
4107 ; X86-NEXT:    retl # encoding: [0xc3]
4108 ;
4109 ; X64-LABEL: test_int_x86_avx512_mask_getmant_ps_128:
4110 ; X64:       # %bb.0:
4111 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4112 ; X64-NEXT:    vgetmantps $11, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x26,0xc8,0x0b]
4113 ; X64-NEXT:    vgetmantps $12, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x26,0xc0,0x0c]
4114 ; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
4115 ; X64-NEXT:    retq # encoding: [0xc3]
4116   %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float> %x0, i32 11, <4 x float> %x2, i8 %x3)
4117   %res1 = call <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float> %x0, i32 12, <4 x float> %x2, i8 -1)
4118   %res2 = fadd <4 x float> %res, %res1
4119   ret <4 x float> %res2
4120 }
4121
4122 declare <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float>, i32, <8 x float>, i8)
4123
; Calls getmant.ps.256 twice on %x0 and adds the results: immediate 11 under
; mask %x3, and immediate 12 with an all-ones mask. Expected output: a
; merge-masked vgetmantps $11 into %ymm1, an unmasked vgetmantps $12 on
; %ymm0, then vaddps.
4124 define <8 x float>@test_int_x86_avx512_mask_getmant_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
4125 ; X86-LABEL: test_int_x86_avx512_mask_getmant_ps_256:
4126 ; X86:       # %bb.0:
4127 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4128 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4129 ; X86-NEXT:    vgetmantps $11, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x26,0xc8,0x0b]
4130 ; X86-NEXT:    vgetmantps $12, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x26,0xc0,0x0c]
4131 ; X86-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
4132 ; X86-NEXT:    retl # encoding: [0xc3]
4133 ;
4134 ; X64-LABEL: test_int_x86_avx512_mask_getmant_ps_256:
4135 ; X64:       # %bb.0:
4136 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4137 ; X64-NEXT:    vgetmantps $11, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x26,0xc8,0x0b]
4138 ; X64-NEXT:    vgetmantps $12, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x26,0xc0,0x0c]
4139 ; X64-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
4140 ; X64-NEXT:    retq # encoding: [0xc3]
4141   %res = call <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 %x3)
4142   %res1 = call <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float> %x0, i32 12, <8 x float> %x2, i8 -1)
4143   %res2 = fadd <8 x float> %res, %res1
4144   ret <8 x float> %res2
4145 }
4146
4147 declare <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32)
4148
; Unmasked pternlog.d.128 with immediate 33. Both run lines share one set of
; assertions: a single vpternlogd $33 on xmm registers, followed by the
; printer's decoded comment giving the boolean function of the three operands.
4149 define <4 x i32>@test_int_x86_avx512_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
4150 ; CHECK-LABEL: test_int_x86_avx512_pternlog_d_128:
4151 ; CHECK:       # %bb.0:
4152 ; CHECK-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0x75,0x08,0x25,0xc2,0x21]
4153 ; CHECK-NEXT:    # xmm0 = ~(xmm1 | (xmm0 ^ xmm2))
4154 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4155   %1 = call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33)
4156   ret <4 x i32> %1
4157 }
4158
; Merge-masking written in IR around the unmasked pternlog.d.128 intrinsic:
; %x4 is bitcast to <8 x i1>, elements 0-3 are extracted, and a select picks
; the intrinsic result or %x0 per lane. The expected output is one
; vpternlogd $33 with a {%k1} write mask and no separate blend.
4159 define <4 x i32>@test_int_x86_avx512_mask_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
4160 ; X86-LABEL: test_int_x86_avx512_mask_pternlog_d_128:
4161 ; X86:       # %bb.0:
4162 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4163 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4164 ; X86-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x25,0xc2,0x21]
4165 ; X86-NEXT:    # xmm0 {%k1} = ~(xmm1 | (xmm0 ^ xmm2))
4166 ; X86-NEXT:    retl # encoding: [0xc3]
4167 ;
4168 ; X64-LABEL: test_int_x86_avx512_mask_pternlog_d_128:
4169 ; X64:       # %bb.0:
4170 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4171 ; X64-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x25,0xc2,0x21]
4172 ; X64-NEXT:    # xmm0 {%k1} = ~(xmm1 | (xmm0 ^ xmm2))
4173 ; X64-NEXT:    retq # encoding: [0xc3]
4174   %1 = call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33)
4175   %2 = bitcast i8 %x4 to <8 x i1>
4176   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4177   %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x0
4178   ret <4 x i32> %3
4179 }
4180
4181 declare <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)
4182
; Zero-masking written in IR around the unmasked pternlog.d.128 intrinsic:
; %x4 is bitcast to <8 x i1>, elements 0-3 are extracted, and a select picks
; the intrinsic result or zero per lane. The expected output is one
; vpternlogd $33 with {%k1} {z} and no separate blend.
4183 define <4 x i32>@test_int_x86_avx512_maskz_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
4184 ; X86-LABEL: test_int_x86_avx512_maskz_pternlog_d_128:
4185 ; X86:       # %bb.0:
4186 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4187 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4188 ; X86-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x25,0xc2,0x21]
4189 ; X86-NEXT:    # xmm0 {%k1} {z} = ~(xmm1 | (xmm0 ^ xmm2))
4190 ; X86-NEXT:    retl # encoding: [0xc3]
4191 ;
4192 ; X64-LABEL: test_int_x86_avx512_maskz_pternlog_d_128:
4193 ; X64:       # %bb.0:
4194 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4195 ; X64-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x25,0xc2,0x21]
4196 ; X64-NEXT:    # xmm0 {%k1} {z} = ~(xmm1 | (xmm0 ^ xmm2))
4197 ; X64-NEXT:    retq # encoding: [0xc3]
4198   %1 = call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33)
4199   %2 = bitcast i8 %x4 to <8 x i1>
4200   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4201   %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> zeroinitializer
4202   ret <4 x i32> %3
4203 }
4204
4205 declare <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32)
4206
; Unmasked pternlog.d.256 with immediate 33. Both run lines share one set of
; assertions: a single vpternlogd $33 on ymm registers, followed by the
; printer's decoded comment giving the boolean function of the three operands.
4207 define <8 x i32>@test_int_x86_avx512_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
4208 ; CHECK-LABEL: test_int_x86_avx512_pternlog_d_256:
4209 ; CHECK:       # %bb.0:
4210 ; CHECK-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0x75,0x28,0x25,0xc2,0x21]
4211 ; CHECK-NEXT:    # ymm0 = ~(ymm1 | (ymm0 ^ ymm2))
4212 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4213   %1 = call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33)
4214   ret <8 x i32> %1
4215 }
4216
; Merge-masking written in IR around the unmasked pternlog.d.256 intrinsic:
; %x4 is bitcast to <8 x i1> and used directly as the select condition (all
; eight elements, no extract), choosing the intrinsic result or %x0 per lane.
; The expected output is one vpternlogd $33 on ymm registers with {%k1}.
4217 define <8 x i32>@test_int_x86_avx512_mask_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
4218 ; X86-LABEL: test_int_x86_avx512_mask_pternlog_d_256:
4219 ; X86:       # %bb.0:
4220 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4221 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4222 ; X86-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x25,0xc2,0x21]
4223 ; X86-NEXT:    # ymm0 {%k1} = ~(ymm1 | (ymm0 ^ ymm2))
4224 ; X86-NEXT:    retl # encoding: [0xc3]
4225 ;
4226 ; X64-LABEL: test_int_x86_avx512_mask_pternlog_d_256:
4227 ; X64:       # %bb.0:
4228 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4229 ; X64-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x25,0xc2,0x21]
4230 ; X64-NEXT:    # ymm0 {%k1} = ~(ymm1 | (ymm0 ^ ymm2))
4231 ; X64-NEXT:    retq # encoding: [0xc3]
4232   %1 = call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33)
4233   %2 = bitcast i8 %x4 to <8 x i1>
4234   %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x0
4235   ret <8 x i32> %3
4236 }
4237
4238 declare <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)
4239
; Zero-masking written in IR around the unmasked pternlog.d.256 intrinsic:
; %x4 is bitcast to <8 x i1> and used directly as the select condition,
; choosing the intrinsic result or zero per lane. The expected output is one
; vpternlogd $33 on ymm registers with {%k1} {z}.
4240 define <8 x i32>@test_int_x86_avx512_maskz_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
4241 ; X86-LABEL: test_int_x86_avx512_maskz_pternlog_d_256:
4242 ; X86:       # %bb.0:
4243 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4244 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4245 ; X86-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x25,0xc2,0x21]
4246 ; X86-NEXT:    # ymm0 {%k1} {z} = ~(ymm1 | (ymm0 ^ ymm2))
4247 ; X86-NEXT:    retl # encoding: [0xc3]
4248 ;
4249 ; X64-LABEL: test_int_x86_avx512_maskz_pternlog_d_256:
4250 ; X64:       # %bb.0:
4251 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4252 ; X64-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x25,0xc2,0x21]
4253 ; X64-NEXT:    # ymm0 {%k1} {z} = ~(ymm1 | (ymm0 ^ ymm2))
4254 ; X64-NEXT:    retq # encoding: [0xc3]
4255   %1 = call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33)
4256   %2 = bitcast i8 %x4 to <8 x i1>
4257   %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer
4258   ret <8 x i32> %3
4259 }
4260
4261 declare <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32)
4262
; Unmasked pternlog.q.128 with immediate 33. Both run lines share one set of
; assertions: a single vpternlogq $33 on xmm registers, followed by the
; printer's decoded comment giving the boolean function of the three operands.
4263 define <2 x i64>@test_int_x86_avx512_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
4264 ; CHECK-LABEL: test_int_x86_avx512_pternlog_q_128:
4265 ; CHECK:       # %bb.0:
4266 ; CHECK-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xc2,0x21]
4267 ; CHECK-NEXT:    # xmm0 = ~(xmm1 | (xmm0 ^ xmm2))
4268 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4269   %1 = call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33)
4270   ret <2 x i64> %1
4271 }
4272
; Merge-masking written in IR around the unmasked pternlog.q.128 intrinsic:
; %x4 is bitcast to <8 x i1>, elements 0-1 are extracted, and a select picks
; the intrinsic result or %x0 per lane. The expected output is one
; vpternlogq $33 with a {%k1} write mask and no separate blend.
4273 define <2 x i64>@test_int_x86_avx512_mask_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
4274 ; X86-LABEL: test_int_x86_avx512_mask_pternlog_q_128:
4275 ; X86:       # %bb.0:
4276 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4277 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4278 ; X86-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x09,0x25,0xc2,0x21]
4279 ; X86-NEXT:    # xmm0 {%k1} = ~(xmm1 | (xmm0 ^ xmm2))
4280 ; X86-NEXT:    retl # encoding: [0xc3]
4281 ;
4282 ; X64-LABEL: test_int_x86_avx512_mask_pternlog_q_128:
4283 ; X64:       # %bb.0:
4284 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4285 ; X64-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x09,0x25,0xc2,0x21]
4286 ; X64-NEXT:    # xmm0 {%k1} = ~(xmm1 | (xmm0 ^ xmm2))
4287 ; X64-NEXT:    retq # encoding: [0xc3]
4288   %1 = call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33)
4289   %2 = bitcast i8 %x4 to <8 x i1>
4290   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
4291   %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x0
4292   ret <2 x i64> %3
4293 }
4294
; Zero-masking written in IR around the unmasked pternlog.q.128 intrinsic:
; %x4 is bitcast to <8 x i1>, elements 0-1 are extracted, and a select picks
; the intrinsic result or zero per lane. The expected output is one
; vpternlogq $33 with {%k1} {z} and no separate blend.
4295 define <2 x i64>@test_int_x86_avx512_maskz_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
4296 ; X86-LABEL: test_int_x86_avx512_maskz_pternlog_q_128:
4297 ; X86:       # %bb.0:
4298 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4299 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4300 ; X86-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x25,0xc2,0x21]
4301 ; X86-NEXT:    # xmm0 {%k1} {z} = ~(xmm1 | (xmm0 ^ xmm2))
4302 ; X86-NEXT:    retl # encoding: [0xc3]
4303 ;
4304 ; X64-LABEL: test_int_x86_avx512_maskz_pternlog_q_128:
4305 ; X64:       # %bb.0:
4306 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4307 ; X64-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x25,0xc2,0x21]
4308 ; X64-NEXT:    # xmm0 {%k1} {z} = ~(xmm1 | (xmm0 ^ xmm2))
4309 ; X64-NEXT:    retq # encoding: [0xc3]
4310   %1 = call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33)
4311   %2 = bitcast i8 %x4 to <8 x i1>
4312   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
4313   %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> zeroinitializer
4314   ret <2 x i64> %3
4315 }
4316
4317 declare <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32)
4318
; Unmasked 256-bit pternlog.q test: a direct call with immediate 33 whose result
; is returned unchanged. The assertions are shared by both RUN configurations,
; so the return is matched with a pattern that accepts retl or retq.
4319 define <4 x i64>@test_int_x86_avx512_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
4320 ; CHECK-LABEL: test_int_x86_avx512_pternlog_q_256:
4321 ; CHECK:       # %bb.0:
4322 ; CHECK-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xc2,0x21]
4323 ; CHECK-NEXT:    # ymm0 = ~(ymm1 | (ymm0 ^ ymm2))
4324 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4325   %1 = call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33)
4326   ret <4 x i64> %1
4327 }
4328
; Merge-masked 256-bit pternlog.q test. Lanes 0-3 of the i8 mask %x4 select
; between the intrinsic result (immediate 33) and the first source %x0, so the
; assertions expect one vpternlogq with {%k1} writing ymm0 in place.
4329 define <4 x i64>@test_int_x86_avx512_mask_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
4330 ; X86-LABEL: test_int_x86_avx512_mask_pternlog_q_256:
4331 ; X86:       # %bb.0:
4332 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4333 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4334 ; X86-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x29,0x25,0xc2,0x21]
4335 ; X86-NEXT:    # ymm0 {%k1} = ~(ymm1 | (ymm0 ^ ymm2))
4336 ; X86-NEXT:    retl # encoding: [0xc3]
4337 ;
4338 ; X64-LABEL: test_int_x86_avx512_mask_pternlog_q_256:
4339 ; X64:       # %bb.0:
4340 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4341 ; X64-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x29,0x25,0xc2,0x21]
4342 ; X64-NEXT:    # ymm0 {%k1} = ~(ymm1 | (ymm0 ^ ymm2))
4343 ; X64-NEXT:    retq # encoding: [0xc3]
4344   %1 = call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33)
4345   %2 = bitcast i8 %x4 to <8 x i1>
4346   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4347   %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x0
4348   ret <4 x i64> %3
4349 }
4350
; Zero-masked 256-bit pternlog.q test. Lanes 0-3 of the i8 mask %x4 select
; between the intrinsic result (immediate 33) and zeroinitializer; the
; assertions expect one vpternlogq with {%k1} {z}.
4351 define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
4352 ; X86-LABEL: test_int_x86_avx512_maskz_pternlog_q_256:
4353 ; X86:       # %bb.0:
4354 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4355 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4356 ; X86-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xc2,0x21]
4357 ; X86-NEXT:    # ymm0 {%k1} {z} = ~(ymm1 | (ymm0 ^ ymm2))
4358 ; X86-NEXT:    retl # encoding: [0xc3]
4359 ;
4360 ; X64-LABEL: test_int_x86_avx512_maskz_pternlog_q_256:
4361 ; X64:       # %bb.0:
4362 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4363 ; X64-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xc2,0x21]
4364 ; X64-NEXT:    # ymm0 {%k1} {z} = ~(ymm1 | (ymm0 ^ ymm2))
4365 ; X64-NEXT:    retq # encoding: [0xc3]
4366   %1 = call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33)
4367   %2 = bitcast i8 %x4 to <8 x i1>
4368   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4369   %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> zeroinitializer
4370   ret <4 x i64> %3
4371 }
4372
; Exercises the three masking forms of the 128-bit mask.vcvtps2ph intrinsic in
; one function, each with a distinct immediate so the calls stay separate:
;   imm 2  - all-ones mask (i8 -1), zero passthru  -> unmasked form expected
;   imm 10 - %mask, zero passthru                  -> {%k1} {z} form expected
;   imm 11 - %mask, passthru %src                  -> {%k1} merge form expected
; The three results are added together so that every call contributes to the
; returned value.
4373 define <8 x i16> @test_x86_vcvtps2ph_128(<4 x float> %a0, i8 %mask, <8 x i16> %src) {
4374 ; X86-LABEL: test_x86_vcvtps2ph_128:
4375 ; X86:       # %bb.0:
4376 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4377 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4378 ; X86-NEXT:    vcvtps2ph $2, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc2,0x02]
4379 ; X86-NEXT:    vcvtps2ph $10, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x1d,0xc3,0x0a]
4380 ; X86-NEXT:    vpaddw %xmm3, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xfd,0xd3]
4381 ; X86-NEXT:    vcvtps2ph $11, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1d,0xc1,0x0b]
4382 ; X86-NEXT:    vpaddw %xmm2, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc2]
4383 ; X86-NEXT:    retl # encoding: [0xc3]
4384 ;
4385 ; X64-LABEL: test_x86_vcvtps2ph_128:
4386 ; X64:       # %bb.0:
4387 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4388 ; X64-NEXT:    vcvtps2ph $2, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc2,0x02]
4389 ; X64-NEXT:    vcvtps2ph $10, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x1d,0xc3,0x0a]
4390 ; X64-NEXT:    vpaddw %xmm3, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xfd,0xd3]
4391 ; X64-NEXT:    vcvtps2ph $11, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1d,0xc1,0x0b]
4392 ; X64-NEXT:    vpaddw %xmm2, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc2]
4393 ; X64-NEXT:    retq # encoding: [0xc3]
4394   %res1 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1)
4395   %res2 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 10, <8 x i16> zeroinitializer, i8 %mask)
4396   %res3 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 11, <8 x i16> %src, i8 %mask)
4397   %res0 = add <8 x i16> %res1, %res2
4398   %res = add <8 x i16> %res3, %res0
4399   ret <8 x i16> %res
4400 }
4401
4402 declare <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float>, i32, <8 x i16>, i8) nounwind readonly
4403
; 256-bit-source counterpart of the vcvtps2ph masking test. It makes three calls
; to mask.vcvtps2ph.256 with immediates 2 (all-ones mask, zero passthru), 11
; (%mask, zero passthru) and 12 (%mask, passthru %src), and returns their sum.
; The source is a ymm register while the result is xmm, and the assertions
; expect a vzeroupper before the return.
4404 define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0, i8 %mask, <8 x i16> %src) {
4405 ; X86-LABEL: test_x86_vcvtps2ph_256:
4406 ; X86:       # %bb.0:
4407 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4408 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4409 ; X86-NEXT:    vcvtps2ph $2, %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc2,0x02]
4410 ; X86-NEXT:    vcvtps2ph $11, %ymm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x1d,0xc3,0x0b]
4411 ; X86-NEXT:    vpaddw %xmm3, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xfd,0xd3]
4412 ; X86-NEXT:    vcvtps2ph $12, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1d,0xc1,0x0c]
4413 ; X86-NEXT:    vpaddw %xmm2, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc2]
4414 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
4415 ; X86-NEXT:    retl # encoding: [0xc3]
4416 ;
4417 ; X64-LABEL: test_x86_vcvtps2ph_256:
4418 ; X64:       # %bb.0:
4419 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4420 ; X64-NEXT:    vcvtps2ph $2, %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc2,0x02]
4421 ; X64-NEXT:    vcvtps2ph $11, %ymm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x1d,0xc3,0x0b]
4422 ; X64-NEXT:    vpaddw %xmm3, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xfd,0xd3]
4423 ; X64-NEXT:    vcvtps2ph $12, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1d,0xc1,0x0c]
4424 ; X64-NEXT:    vpaddw %xmm2, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc2]
4425 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
4426 ; X64-NEXT:    retq # encoding: [0xc3]
4427   %res1 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1)
4428   %res2 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %a0, i32 11, <8 x i16> zeroinitializer, i8 %mask)
4429   %res3 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %a0, i32 12, <8 x i16> %src, i8 %mask)
4430   %res0 = add <8 x i16> %res1, %res2
4431   %res = add <8 x i16> %res3, %res0
4432   ret <8 x i16> %res
4433 }
4434
4435 declare <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float>, i32, <8 x i16>, i8) nounwind readonly
4436
; rsqrt14.ps.256, register-register form: zero passthru with an all-ones mask
; (i8 -1). The assertions expect an unmasked vrsqrt14ps on ymm0.
4437 define <8 x float> @test_rsqrt_ps_256_rr(<8 x float> %a0) {
4438 ; CHECK-LABEL: test_rsqrt_ps_256_rr:
4439 ; CHECK:       # %bb.0:
4440 ; CHECK-NEXT:    vrsqrt14ps %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x4e,0xc0]
4441 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4442   %res = call <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 -1)
4443   ret <8 x float> %res
4444 }
4445
; rsqrt14.ps.256, zero-masking form: zero passthru combined with the runtime
; %mask. The assertions expect vrsqrt14ps with {%k1} {z}.
4446 define <8 x float> @test_rsqrt_ps_256_rrkz(<8 x float> %a0, i8 %mask) {
4447 ; X86-LABEL: test_rsqrt_ps_256_rrkz:
4448 ; X86:       # %bb.0:
4449 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4450 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4451 ; X86-NEXT:    vrsqrt14ps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x4e,0xc0]
4452 ; X86-NEXT:    retl # encoding: [0xc3]
4453 ;
4454 ; X64-LABEL: test_rsqrt_ps_256_rrkz:
4455 ; X64:       # %bb.0:
4456 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4457 ; X64-NEXT:    vrsqrt14ps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x4e,0xc0]
4458 ; X64-NEXT:    retq # encoding: [0xc3]
4459   %res = call <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
4460   ret <8 x float> %res
4461 }
4462
; rsqrt14.ps.256, merge-masking form: passthru %a1 combined with the runtime
; %mask. The assertions expect vrsqrt14ps with {%k1} writing ymm1, followed by
; a vmovaps into the return register ymm0.
4463 define <8 x float> @test_rsqrt_ps_256_rrk(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
4464 ; X86-LABEL: test_rsqrt_ps_256_rrk:
4465 ; X86:       # %bb.0:
4466 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4467 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4468 ; X86-NEXT:    vrsqrt14ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x4e,0xc8]
4469 ; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
4470 ; X86-NEXT:    retl # encoding: [0xc3]
4471 ;
4472 ; X64-LABEL: test_rsqrt_ps_256_rrk:
4473 ; X64:       # %bb.0:
4474 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4475 ; X64-NEXT:    vrsqrt14ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x4e,0xc8]
4476 ; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
4477 ; X64-NEXT:    retq # encoding: [0xc3]
4478   %res = call <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float> %a0, <8 x float> %a1, i8 %mask)
4479   ret <8 x float> %res
4480 }
4481
; rsqrt14.ps.128, register-register form: zero passthru with an all-ones mask
; (i8 -1). The assertions expect an unmasked vrsqrt14ps on xmm0.
4482 define <4 x float> @test_rsqrt_ps_128_rr(<4 x float> %a0) {
4483 ; CHECK-LABEL: test_rsqrt_ps_128_rr:
4484 ; CHECK:       # %bb.0:
4485 ; CHECK-NEXT:    vrsqrt14ps %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x4e,0xc0]
4486 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4487   %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 -1)
4488   ret <4 x float> %res
4489 }
4490
; rsqrt14.ps.128, zero-masking form: zero passthru combined with the runtime
; %mask. The assertions expect vrsqrt14ps with {%k1} {z}.
4491 define <4 x float> @test_rsqrt_ps_128_rrkz(<4 x float> %a0, i8 %mask) {
4492 ; X86-LABEL: test_rsqrt_ps_128_rrkz:
4493 ; X86:       # %bb.0:
4494 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4495 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4496 ; X86-NEXT:    vrsqrt14ps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x4e,0xc0]
4497 ; X86-NEXT:    retl # encoding: [0xc3]
4498 ;
4499 ; X64-LABEL: test_rsqrt_ps_128_rrkz:
4500 ; X64:       # %bb.0:
4501 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4502 ; X64-NEXT:    vrsqrt14ps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x4e,0xc0]
4503 ; X64-NEXT:    retq # encoding: [0xc3]
4504   %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask)
4505   ret <4 x float> %res
4506 }
4507
; rsqrt14.ps.128, merge-masking form: passthru %a1 combined with the runtime
; %mask. The assertions expect vrsqrt14ps with {%k1} writing xmm1, followed by
; a vmovaps into the return register xmm0.
4508 define <4 x float> @test_rsqrt_ps_128_rrk(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
4509 ; X86-LABEL: test_rsqrt_ps_128_rrk:
4510 ; X86:       # %bb.0:
4511 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4512 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4513 ; X86-NEXT:    vrsqrt14ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x4e,0xc8]
4514 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
4515 ; X86-NEXT:    retl # encoding: [0xc3]
4516 ;
4517 ; X64-LABEL: test_rsqrt_ps_128_rrk:
4518 ; X64:       # %bb.0:
4519 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4520 ; X64-NEXT:    vrsqrt14ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x4e,0xc8]
4521 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
4522 ; X64-NEXT:    retq # encoding: [0xc3]
4523   %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask)
4524   ret <4 x float> %res
4525 }
4526
4527 declare <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
4528 declare <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float>, <4 x float>, i8) nounwind readnone
4529
; rcp14.ps.256, register-register form: zero passthru with an all-ones mask
; (i8 -1). The assertions expect an unmasked vrcp14ps on ymm0.
4530 define <8 x float> @test_rcp_ps_256_rr(<8 x float> %a0) {
4531 ; CHECK-LABEL: test_rcp_ps_256_rr:
4532 ; CHECK:       # %bb.0:
4533 ; CHECK-NEXT:    vrcp14ps %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x4c,0xc0]
4534 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4535   %res = call <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 -1)
4536   ret <8 x float> %res
4537 }
4538
; rcp14.ps.256, zero-masking form: zero passthru combined with the runtime
; %mask. The assertions expect vrcp14ps with {%k1} {z}.
4539 define <8 x float> @test_rcp_ps_256_rrkz(<8 x float> %a0, i8 %mask) {
4540 ; X86-LABEL: test_rcp_ps_256_rrkz:
4541 ; X86:       # %bb.0:
4542 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4543 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4544 ; X86-NEXT:    vrcp14ps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x4c,0xc0]
4545 ; X86-NEXT:    retl # encoding: [0xc3]
4546 ;
4547 ; X64-LABEL: test_rcp_ps_256_rrkz:
4548 ; X64:       # %bb.0:
4549 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4550 ; X64-NEXT:    vrcp14ps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x4c,0xc0]
4551 ; X64-NEXT:    retq # encoding: [0xc3]
4552   %res = call <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
4553   ret <8 x float> %res
4554 }
4555
; rcp14.ps.256, merge-masking form: passthru %a1 combined with the runtime
; %mask. The assertions expect vrcp14ps with {%k1} writing ymm1, followed by a
; vmovaps into the return register ymm0.
4556 define <8 x float> @test_rcp_ps_256_rrk(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
4557 ; X86-LABEL: test_rcp_ps_256_rrk:
4558 ; X86:       # %bb.0:
4559 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4560 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4561 ; X86-NEXT:    vrcp14ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x4c,0xc8]
4562 ; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
4563 ; X86-NEXT:    retl # encoding: [0xc3]
4564 ;
4565 ; X64-LABEL: test_rcp_ps_256_rrk:
4566 ; X64:       # %bb.0:
4567 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4568 ; X64-NEXT:    vrcp14ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x4c,0xc8]
4569 ; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
4570 ; X64-NEXT:    retq # encoding: [0xc3]
4571   %res = call <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float> %a0, <8 x float> %a1, i8 %mask)
4572   ret <8 x float> %res
4573 }
4574
; rcp14.ps.128, register-register form: zero passthru with an all-ones mask
; (i8 -1). The assertions expect an unmasked vrcp14ps on xmm0.
4575 define <4 x float> @test_rcp_ps_128_rr(<4 x float> %a0) {
4576 ; CHECK-LABEL: test_rcp_ps_128_rr:
4577 ; CHECK:       # %bb.0:
4578 ; CHECK-NEXT:    vrcp14ps %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x4c,0xc0]
4579 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4580   %res = call <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 -1)
4581   ret <4 x float> %res
4582 }
4583
; rcp14.ps.128, zero-masking form: zero passthru combined with the runtime
; %mask. The assertions expect vrcp14ps with {%k1} {z}.
4584 define <4 x float> @test_rcp_ps_128_rrkz(<4 x float> %a0, i8 %mask) {
4585 ; X86-LABEL: test_rcp_ps_128_rrkz:
4586 ; X86:       # %bb.0:
4587 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4588 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4589 ; X86-NEXT:    vrcp14ps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x4c,0xc0]
4590 ; X86-NEXT:    retl # encoding: [0xc3]
4591 ;
4592 ; X64-LABEL: test_rcp_ps_128_rrkz:
4593 ; X64:       # %bb.0:
4594 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4595 ; X64-NEXT:    vrcp14ps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x4c,0xc0]
4596 ; X64-NEXT:    retq # encoding: [0xc3]
4597   %res = call <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask)
4598   ret <4 x float> %res
4599 }
4600
; rcp14.ps.128, merge-masking form: passthru %a1 combined with the runtime
; %mask. The assertions expect vrcp14ps with {%k1} writing xmm1, followed by a
; vmovaps into the return register xmm0.
4601 define <4 x float> @test_rcp_ps_128_rrk(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
4602 ; X86-LABEL: test_rcp_ps_128_rrk:
4603 ; X86:       # %bb.0:
4604 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4605 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4606 ; X86-NEXT:    vrcp14ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x4c,0xc8]
4607 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
4608 ; X86-NEXT:    retl # encoding: [0xc3]
4609 ;
4610 ; X64-LABEL: test_rcp_ps_128_rrk:
4611 ; X64:       # %bb.0:
4612 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4613 ; X64-NEXT:    vrcp14ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x4c,0xc8]
4614 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
4615 ; X64-NEXT:    retq # encoding: [0xc3]
4616   %res = call <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask)
4617   ret <4 x float> %res
4618 }
4619
4620 declare <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
4621 declare <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float>, <4 x float>, i8) nounwind readnone
4622
; rsqrt14.pd.256, register-register form: zero passthru with an all-ones mask
; (i8 -1). The assertions expect an unmasked vrsqrt14pd on ymm0.
4623 define <4 x double> @test_rsqrt_pd_256_rr(<4 x double> %a0) {
4624 ; CHECK-LABEL: test_rsqrt_pd_256_rr:
4625 ; CHECK:       # %bb.0:
4626 ; CHECK-NEXT:    vrsqrt14pd %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x4e,0xc0]
4627 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4628   %res = call <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 -1)
4629   ret <4 x double> %res
4630 }
4631
; rsqrt14.pd.256, zero-masking form: zero passthru combined with the runtime
; %mask. The assertions expect vrsqrt14pd with {%k1} {z}.
4632 define <4 x double> @test_rsqrt_pd_256_rrkz(<4 x double> %a0, i8 %mask) {
4633 ; X86-LABEL: test_rsqrt_pd_256_rrkz:
4634 ; X86:       # %bb.0:
4635 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4636 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4637 ; X86-NEXT:    vrsqrt14pd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x4e,0xc0]
4638 ; X86-NEXT:    retl # encoding: [0xc3]
4639 ;
4640 ; X64-LABEL: test_rsqrt_pd_256_rrkz:
4641 ; X64:       # %bb.0:
4642 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4643 ; X64-NEXT:    vrsqrt14pd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x4e,0xc0]
4644 ; X64-NEXT:    retq # encoding: [0xc3]
4645   %res = call <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
4646   ret <4 x double> %res
4647 }
4648
; rsqrt14.pd.256, merge-masking form: passthru %a1 combined with the runtime
; %mask. The assertions expect vrsqrt14pd with {%k1} writing ymm1, followed by
; a vmovapd into the return register ymm0.
4649 define <4 x double> @test_rsqrt_pd_256_rrk(<4 x double> %a0, <4 x double> %a1, i8 %mask) {
4650 ; X86-LABEL: test_rsqrt_pd_256_rrk:
4651 ; X86:       # %bb.0:
4652 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4653 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4654 ; X86-NEXT:    vrsqrt14pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x4e,0xc8]
4655 ; X86-NEXT:    vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
4656 ; X86-NEXT:    retl # encoding: [0xc3]
4657 ;
4658 ; X64-LABEL: test_rsqrt_pd_256_rrk:
4659 ; X64:       # %bb.0:
4660 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4661 ; X64-NEXT:    vrsqrt14pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x4e,0xc8]
4662 ; X64-NEXT:    vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
4663 ; X64-NEXT:    retq # encoding: [0xc3]
4664   %res = call <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %mask)
4665   ret <4 x double> %res
4666 }
4667
; rsqrt14.pd.128, register-register form: zero passthru with an all-ones mask
; (i8 -1). The assertions expect an unmasked vrsqrt14pd on xmm0.
4668 define <2 x double> @test_rsqrt_pd_128_rr(<2 x double> %a0) {
4669 ; CHECK-LABEL: test_rsqrt_pd_128_rr:
4670 ; CHECK:       # %bb.0:
4671 ; CHECK-NEXT:    vrsqrt14pd %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x4e,0xc0]
4672 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4673   %res = call <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double> %a0, <2 x double> zeroinitializer, i8 -1)
4674   ret <2 x double> %res
4675 }
4676
; rsqrt14.pd.128, zero-masking form: zero passthru combined with the runtime
; %mask. The assertions expect vrsqrt14pd with {%k1} {z}.
4677 define <2 x double> @test_rsqrt_pd_128_rrkz(<2 x double> %a0, i8 %mask) {
4678 ; X86-LABEL: test_rsqrt_pd_128_rrkz:
4679 ; X86:       # %bb.0:
4680 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4681 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4682 ; X86-NEXT:    vrsqrt14pd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x4e,0xc0]
4683 ; X86-NEXT:    retl # encoding: [0xc3]
4684 ;
4685 ; X64-LABEL: test_rsqrt_pd_128_rrkz:
4686 ; X64:       # %bb.0:
4687 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4688 ; X64-NEXT:    vrsqrt14pd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x4e,0xc0]
4689 ; X64-NEXT:    retq # encoding: [0xc3]
4690   %res = call <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double> %a0, <2 x double> zeroinitializer, i8 %mask)
4691   ret <2 x double> %res
4692 }
4693
; rsqrt14.pd.128, merge-masking form: passthru %a1 combined with the runtime
; %mask. The assertions expect vrsqrt14pd with {%k1} writing xmm1, followed by
; a vmovapd into the return register xmm0.
4694 define <2 x double> @test_rsqrt_pd_128_rrk(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
4695 ; X86-LABEL: test_rsqrt_pd_128_rrk:
4696 ; X86:       # %bb.0:
4697 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4698 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4699 ; X86-NEXT:    vrsqrt14pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x4e,0xc8]
4700 ; X86-NEXT:    vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
4701 ; X86-NEXT:    retl # encoding: [0xc3]
4702 ;
4703 ; X64-LABEL: test_rsqrt_pd_128_rrk:
4704 ; X64:       # %bb.0:
4705 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4706 ; X64-NEXT:    vrsqrt14pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x4e,0xc8]
4707 ; X64-NEXT:    vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
4708 ; X64-NEXT:    retq # encoding: [0xc3]
4709   %res = call <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double> %a0, <2 x double> %a1, i8 %mask)
4710   ret <2 x double> %res
4711 }
4712
4713 declare <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
4714 declare <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double>, <2 x double>, i8) nounwind readnone
4715
; rcp14.pd.256, register-register form: zero passthru with an all-ones mask
; (i8 -1). The assertions expect an unmasked vrcp14pd on ymm0.
4716 define <4 x double> @test_rcp_pd_256_rr(<4 x double> %a0) {
4717 ; CHECK-LABEL: test_rcp_pd_256_rr:
4718 ; CHECK:       # %bb.0:
4719 ; CHECK-NEXT:    vrcp14pd %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x4c,0xc0]
4720 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4721   %res = call <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 -1)
4722   ret <4 x double> %res
4723 }
4724
; rcp14.pd.256, zero-masking form: zero passthru combined with the runtime
; %mask. The assertions expect vrcp14pd with {%k1} {z}.
4725 define <4 x double> @test_rcp_pd_256_rrkz(<4 x double> %a0, i8 %mask) {
4726 ; X86-LABEL: test_rcp_pd_256_rrkz:
4727 ; X86:       # %bb.0:
4728 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4729 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4730 ; X86-NEXT:    vrcp14pd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x4c,0xc0]
4731 ; X86-NEXT:    retl # encoding: [0xc3]
4732 ;
4733 ; X64-LABEL: test_rcp_pd_256_rrkz:
4734 ; X64:       # %bb.0:
4735 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4736 ; X64-NEXT:    vrcp14pd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x4c,0xc0]
4737 ; X64-NEXT:    retq # encoding: [0xc3]
4738   %res = call <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
4739   ret <4 x double> %res
4740 }
4741
; rcp14.pd.256, merge-masking form: passthru %a1 combined with the runtime
; %mask. The assertions expect vrcp14pd with {%k1} writing ymm1, followed by a
; vmovapd into the return register ymm0.
4742 define <4 x double> @test_rcp_pd_256_rrk(<4 x double> %a0, <4 x double> %a1, i8 %mask) {
4743 ; X86-LABEL: test_rcp_pd_256_rrk:
4744 ; X86:       # %bb.0:
4745 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4746 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4747 ; X86-NEXT:    vrcp14pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x4c,0xc8]
4748 ; X86-NEXT:    vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
4749 ; X86-NEXT:    retl # encoding: [0xc3]
4750 ;
4751 ; X64-LABEL: test_rcp_pd_256_rrk:
4752 ; X64:       # %bb.0:
4753 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4754 ; X64-NEXT:    vrcp14pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x4c,0xc8]
4755 ; X64-NEXT:    vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
4756 ; X64-NEXT:    retq # encoding: [0xc3]
4757   %res = call <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %mask)
4758   ret <4 x double> %res
4759 }
4760
; rcp14.pd.128, register-register form: zero passthru with an all-ones mask
; (i8 -1). The assertions expect an unmasked vrcp14pd on xmm0.
4761 define <2 x double> @test_rcp_pd_128_rr(<2 x double> %a0) {
4762 ; CHECK-LABEL: test_rcp_pd_128_rr:
4763 ; CHECK:       # %bb.0:
4764 ; CHECK-NEXT:    vrcp14pd %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x4c,0xc0]
4765 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4766   %res = call <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double> %a0, <2 x double> zeroinitializer, i8 -1)
4767   ret <2 x double> %res
4768 }
4769
; rcp14.pd.128, zero-masking form: zero passthru combined with the runtime
; %mask. The assertions expect vrcp14pd with {%k1} {z}.
4770 define <2 x double> @test_rcp_pd_128_rrkz(<2 x double> %a0, i8 %mask) {
4771 ; X86-LABEL: test_rcp_pd_128_rrkz:
4772 ; X86:       # %bb.0:
4773 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4774 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4775 ; X86-NEXT:    vrcp14pd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x4c,0xc0]
4776 ; X86-NEXT:    retl # encoding: [0xc3]
4777 ;
4778 ; X64-LABEL: test_rcp_pd_128_rrkz:
4779 ; X64:       # %bb.0:
4780 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4781 ; X64-NEXT:    vrcp14pd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x4c,0xc0]
4782 ; X64-NEXT:    retq # encoding: [0xc3]
4783   %res = call <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double> %a0, <2 x double> zeroinitializer, i8 %mask)
4784   ret <2 x double> %res
4785 }
4786
; rcp14.pd.128, merge-masking form: passthru %a1 combined with the runtime
; %mask. The assertions expect vrcp14pd with {%k1} writing xmm1, followed by a
; vmovapd into the return register xmm0.
4787 define <2 x double> @test_rcp_pd_128_rrk(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
4788 ; X86-LABEL: test_rcp_pd_128_rrk:
4789 ; X86:       # %bb.0:
4790 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4791 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4792 ; X86-NEXT:    vrcp14pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x4c,0xc8]
4793 ; X86-NEXT:    vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
4794 ; X86-NEXT:    retl # encoding: [0xc3]
4795 ;
4796 ; X64-LABEL: test_rcp_pd_128_rrk:
4797 ; X64:       # %bb.0:
4798 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4799 ; X64-NEXT:    vrcp14pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x4c,0xc8]
4800 ; X64-NEXT:    vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
4801 ; X64-NEXT:    retq # encoding: [0xc3]
4802   %res = call <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double> %a0, <2 x double> %a1, i8 %mask)
4803   ret <2 x double> %res
4804 }
4805
4806 declare <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
4807 declare <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double>, <2 x double>, i8) nounwind readnone
4808
4809 declare <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double>, <4 x i64>)
4810
; Unmasked permvar.df.256 test: the intrinsic result is returned directly and
; the assertions expect a single vpermpd. Parameter %x2 is declared but not
; referenced by the body.
4811 define <4 x double>@test_int_x86_avx512_permvar_df_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2) {
4812 ; CHECK-LABEL: test_int_x86_avx512_permvar_df_256:
4813 ; CHECK:       # %bb.0:
4814 ; CHECK-NEXT:    vpermpd %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x16,0xc0]
4815 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4816   %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %x0, <4 x i64> %x1)
4817   ret <4 x double> %1
4818 }
4819
; Merge-masked permvar.df.256 test. Lanes 0-3 of the i8 mask %x3 select between
; the permute result and the passthru %x2. The assertions expect vpermpd with
; {%k1} writing ymm2, then a vmovapd into the return register ymm0.
4820 define <4 x double>@test_int_x86_avx512_mask_permvar_df_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
4821 ; X86-LABEL: test_int_x86_avx512_mask_permvar_df_256:
4822 ; X86:       # %bb.0:
4823 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4824 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4825 ; X86-NEXT:    vpermpd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x16,0xd0]
4826 ; X86-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
4827 ; X86-NEXT:    retl # encoding: [0xc3]
4828 ;
4829 ; X64-LABEL: test_int_x86_avx512_mask_permvar_df_256:
4830 ; X64:       # %bb.0:
4831 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4832 ; X64-NEXT:    vpermpd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x16,0xd0]
4833 ; X64-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
4834 ; X64-NEXT:    retq # encoding: [0xc3]
4835   %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %x0, <4 x i64> %x1)
4836   %2 = bitcast i8 %x3 to <8 x i1>
4837   %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4838   %3 = select <4 x i1> %extract1, <4 x double> %1, <4 x double> %x2
4839   ret <4 x double> %3
4840 }
4841
; Zero-masked permvar.df.256 test. Lanes 0-3 of the i8 mask %x3 select between
; the permute result and zeroinitializer; the assertions expect vpermpd with
; {%k1} {z} writing ymm0 directly.
4842 define <4 x double>@test_int_x86_avx512_maskz_permvar_df_256(<4 x double> %x0, <4 x i64> %x1, i8 %x3) {
4843 ; X86-LABEL: test_int_x86_avx512_maskz_permvar_df_256:
4844 ; X86:       # %bb.0:
4845 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4846 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4847 ; X86-NEXT:    vpermpd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x16,0xc0]
4848 ; X86-NEXT:    retl # encoding: [0xc3]
4849 ;
4850 ; X64-LABEL: test_int_x86_avx512_maskz_permvar_df_256:
4851 ; X64:       # %bb.0:
4852 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4853 ; X64-NEXT:    vpermpd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x16,0xc0]
4854 ; X64-NEXT:    retq # encoding: [0xc3]
4855   %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %x0, <4 x i64> %x1)
4856   %2 = bitcast i8 %x3 to <8 x i1>
4857   %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4858   %3 = select <4 x i1> %extract1, <4 x double> %1, <4 x double> zeroinitializer
4859   ret <4 x double> %3
4860 }
4861
4862 declare <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64>, <4 x i64>)
4863
; Unmasked permvar.di.256 test: the intrinsic result is returned directly.
; Parameter %x2 is declared but not referenced by the body.
; NOTE(review): the expected instruction is vpermpd although the operands are
; <4 x i64>, whereas the masked variants of this intrinsic expect vpermq.
; Presumably this is a backend execution-domain choice in the generated
; output - confirm before hand-editing the assertion.
4864 define <4 x i64>@test_int_x86_avx512_permvar_di_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
4865 ; CHECK-LABEL: test_int_x86_avx512_permvar_di_256:
4866 ; CHECK:       # %bb.0:
4867 ; CHECK-NEXT:    vpermpd %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x16,0xc0]
4868 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4869   %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1)
4870   ret <4 x i64> %1
4871 }
4872
; Merge-masked permvar.di.256 test. Lanes 0-3 of the i8 mask %x3 select between
; the permute result and the passthru %x2. The assertions expect vpermq with
; {%k1} writing ymm2, then a vmovdqa into the return register ymm0.
4873 define <4 x i64>@test_int_x86_avx512_mask_permvar_di_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
4874 ; X86-LABEL: test_int_x86_avx512_mask_permvar_di_256:
4875 ; X86:       # %bb.0:
4876 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4877 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4878 ; X86-NEXT:    vpermq %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x36,0xd0]
4879 ; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4880 ; X86-NEXT:    retl # encoding: [0xc3]
4881 ;
4882 ; X64-LABEL: test_int_x86_avx512_mask_permvar_di_256:
4883 ; X64:       # %bb.0:
4884 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4885 ; X64-NEXT:    vpermq %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x36,0xd0]
4886 ; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4887 ; X64-NEXT:    retq # encoding: [0xc3]
4888   %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1)
4889   %2 = bitcast i8 %x3 to <8 x i1>
4890   %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4891   %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x2
4892   ret <4 x i64> %3
4893 }
4894
; Zero-masked form: bits 0-3 of the i8 mask %x3 choose, per element, between
; the permute result and zero (select against zeroinitializer).
; Expected code: one vpermq with the {%k1} {z} modifiers writing %ymm0.
4895 define <4 x i64>@test_int_x86_avx512_maskz_permvar_di_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x3) {
4896 ; X86-LABEL: test_int_x86_avx512_maskz_permvar_di_256:
4897 ; X86:       # %bb.0:
4898 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4899 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4900 ; X86-NEXT:    vpermq %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x36,0xc0]
4901 ; X86-NEXT:    retl # encoding: [0xc3]
4902 ;
4903 ; X64-LABEL: test_int_x86_avx512_maskz_permvar_di_256:
4904 ; X64:       # %bb.0:
4905 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4906 ; X64-NEXT:    vpermq %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x36,0xc0]
4907 ; X64-NEXT:    retq # encoding: [0xc3]
4908   %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1)
; Reinterpret the mask byte as 8 x i1, then keep lanes 0-3 (one per element).
4909   %2 = bitcast i8 %x3 to <8 x i1>
4910   %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4911   %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> zeroinitializer
4912   ret <4 x i64> %3
4913 }
4914
; Merge-masked 128-bit double fixupimm intrinsic; the i32 operand is the
; immediate and the trailing i8 is the mask.
4915 declare <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double>, <2 x double>, <2 x i64>, i32, i8)
4916
; Sums three calls of the merge-masked intrinsic:
;   %res  - imm 5, mask %x4
;   %res1 - imm 4, mask %x4, first operand replaced by zeroinitializer
;   %res2 - imm 3, all-ones mask (i8 -1)
; The assertions expect the second call as a {%k1} {z} form on a register
; zeroed by vxorpd, and the third call with no mask operand at all.
4917 define <2 x double>@test_int_x86_avx512_mask_fixupimm_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) {
4918 ; X86-LABEL: test_int_x86_avx512_mask_fixupimm_pd_128:
4919 ; X86:       # %bb.0:
4920 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4921 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4922 ; X86-NEXT:    vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
4923 ; X86-NEXT:    vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf3,0xf5,0x09,0x54,0xda,0x05]
4924 ; X86-NEXT:    vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4]
4925 ; X86-NEXT:    vfixupimmpd $4, %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xe2,0x04]
4926 ; X86-NEXT:    vaddpd %xmm4, %xmm3, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xdc]
4927 ; X86-NEXT:    vfixupimmpd $3, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0xf5,0x08,0x54,0xc2,0x03]
4928 ; X86-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc0]
4929 ; X86-NEXT:    retl # encoding: [0xc3]
4930 ;
4931 ; X64-LABEL: test_int_x86_avx512_mask_fixupimm_pd_128:
4932 ; X64:       # %bb.0:
4933 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4934 ; X64-NEXT:    vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
4935 ; X64-NEXT:    vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf3,0xf5,0x09,0x54,0xda,0x05]
4936 ; X64-NEXT:    vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4]
4937 ; X64-NEXT:    vfixupimmpd $4, %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xe2,0x04]
4938 ; X64-NEXT:    vaddpd %xmm4, %xmm3, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xdc]
4939 ; X64-NEXT:    vfixupimmpd $3, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0xf5,0x08,0x54,0xc2,0x03]
4940 ; X64-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc0]
4941 ; X64-NEXT:    retq # encoding: [0xc3]
4942   %res = call <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1,<2 x i64> %x2, i32 5, i8 %x4)
4943   %res1 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double> zeroinitializer, <2 x double> %x1, <2 x i64> %x2, i32 4, i8 %x4)
4944   %res2 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 3, i8 -1)
4945   %res3 = fadd <2 x double> %res, %res1
4946   %res4 = fadd <2 x double> %res3, %res2
4947   ret <2 x double> %res4
4948 }
4949
; Zero-masked 128-bit double fixupimm intrinsic; the i32 operand is the
; immediate and the trailing i8 is the mask.
4950 declare <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double>, <2 x double>, <2 x i64>, i32, i8)
4951
; Adds two calls of the zero-masked intrinsic, both with mask %x4:
;   %res  - imm 5, third operand %x2
;   %res1 - imm 3, third operand replaced by zeroinitializer
; The assertions expect both as {%k1} {z} forms, with %xmm2 zeroed by vxorpd
; before the second one.
; NOTE(review): unlike the sibling fixupimm tests, the all-ones-mask call
; (%res2) and its fadd (%res4) are commented out below; the reason is not
; recorded here.
4952 define <2 x double>@test_int_x86_avx512_maskz_fixupimm_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) {
4953 ; X86-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_128:
4954 ; X86:       # %bb.0:
4955 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4956 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4957 ; X86-NEXT:    vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
4958 ; X86-NEXT:    vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xda,0x05]
4959 ; X86-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x57,0xd2]
4960 ; X86-NEXT:    vfixupimmpd $3, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xc2,0x03]
4961 ; X86-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc0]
4962 ; X86-NEXT:    retl # encoding: [0xc3]
4963 ;
4964 ; X64-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_128:
4965 ; X64:       # %bb.0:
4966 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4967 ; X64-NEXT:    vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
4968 ; X64-NEXT:    vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xda,0x05]
4969 ; X64-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x57,0xd2]
4970 ; X64-NEXT:    vfixupimmpd $3, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xc2,0x03]
4971 ; X64-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc0]
4972 ; X64-NEXT:    retq # encoding: [0xc3]
4973   %res = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4)
4974   %res1 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 3, i8 %x4)
4975   ;%res2 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 4, i8 -1)
4976   %res3 = fadd <2 x double> %res, %res1
4977   ;%res4 = fadd <2 x double> %res3, %res2
4978   ret <2 x double> %res3
4979 }
4980
; Merge-masked 256-bit double fixupimm intrinsic; the i32 operand is the
; immediate and the trailing i8 is the mask.
4981 declare <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double>, <4 x double>, <4 x i64>, i32, i8)
4982
; Sums three calls of the merge-masked intrinsic:
;   %res  - imm 4, mask %x4
;   %res1 - imm 5, mask %x4, first operand replaced by zeroinitializer
;   %res2 - imm 3, all-ones mask (i8 -1)
; The assertions expect the second call as a {%k1} {z} form on a register
; zeroed by vxorpd, and the third call with no mask operand at all.
4983 define <4 x double>@test_int_x86_avx512_mask_fixupimm_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i8 %x4) {
4984 ; X86-LABEL: test_int_x86_avx512_mask_fixupimm_pd_256:
4985 ; X86:       # %bb.0:
4986 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4987 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4988 ; X86-NEXT:    vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
4989 ; X86-NEXT:    vfixupimmpd $4, %ymm2, %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf3,0xf5,0x29,0x54,0xda,0x04]
4990 ; X86-NEXT:    vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4]
4991 ; X86-NEXT:    vfixupimmpd $5, %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xe2,0x05]
4992 ; X86-NEXT:    vaddpd %ymm4, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xdc]
4993 ; X86-NEXT:    vfixupimmpd $3, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x54,0xc2,0x03]
4994 ; X86-NEXT:    vaddpd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc0]
4995 ; X86-NEXT:    retl # encoding: [0xc3]
4996 ;
4997 ; X64-LABEL: test_int_x86_avx512_mask_fixupimm_pd_256:
4998 ; X64:       # %bb.0:
4999 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5000 ; X64-NEXT:    vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
5001 ; X64-NEXT:    vfixupimmpd $4, %ymm2, %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf3,0xf5,0x29,0x54,0xda,0x04]
5002 ; X64-NEXT:    vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4]
5003 ; X64-NEXT:    vfixupimmpd $5, %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xe2,0x05]
5004 ; X64-NEXT:    vaddpd %ymm4, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xdc]
5005 ; X64-NEXT:    vfixupimmpd $3, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x54,0xc2,0x03]
5006 ; X64-NEXT:    vaddpd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc0]
5007 ; X64-NEXT:    retq # encoding: [0xc3]
5008   %res = call <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 4, i8 %x4)
5009   %res1 = call <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double> zeroinitializer, <4 x double> %x1, <4 x i64> %x2 , i32 5, i8 %x4)
5010   %res2 = call <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 3, i8 -1)
5011   %res3 = fadd <4 x double> %res, %res1
5012   %res4 = fadd <4 x double> %res3, %res2
5013   ret <4 x double> %res4
5014 }
5015
; Zero-masked 256-bit double fixupimm intrinsic; the i32 operand is the
; immediate and the trailing i8 is the mask.
5016 declare <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double>, <4 x double>, <4 x i64>, i32, i8)
5017
; Sums three calls of the zero-masked intrinsic:
;   %res  - imm 5, mask %x4
;   %res1 - imm 4, mask %x4, third operand replaced by zeroinitializer
;   %res2 - imm 3, all-ones mask (i8 -1)
; The assertions expect the first two as {%k1} {z} forms (the second reading a
; register zeroed by vxorpd) and the third with no mask operand at all.
5018 define <4 x double>@test_int_x86_avx512_maskz_fixupimm_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i8 %x4) {
5019 ; X86-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_256:
5020 ; X86:       # %bb.0:
5021 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5022 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5023 ; X86-NEXT:    vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
5024 ; X86-NEXT:    vfixupimmpd $5, %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xda,0x05]
5025 ; X86-NEXT:    vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4]
5026 ; X86-NEXT:    vmovapd %ymm0, %ymm5 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xe8]
5027 ; X86-NEXT:    vfixupimmpd $4, %ymm4, %ymm1, %ymm5 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xec,0x04]
5028 ; X86-NEXT:    vaddpd %ymm5, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xdd]
5029 ; X86-NEXT:    vfixupimmpd $3, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x54,0xc2,0x03]
5030 ; X86-NEXT:    vaddpd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc0]
5031 ; X86-NEXT:    retl # encoding: [0xc3]
5032 ;
5033 ; X64-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_256:
5034 ; X64:       # %bb.0:
5035 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5036 ; X64-NEXT:    vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
5037 ; X64-NEXT:    vfixupimmpd $5, %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xda,0x05]
5038 ; X64-NEXT:    vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4]
5039 ; X64-NEXT:    vmovapd %ymm0, %ymm5 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xe8]
5040 ; X64-NEXT:    vfixupimmpd $4, %ymm4, %ymm1, %ymm5 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xec,0x04]
5041 ; X64-NEXT:    vaddpd %ymm5, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xdd]
5042 ; X64-NEXT:    vfixupimmpd $3, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x54,0xc2,0x03]
5043 ; X64-NEXT:    vaddpd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc0]
5044 ; X64-NEXT:    retq # encoding: [0xc3]
5045   %res = call <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 5, i8 %x4)
5046   %res1 = call <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> zeroinitializer, i32 4, i8 %x4)
5047   %res2 = call <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 3, i8 -1)
5048   %res3 = fadd <4 x double> %res, %res1
5049   %res4 = fadd <4 x double> %res3, %res2
5050   ret <4 x double> %res4
5051 }
5052
; Merge-masked 128-bit float fixupimm intrinsic; the i32 operand is the
; immediate and the trailing i8 is the mask.
5053 declare <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float>, <4 x float>, <4 x i32>, i32, i8)
5054
; Sums three calls of the merge-masked intrinsic:
;   %res  - imm 5, mask %x4
;   %res1 - imm 6, mask %x4, third operand replaced by zeroinitializer
;   %res2 - imm 7, all-ones mask (i8 -1)
; The assertions expect the first two as {%k1} merge forms on copies of %xmm0
; (the second reading a register zeroed by vxorps) and the third with no mask
; operand at all.
5055 define <4 x float>@test_int_x86_avx512_mask_fixupimm_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) {
5056 ; X86-LABEL: test_int_x86_avx512_mask_fixupimm_ps_128:
5057 ; X86:       # %bb.0:
5058 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5059 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5060 ; X86-NEXT:    vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
5061 ; X86-NEXT:    vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x54,0xda,0x05]
5062 ; X86-NEXT:    vxorps %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd8,0x57,0xe4]
5063 ; X86-NEXT:    vmovaps %xmm0, %xmm5 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe8]
5064 ; X86-NEXT:    vfixupimmps $6, %xmm4, %xmm1, %xmm5 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x54,0xec,0x06]
5065 ; X86-NEXT:    vaddps %xmm5, %xmm3, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xdd]
5066 ; X86-NEXT:    vfixupimmps $7, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0x75,0x08,0x54,0xc2,0x07]
5067 ; X86-NEXT:    vaddps %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc0]
5068 ; X86-NEXT:    retl # encoding: [0xc3]
5069 ;
5070 ; X64-LABEL: test_int_x86_avx512_mask_fixupimm_ps_128:
5071 ; X64:       # %bb.0:
5072 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5073 ; X64-NEXT:    vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
5074 ; X64-NEXT:    vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x54,0xda,0x05]
5075 ; X64-NEXT:    vxorps %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd8,0x57,0xe4]
5076 ; X64-NEXT:    vmovaps %xmm0, %xmm5 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe8]
5077 ; X64-NEXT:    vfixupimmps $6, %xmm4, %xmm1, %xmm5 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x54,0xec,0x06]
5078 ; X64-NEXT:    vaddps %xmm5, %xmm3, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xdd]
5079 ; X64-NEXT:    vfixupimmps $7, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0x75,0x08,0x54,0xc2,0x07]
5080 ; X64-NEXT:    vaddps %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc0]
5081 ; X64-NEXT:    retq # encoding: [0xc3]
5082   %res = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4)
5083   %res1 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 6, i8 %x4)
5084   %res2 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 7, i8 -1)
5085   %res3 = fadd <4 x float> %res, %res1
5086   %res4 = fadd <4 x float> %res3, %res2
5087   ret <4 x float> %res4
5088 }
5089
; Zero-masked 128-bit float fixupimm intrinsic; the i32 operand is the
; immediate and the trailing i8 is the mask.
5090 declare <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float>, <4 x float>, <4 x i32>, i32, i8)
5091
; Sums three calls of the zero-masked intrinsic:
;   %res  - imm 5, mask %x4
;   %res1 - imm 6, mask %x4, third operand replaced by zeroinitializer
;   %res2 - imm 7, all-ones mask (i8 -1)
; The assertions expect the first two as {%k1} {z} forms (the second reading a
; register zeroed by vxorps) and the third with no mask operand at all.
5092 define <4 x float>@test_int_x86_avx512_maskz_fixupimm_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) {
5093 ; X86-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_128:
5094 ; X86:       # %bb.0:
5095 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5096 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5097 ; X86-NEXT:    vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
5098 ; X86-NEXT:    vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x54,0xda,0x05]
5099 ; X86-NEXT:    vxorps %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd8,0x57,0xe4]
5100 ; X86-NEXT:    vmovaps %xmm0, %xmm5 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe8]
5101 ; X86-NEXT:    vfixupimmps $6, %xmm4, %xmm1, %xmm5 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x54,0xec,0x06]
5102 ; X86-NEXT:    vaddps %xmm5, %xmm3, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xdd]
5103 ; X86-NEXT:    vfixupimmps $7, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0x75,0x08,0x54,0xc2,0x07]
5104 ; X86-NEXT:    vaddps %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc0]
5105 ; X86-NEXT:    retl # encoding: [0xc3]
5106 ;
5107 ; X64-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_128:
5108 ; X64:       # %bb.0:
5109 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5110 ; X64-NEXT:    vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
5111 ; X64-NEXT:    vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x54,0xda,0x05]
5112 ; X64-NEXT:    vxorps %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd8,0x57,0xe4]
5113 ; X64-NEXT:    vmovaps %xmm0, %xmm5 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe8]
5114 ; X64-NEXT:    vfixupimmps $6, %xmm4, %xmm1, %xmm5 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x54,0xec,0x06]
5115 ; X64-NEXT:    vaddps %xmm5, %xmm3, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xdd]
5116 ; X64-NEXT:    vfixupimmps $7, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0x75,0x08,0x54,0xc2,0x07]
5117 ; X64-NEXT:    vaddps %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc0]
5118 ; X64-NEXT:    retq # encoding: [0xc3]
5119   %res = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4)
5120   %res1 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 6, i8 %x4)
5121   %res2 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 7, i8 -1)
5122   %res3 = fadd <4 x float> %res, %res1
5123   %res4 = fadd <4 x float> %res3, %res2
5124   ret <4 x float> %res4
5125 }
5126
; Merge-masked 256-bit float fixupimm intrinsic; the i32 operand is the
; immediate and the trailing i8 is the mask.
5127 declare <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float>, <8 x float>, <8 x i32>, i32, i8)
5128
; Sums three calls of the merge-masked intrinsic:
;   %res  - imm 5, mask %x4
;   %res1 - imm 6, mask %x4, third operand replaced by zeroinitializer
;   %res2 - imm 7, all-ones mask (i8 -1)
; The assertions expect the first two as {%k1} merge forms on copies of %ymm0
; (the second reading a register zeroed by vxorps) and the third with no mask
; operand at all.
5129 define <8 x float>@test_int_x86_avx512_mask_fixupimm_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i8 %x4) {
5130 ; X86-LABEL: test_int_x86_avx512_mask_fixupimm_ps_256:
5131 ; X86:       # %bb.0:
5132 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5133 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5134 ; X86-NEXT:    vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
5135 ; X86-NEXT:    vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x54,0xda,0x05]
5136 ; X86-NEXT:    vxorps %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd8,0x57,0xe4]
5137 ; X86-NEXT:    vmovaps %ymm0, %ymm5 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe8]
5138 ; X86-NEXT:    vfixupimmps $6, %ymm4, %ymm1, %ymm5 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x54,0xec,0x06]
5139 ; X86-NEXT:    vaddps %ymm5, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xdd]
5140 ; X86-NEXT:    vfixupimmps $7, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0x75,0x28,0x54,0xc2,0x07]
5141 ; X86-NEXT:    vaddps %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc0]
5142 ; X86-NEXT:    retl # encoding: [0xc3]
5143 ;
5144 ; X64-LABEL: test_int_x86_avx512_mask_fixupimm_ps_256:
5145 ; X64:       # %bb.0:
5146 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5147 ; X64-NEXT:    vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
5148 ; X64-NEXT:    vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x54,0xda,0x05]
5149 ; X64-NEXT:    vxorps %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd8,0x57,0xe4]
5150 ; X64-NEXT:    vmovaps %ymm0, %ymm5 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe8]
5151 ; X64-NEXT:    vfixupimmps $6, %ymm4, %ymm1, %ymm5 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x54,0xec,0x06]
5152 ; X64-NEXT:    vaddps %ymm5, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xdd]
5153 ; X64-NEXT:    vfixupimmps $7, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0x75,0x28,0x54,0xc2,0x07]
5154 ; X64-NEXT:    vaddps %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc0]
5155 ; X64-NEXT:    retq # encoding: [0xc3]
5156   %res = call <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 5, i8 %x4)
5157   %res1 = call <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> zeroinitializer, i32 6, i8 %x4)
5158   %res2 = call <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 7, i8 -1)
5159   %res3 = fadd <8 x float> %res, %res1
5160   %res4 = fadd <8 x float> %res3, %res2
5161   ret <8 x float> %res4
5162 }
5163
; Zero-masked 256-bit float fixupimm intrinsic; the i32 operand is the
; immediate and the trailing i8 is the mask.
5164 declare <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float>, <8 x float>, <8 x i32>, i32, i8)
5165
; Sums three calls of the zero-masked intrinsic:
;   %res  - imm 5, mask %x4
;   %res1 - imm 6, mask %x4, third operand replaced by zeroinitializer
;   %res2 - imm 7, all-ones mask (i8 -1)
; The assertions expect the first two as {%k1} {z} forms (the second reading a
; register zeroed by vxorps) and the third with no mask operand at all.
5166 define <8 x float>@test_int_x86_avx512_maskz_fixupimm_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i8 %x4) {
5167 ; X86-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_256:
5168 ; X86:       # %bb.0:
5169 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5170 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5171 ; X86-NEXT:    vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
5172 ; X86-NEXT:    vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x54,0xda,0x05]
5173 ; X86-NEXT:    vxorps %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd8,0x57,0xe4]
5174 ; X86-NEXT:    vmovaps %ymm0, %ymm5 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe8]
5175 ; X86-NEXT:    vfixupimmps $6, %ymm4, %ymm1, %ymm5 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x54,0xec,0x06]
5176 ; X86-NEXT:    vaddps %ymm5, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xdd]
5177 ; X86-NEXT:    vfixupimmps $7, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0x75,0x28,0x54,0xc2,0x07]
5178 ; X86-NEXT:    vaddps %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc0]
5179 ; X86-NEXT:    retl # encoding: [0xc3]
5180 ;
5181 ; X64-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_256:
5182 ; X64:       # %bb.0:
5183 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5184 ; X64-NEXT:    vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
5185 ; X64-NEXT:    vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x54,0xda,0x05]
5186 ; X64-NEXT:    vxorps %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd8,0x57,0xe4]
5187 ; X64-NEXT:    vmovaps %ymm0, %ymm5 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe8]
5188 ; X64-NEXT:    vfixupimmps $6, %ymm4, %ymm1, %ymm5 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x54,0xec,0x06]
5189 ; X64-NEXT:    vaddps %ymm5, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xdd]
5190 ; X64-NEXT:    vfixupimmps $7, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0x75,0x28,0x54,0xc2,0x07]
5191 ; X64-NEXT:    vaddps %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc0]
5192 ; X64-NEXT:    retq # encoding: [0xc3]
5193   %res = call <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 5, i8 %x4)
5194   %res1 = call <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> zeroinitializer, i32 6, i8 %x4)
5195   %res2 = call <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 7, i8 -1)
5196   %res3 = fadd <8 x float> %res, %res1
5197   %res4 = fadd <8 x float> %res3, %res2
5198   ret <8 x float> %res4
5199 }
5200
; Unmasked psra.q.128: returns the intrinsic result for (%a0, %a1) unchanged.
; Both run lines share one set of assertions, expecting a single
; vpsraq %xmm1, %xmm0, %xmm0.
5201 define <2 x i64> @test_x86_avx512_psra_q_128(<2 x i64> %a0, <2 x i64> %a1) {
5202 ; CHECK-LABEL: test_x86_avx512_psra_q_128:
5203 ; CHECK:       # %bb.0:
5204 ; CHECK-NEXT:    vpsraq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0xe2,0xc1]
5205 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5206   %res = call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
5207   ret <2 x i64> %res
5208 }
; Merge-masked psra.q.128: bits 0-1 of the i8 %mask choose, per element,
; between the intrinsic result and %passthru.
; Expected code: vpsraq writes %xmm2 under %k1, then %xmm2 is copied to %xmm0.
5209 define <2 x i64> @test_x86_avx512_mask_psra_q_128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %passthru, i8 %mask) {
5210 ; X86-LABEL: test_x86_avx512_mask_psra_q_128:
5211 ; X86:       # %bb.0:
5212 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5213 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5214 ; X86-NEXT:    vpsraq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe2,0xd1]
5215 ; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
5216 ; X86-NEXT:    retl # encoding: [0xc3]
5217 ;
5218 ; X64-LABEL: test_x86_avx512_mask_psra_q_128:
5219 ; X64:       # %bb.0:
5220 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5221 ; X64-NEXT:    vpsraq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe2,0xd1]
5222 ; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
5223 ; X64-NEXT:    retq # encoding: [0xc3]
5224   %res = call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
; Reinterpret the mask byte as 8 x i1, then keep lanes 0-1 (one per element).
5225   %mask.cast = bitcast i8 %mask to <8 x i1>
5226   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
5227   %res2 = select <2 x i1> %mask.extract, <2 x i64> %res, <2 x i64> %passthru
5228   ret <2 x i64> %res2
5229 }
; Zero-masked psra.q.128: bits 0-1 of the i8 %mask choose, per element,
; between the intrinsic result and zero (select against zeroinitializer).
; Expected code: one vpsraq with the {%k1} {z} modifiers writing %xmm0.
5230 define <2 x i64> @test_x86_avx512_maskz_psra_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
5231 ; X86-LABEL: test_x86_avx512_maskz_psra_q_128:
5232 ; X86:       # %bb.0:
5233 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5234 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5235 ; X86-NEXT:    vpsraq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xe2,0xc1]
5236 ; X86-NEXT:    retl # encoding: [0xc3]
5237 ;
5238 ; X64-LABEL: test_x86_avx512_maskz_psra_q_128:
5239 ; X64:       # %bb.0:
5240 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5241 ; X64-NEXT:    vpsraq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xe2,0xc1]
5242 ; X64-NEXT:    retq # encoding: [0xc3]
5243   %res = call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
; Reinterpret the mask byte as 8 x i1, then keep lanes 0-1 (one per element).
5244   %mask.cast = bitcast i8 %mask to <8 x i1>
5245   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
5246   %res2 = select <2 x i1> %mask.extract, <2 x i64> %res, <2 x i64> zeroinitializer
5247   ret <2 x i64> %res2
5248 }
; Intrinsic called by the three psra_q_128 tests above.
5249 declare <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64>, <2 x i64>) nounwind readnone
5250
5251
; Unmasked psra.q.256: returns the intrinsic result unchanged. The data operand
; %a0 is 4 x i64 while the second operand %a1 is only 2 x i64, which is why the
; expected vpsraq mixes an %xmm source with %ymm data registers.
5252 define <4 x i64> @test_x86_avx512_psra_q_256(<4 x i64> %a0, <2 x i64> %a1) {
5253 ; CHECK-LABEL: test_x86_avx512_psra_q_256:
5254 ; CHECK:       # %bb.0:
5255 ; CHECK-NEXT:    vpsraq %xmm1, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0xe2,0xc1]
5256 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5257   %res = call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
5258   ret <4 x i64> %res
5259 }
; Merge-masked psra.q.256: bits 0-3 of the i8 %mask choose, per element,
; between the intrinsic result and %passthru.
; Expected code: vpsraq writes %ymm2 under %k1, then %ymm2 is copied to %ymm0.
5260 define <4 x i64> @test_x86_avx512_mask_psra_q_256(<4 x i64> %a0, <2 x i64> %a1, <4 x i64> %passthru, i8 %mask) {
5261 ; X86-LABEL: test_x86_avx512_mask_psra_q_256:
5262 ; X86:       # %bb.0:
5263 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5264 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5265 ; X86-NEXT:    vpsraq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe2,0xd1]
5266 ; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
5267 ; X86-NEXT:    retl # encoding: [0xc3]
5268 ;
5269 ; X64-LABEL: test_x86_avx512_mask_psra_q_256:
5270 ; X64:       # %bb.0:
5271 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5272 ; X64-NEXT:    vpsraq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe2,0xd1]
5273 ; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
5274 ; X64-NEXT:    retq # encoding: [0xc3]
5275   %res = call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
; Reinterpret the mask byte as 8 x i1, then keep lanes 0-3 (one per element).
5276   %mask.cast = bitcast i8 %mask to <8 x i1>
5277   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5278   %res2 = select <4 x i1> %mask.extract, <4 x i64> %res, <4 x i64> %passthru
5279   ret <4 x i64> %res2
5280 }
; Zero-masked psra.q.256: bits 0-3 of the i8 %mask choose, per element,
; between the intrinsic result and zero (select against zeroinitializer).
; %passthru is part of the signature but is never referenced in the body.
; Expected code: one vpsraq with the {%k1} {z} modifiers writing %ymm0.
5281 define <4 x i64> @test_x86_avx512_maskz_psra_q_256(<4 x i64> %a0, <2 x i64> %a1, <4 x i64> %passthru, i8 %mask) {
5282 ; X86-LABEL: test_x86_avx512_maskz_psra_q_256:
5283 ; X86:       # %bb.0:
5284 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5285 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5286 ; X86-NEXT:    vpsraq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xe2,0xc1]
5287 ; X86-NEXT:    retl # encoding: [0xc3]
5288 ;
5289 ; X64-LABEL: test_x86_avx512_maskz_psra_q_256:
5290 ; X64:       # %bb.0:
5291 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5292 ; X64-NEXT:    vpsraq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xe2,0xc1]
5293 ; X64-NEXT:    retq # encoding: [0xc3]
5294   %res = call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
; Reinterpret the mask byte as 8 x i1, then keep lanes 0-3 (one per element).
5295   %mask.cast = bitcast i8 %mask to <8 x i1>
5296   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5297   %res2 = select <4 x i1> %mask.extract, <4 x i64> %res, <4 x i64> zeroinitializer
5298   ret <4 x i64> %res2
5299 }
; Intrinsic called by the three psra_q_256 tests above; note the 2 x i64
; second operand alongside the 4 x i64 data operand.
5300 declare <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64>, <2 x i64>) nounwind readnone
5301
5302
; Unmasked psrai.q.128 with the constant i32 7 as second operand; the result
; is returned unchanged.
; Both run lines share one set of assertions, expecting a single
; vpsraq $7, %xmm0, %xmm0.
5303 define <2 x i64> @test_x86_avx512_psrai_q_128(<2 x i64> %a0) {
5304 ; CHECK-LABEL: test_x86_avx512_psrai_q_128:
5305 ; CHECK:       # %bb.0:
5306 ; CHECK-NEXT:    vpsraq $7, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xe0,0x07]
5307 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5308   %res = call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
5309   ret <2 x i64> %res
5310 }
; Merge-masked psrai.q.128 (constant i32 7): bits 0-1 of the i8 %mask choose,
; per element, between the intrinsic result and %passthru.
; Expected code: vpsraq $7 writes %xmm1 under %k1, then %xmm1 is copied to
; %xmm0.
5311 define <2 x i64> @test_x86_avx512_mask_psrai_q_128(<2 x i64> %a0, <2 x i64> %passthru, i8 %mask) {
5312 ; X86-LABEL: test_x86_avx512_mask_psrai_q_128:
5313 ; X86:       # %bb.0:
5314 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5315 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5316 ; X86-NEXT:    vpsraq $7, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xe0,0x07]
5317 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
5318 ; X86-NEXT:    retl # encoding: [0xc3]
5319 ;
5320 ; X64-LABEL: test_x86_avx512_mask_psrai_q_128:
5321 ; X64:       # %bb.0:
5322 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5323 ; X64-NEXT:    vpsraq $7, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xe0,0x07]
5324 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
5325 ; X64-NEXT:    retq # encoding: [0xc3]
5326   %res = call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
; Reinterpret the mask byte as 8 x i1, then keep lanes 0-1 (one per element).
5327   %mask.cast = bitcast i8 %mask to <8 x i1>
5328   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
5329   %res2 = select <2 x i1> %mask.extract, <2 x i64> %res, <2 x i64> %passthru
5330   ret <2 x i64> %res2
5331 }
; Zero-masked psrai.q.128: lanes whose mask bit (lanes 0-1 of the bitcast i8)
; is clear come from zeroinitializer. The expected vpsraq therefore carries
; {%k1} {z} and writes xmm0 directly, with no trailing register copy.
5332 define <2 x i64> @test_x86_avx512_maskz_psrai_q_128(<2 x i64> %a0, i8 %mask) {
5333 ; X86-LABEL: test_x86_avx512_maskz_psrai_q_128:
5334 ; X86:       # %bb.0:
5335 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5336 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5337 ; X86-NEXT:    vpsraq $7, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xe0,0x07]
5338 ; X86-NEXT:    retl # encoding: [0xc3]
5339 ;
5340 ; X64-LABEL: test_x86_avx512_maskz_psrai_q_128:
5341 ; X64:       # %bb.0:
5342 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5343 ; X64-NEXT:    vpsraq $7, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xe0,0x07]
5344 ; X64-NEXT:    retq # encoding: [0xc3]
5345   %res = call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
5346   %mask.cast = bitcast i8 %mask to <8 x i1>
5347   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
5348   %res2 = select <2 x i1> %mask.extract, <2 x i64> %res, <2 x i64> zeroinitializer
5349   ret <2 x i64> %res2
5350 }
; Declaration of the psrai.q.128 intrinsic used by the three tests above:
; a <2 x i64> value plus an i32 operand (the tests always pass 7).
5351 declare <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64>, i32) nounwind readnone
5352
5353
; Unmasked psrai.q.256 intrinsic called with the constant 7. Both RUN lines
; expect a single vpsraq $7 on ymm0.
5354 define <4 x i64> @test_x86_avx512_psrai_q_256(<4 x i64> %a0) {
5355 ; CHECK-LABEL: test_x86_avx512_psrai_q_256:
5356 ; CHECK:       # %bb.0:
5357 ; CHECK-NEXT:    vpsraq $7, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xe0,0x07]
5358 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5359   %res = call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
5360   ret <4 x i64> %res
5361 }
; Merge-masked psrai.q.256: the result is selected against %passthru using
; lanes 0-3 of the i8 mask after it is bitcast to <8 x i1>. The expected code
; performs vpsraq into ymm1 under {%k1} and then copies ymm1 to ymm0.
5362 define <4 x i64> @test_x86_avx512_mask_psrai_q_256(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
5363 ; X86-LABEL: test_x86_avx512_mask_psrai_q_256:
5364 ; X86:       # %bb.0:
5365 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5366 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5367 ; X86-NEXT:    vpsraq $7, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xe0,0x07]
5368 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
5369 ; X86-NEXT:    retl # encoding: [0xc3]
5370 ;
5371 ; X64-LABEL: test_x86_avx512_mask_psrai_q_256:
5372 ; X64:       # %bb.0:
5373 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5374 ; X64-NEXT:    vpsraq $7, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xe0,0x07]
5375 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
5376 ; X64-NEXT:    retq # encoding: [0xc3]
5377   %res = call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
5378   %mask.cast = bitcast i8 %mask to <8 x i1>
5379   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5380   %res2 = select <4 x i1> %mask.extract, <4 x i64> %res, <4 x i64> %passthru
5381   ret <4 x i64> %res2
5382 }
; Zero-masked psrai.q.256: lanes whose mask bit (lanes 0-3 of the bitcast i8)
; is clear come from zeroinitializer, so the expected vpsraq carries
; {%k1} {z} and writes ymm0 directly.
5383 define <4 x i64> @test_x86_avx512_maskz_psrai_q_256(<4 x i64> %a0, i8 %mask) {
5384 ; X86-LABEL: test_x86_avx512_maskz_psrai_q_256:
5385 ; X86:       # %bb.0:
5386 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5387 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5388 ; X86-NEXT:    vpsraq $7, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xe0,0x07]
5389 ; X86-NEXT:    retl # encoding: [0xc3]
5390 ;
5391 ; X64-LABEL: test_x86_avx512_maskz_psrai_q_256:
5392 ; X64:       # %bb.0:
5393 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5394 ; X64-NEXT:    vpsraq $7, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xe0,0x07]
5395 ; X64-NEXT:    retq # encoding: [0xc3]
5396   %res = call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
5397   %mask.cast = bitcast i8 %mask to <8 x i1>
5398   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5399   %res2 = select <4 x i1> %mask.extract, <4 x i64> %res, <4 x i64> zeroinitializer
5400   ret <4 x i64> %res2
5401 }
; Declaration of the psrai.q.256 intrinsic used by the three tests above:
; a <4 x i64> value plus an i32 operand (the tests always pass 7).
5402 declare <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64>, i32) nounwind readnone
5403
; Unmasked psrav.q.128 intrinsic taking two <2 x i64> operands. Both RUN
; lines expect a single vpsravq %xmm1, %xmm0, %xmm0.
5404 define <2 x i64> @test_x86_avx512_psrav_q_128(<2 x i64> %a0, <2 x i64> %a1) {
5405 ; CHECK-LABEL: test_x86_avx512_psrav_q_128:
5406 ; CHECK:       # %bb.0:
5407 ; CHECK-NEXT:    vpsravq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x46,0xc1]
5408 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5409   %res = call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %a0, <2 x i64> %a1)
5410   ret <2 x i64> %res
5411 }
5412
; Merge-masked psrav.q.128: the result is selected against %a2 using lanes
; 0-1 of the i8 mask after it is bitcast to <8 x i1>. The expected code
; performs vpsravq into xmm2 (the register holding %a2) under {%k1} and then
; copies xmm2 to xmm0.
5413 define <2 x i64> @test_x86_avx512_mask_psrav_q_128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, i8 %mask) {
5414 ; X86-LABEL: test_x86_avx512_mask_psrav_q_128:
5415 ; X86:       # %bb.0:
5416 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5417 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5418 ; X86-NEXT:    vpsravq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x46,0xd1]
5419 ; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
5420 ; X86-NEXT:    retl # encoding: [0xc3]
5421 ;
5422 ; X64-LABEL: test_x86_avx512_mask_psrav_q_128:
5423 ; X64:       # %bb.0:
5424 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5425 ; X64-NEXT:    vpsravq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x46,0xd1]
5426 ; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
5427 ; X64-NEXT:    retq # encoding: [0xc3]
5428   %res = call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %a0, <2 x i64> %a1)
5429   %mask.cast = bitcast i8 %mask to <8 x i1>
5430   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
5431   %res2 = select <2 x i1> %mask.extract, <2 x i64> %res, <2 x i64> %a2
5432   ret <2 x i64> %res2
5433 }
5434
; Zero-masked psrav.q.128: lanes whose mask bit (lanes 0-1 of the bitcast i8)
; is clear come from zeroinitializer, so the expected vpsravq carries
; {%k1} {z} and writes xmm0 directly.
5435 define <2 x i64> @test_x86_avx512_maskz_psrav_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
5436 ; X86-LABEL: test_x86_avx512_maskz_psrav_q_128:
5437 ; X86:       # %bb.0:
5438 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5439 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5440 ; X86-NEXT:    vpsravq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x46,0xc1]
5441 ; X86-NEXT:    retl # encoding: [0xc3]
5442 ;
5443 ; X64-LABEL: test_x86_avx512_maskz_psrav_q_128:
5444 ; X64:       # %bb.0:
5445 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5446 ; X64-NEXT:    vpsravq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x46,0xc1]
5447 ; X64-NEXT:    retq # encoding: [0xc3]
5448   %res = call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %a0, <2 x i64> %a1)
5449   %mask.cast = bitcast i8 %mask to <8 x i1>
5450   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
5451   %res2 = select <2 x i1> %mask.extract, <2 x i64> %res, <2 x i64> zeroinitializer
5452   ret <2 x i64> %res2
5453 }
5454
; Declaration of the psrav.q.128 intrinsic used by the three tests above;
; both operands are <2 x i64> vectors.
5455 declare <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64>, <2 x i64>) nounwind readnone
5456
; Unmasked psrav.q.256 intrinsic taking two <4 x i64> operands. Both RUN
; lines expect a single vpsravq %ymm1, %ymm0, %ymm0.
5457 define <4 x i64> @test_x86_avx512_psrav_q_256(<4 x i64> %a0, <4 x i64> %a1) {
5458 ; CHECK-LABEL: test_x86_avx512_psrav_q_256:
5459 ; CHECK:       # %bb.0:
5460 ; CHECK-NEXT:    vpsravq %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x46,0xc1]
5461 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5462   %res = call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %a0, <4 x i64> %a1)
5463   ret <4 x i64> %res
5464 }
5465
; Merge-masked psrav.q.256: the result is selected against %a2 using lanes
; 0-3 of the i8 mask after it is bitcast to <8 x i1>. The expected code
; performs vpsravq into ymm2 (the register holding %a2) under {%k1} and then
; copies ymm2 to ymm0.
5466 define <4 x i64> @test_x86_avx512_mask_psrav_q_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2, i8 %mask) {
5467 ; X86-LABEL: test_x86_avx512_mask_psrav_q_256:
5468 ; X86:       # %bb.0:
5469 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5470 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5471 ; X86-NEXT:    vpsravq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x46,0xd1]
5472 ; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
5473 ; X86-NEXT:    retl # encoding: [0xc3]
5474 ;
5475 ; X64-LABEL: test_x86_avx512_mask_psrav_q_256:
5476 ; X64:       # %bb.0:
5477 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5478 ; X64-NEXT:    vpsravq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x46,0xd1]
5479 ; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
5480 ; X64-NEXT:    retq # encoding: [0xc3]
5481   %res = call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %a0, <4 x i64> %a1)
5482   %mask.cast = bitcast i8 %mask to <8 x i1>
5483   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5484   %res2 = select <4 x i1> %mask.extract, <4 x i64> %res, <4 x i64> %a2
5485   ret <4 x i64> %res2
5486 }
5487
; Zero-masked psrav.q.256: lanes whose mask bit (lanes 0-3 of the bitcast i8)
; is clear come from zeroinitializer, so the expected vpsravq carries
; {%k1} {z} and writes ymm0 directly.
5488 define <4 x i64> @test_x86_avx512_maskz_psrav_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
5489 ; X86-LABEL: test_x86_avx512_maskz_psrav_q_256:
5490 ; X86:       # %bb.0:
5491 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5492 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5493 ; X86-NEXT:    vpsravq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x46,0xc1]
5494 ; X86-NEXT:    retl # encoding: [0xc3]
5495 ;
5496 ; X64-LABEL: test_x86_avx512_maskz_psrav_q_256:
5497 ; X64:       # %bb.0:
5498 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5499 ; X64-NEXT:    vpsravq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x46,0xc1]
5500 ; X64-NEXT:    retq # encoding: [0xc3]
5501   %res = call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %a0, <4 x i64> %a1)
5502   %mask.cast = bitcast i8 %mask to <8 x i1>
5503   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5504   %res2 = select <4 x i1> %mask.extract, <4 x i64> %res, <4 x i64> zeroinitializer
5505   ret <4 x i64> %res2
5506 }
5507
; Declaration of the psrav.q.256 intrinsic used by the three tests above;
; both operands are <4 x i64> vectors.
5508 declare <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64>, <4 x i64>) nounwind readnone
5509
; Unmasked generic llvm.fma on <8 x float>. Both RUN lines expect one
; vfmadd213ps on ymm registers; the assertion notes the instruction was
; compressed from EVEX to VEX encoding.
5510 define <8 x float> @test_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
5511 ; CHECK-LABEL: test_vfmadd256_ps:
5512 ; CHECK:       # %bb.0:
5513 ; CHECK-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
5514 ; CHECK-NEXT:    # ymm0 = (ymm1 * ymm0) + ymm2
5515 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5516   %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
5517   ret <8 x float> %1
5518 }
5519
; Merge-masked llvm.fma on <8 x float>: all eight bits of the i8 mask are
; used directly (no lane extraction) to select between the fma result and
; %a0. Because the pass-through value is the first multiplicand, the expected
; code is a vfmadd132ps that updates ymm0 in place under {%k1}.
5520 define <8 x float> @test_mask_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
5521 ; X86-LABEL: test_mask_vfmadd256_ps:
5522 ; X86:       # %bb.0:
5523 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5524 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5525 ; X86-NEXT:    vfmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x98,0xc1]
5526 ; X86-NEXT:    # ymm0 {%k1} = (ymm0 * ymm1) + ymm2
5527 ; X86-NEXT:    retl # encoding: [0xc3]
5528 ;
5529 ; X64-LABEL: test_mask_vfmadd256_ps:
5530 ; X64:       # %bb.0:
5531 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5532 ; X64-NEXT:    vfmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x98,0xc1]
5533 ; X64-NEXT:    # ymm0 {%k1} = (ymm0 * ymm1) + ymm2
5534 ; X64-NEXT:    retq # encoding: [0xc3]
5535   %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
5536   %2 = bitcast i8 %mask to <8 x i1>
5537   %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %a0
5538   ret <8 x float> %3
5539 }
5540
; Unmasked generic llvm.fma on <4 x float>. Both RUN lines expect one
; vfmadd213ps on xmm registers, compressed from EVEX to VEX encoding.
5541 define <4 x float> @test_vfmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
5542 ; CHECK-LABEL: test_vfmadd128_ps:
5543 ; CHECK:       # %bb.0:
5544 ; CHECK-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
5545 ; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
5546 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5547   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
5548   ret <4 x float> %1
5549 }
5550
; Merge-masked llvm.fma on <4 x float>: lanes 0-3 of the bitcast i8 mask
; select between the fma result and %a0. The pass-through is the first
; multiplicand, so the expected code is a vfmadd132ps that updates xmm0 in
; place under {%k1}.
5551 define <4 x float> @test_mask_vfmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
5552 ; X86-LABEL: test_mask_vfmadd128_ps:
5553 ; X86:       # %bb.0:
5554 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5555 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5556 ; X86-NEXT:    vfmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x98,0xc1]
5557 ; X86-NEXT:    # xmm0 {%k1} = (xmm0 * xmm1) + xmm2
5558 ; X86-NEXT:    retl # encoding: [0xc3]
5559 ;
5560 ; X64-LABEL: test_mask_vfmadd128_ps:
5561 ; X64:       # %bb.0:
5562 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5563 ; X64-NEXT:    vfmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x98,0xc1]
5564 ; X64-NEXT:    # xmm0 {%k1} = (xmm0 * xmm1) + xmm2
5565 ; X64-NEXT:    retq # encoding: [0xc3]
5566   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
5567   %2 = bitcast i8 %mask to <8 x i1>
5568   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5569   %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %a0
5570   ret <4 x float> %3
5571 }
5572
; Unmasked generic llvm.fma on <4 x double>. Both RUN lines expect one
; vfmadd213pd on ymm registers, compressed from EVEX to VEX encoding.
5573 define <4 x double> @test_fmadd256_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
5574 ; CHECK-LABEL: test_fmadd256_pd:
5575 ; CHECK:       # %bb.0:
5576 ; CHECK-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
5577 ; CHECK-NEXT:    # ymm0 = (ymm1 * ymm0) + ymm2
5578 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5579   %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c)
5580   ret <4 x double> %1
5581 }
5582
; Merge-masked llvm.fma on <4 x double>: lanes 0-3 of the bitcast i8 mask
; select between the fma result and %a. The pass-through is the first
; multiplicand, so the expected code is a vfmadd132pd that updates ymm0 in
; place under {%k1}.
5583 define <4 x double> @test_mask_fmadd256_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask) {
5584 ; X86-LABEL: test_mask_fmadd256_pd:
5585 ; X86:       # %bb.0:
5586 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5587 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5588 ; X86-NEXT:    vfmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x98,0xc1]
5589 ; X86-NEXT:    # ymm0 {%k1} = (ymm0 * ymm1) + ymm2
5590 ; X86-NEXT:    retl # encoding: [0xc3]
5591 ;
5592 ; X64-LABEL: test_mask_fmadd256_pd:
5593 ; X64:       # %bb.0:
5594 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5595 ; X64-NEXT:    vfmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x98,0xc1]
5596 ; X64-NEXT:    # ymm0 {%k1} = (ymm0 * ymm1) + ymm2
5597 ; X64-NEXT:    retq # encoding: [0xc3]
5598   %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c)
5599   %2 = bitcast i8 %mask to <8 x i1>
5600   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5601   %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %a
5602   ret <4 x double> %3
5603 }
5604
; Unmasked generic llvm.fma on <2 x double>. Both RUN lines expect one
; vfmadd213pd on xmm registers, compressed from EVEX to VEX encoding.
5605 define <2 x double> @test_fmadd128_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
5606 ; CHECK-LABEL: test_fmadd128_pd:
5607 ; CHECK:       # %bb.0:
5608 ; CHECK-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
5609 ; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
5610 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5611   %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
5612   ret <2 x double> %1
5613 }
5614
; Merge-masked llvm.fma on <2 x double>: lanes 0-1 of the bitcast i8 mask
; select between the fma result and %a. The pass-through is the first
; multiplicand, so the expected code is a vfmadd132pd that updates xmm0 in
; place under {%k1}.
5615 define <2 x double> @test_mask_fmadd128_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
5616 ; X86-LABEL: test_mask_fmadd128_pd:
5617 ; X86:       # %bb.0:
5618 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5619 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5620 ; X86-NEXT:    vfmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x98,0xc1]
5621 ; X86-NEXT:    # xmm0 {%k1} = (xmm0 * xmm1) + xmm2
5622 ; X86-NEXT:    retl # encoding: [0xc3]
5623 ;
5624 ; X64-LABEL: test_mask_fmadd128_pd:
5625 ; X64:       # %bb.0:
5626 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5627 ; X64-NEXT:    vfmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x98,0xc1]
5628 ; X64-NEXT:    # xmm0 {%k1} = (xmm0 * xmm1) + xmm2
5629 ; X64-NEXT:    retq # encoding: [0xc3]
5630   %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
5631   %2 = bitcast i8 %mask to <8 x i1>
5632   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
5633   %3 = select <2 x i1> %extract, <2 x double> %1, <2 x double> %a
5634   ret <2 x double> %3
5635 }
5636
; "mask3" fmadd on <2 x double>: lanes 0-1 of the bitcast i8 mask select
; between the fma result and %x2, i.e. the pass-through is the addend. The
; expected code is a vfmadd231pd accumulating into xmm2 under {%k1},
; followed by a copy of xmm2 to the return register xmm0.
5637 define <2 x double>@test_int_x86_avx512_mask3_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
5638 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_128:
5639 ; X86:       # %bb.0:
5640 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5641 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5642 ; X86-NEXT:    vfmadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb8,0xd1]
5643 ; X86-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) + xmm2
5644 ; X86-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
5645 ; X86-NEXT:    retl # encoding: [0xc3]
5646 ;
5647 ; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_128:
5648 ; X64:       # %bb.0:
5649 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5650 ; X64-NEXT:    vfmadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb8,0xd1]
5651 ; X64-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) + xmm2
5652 ; X64-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
5653 ; X64-NEXT:    retq # encoding: [0xc3]
5654   %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2)
5655   %2 = bitcast i8 %x3 to <8 x i1>
5656   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
5657   %3 = select <2 x i1> %extract, <2 x double> %1, <2 x double> %x2
5658   ret <2 x double> %3
5659 }
5660
; Zero-masked fmadd on <2 x double>: lanes whose mask bit (lanes 0-1 of the
; bitcast i8) is clear come from zeroinitializer. The expected code is a
; vfmadd213pd with {%k1} {z} writing xmm0 directly.
5661 define <2 x double>@test_int_x86_avx512_maskz_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
5662 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_128:
5663 ; X86:       # %bb.0:
5664 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5665 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5666 ; X86-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa8,0xc2]
5667 ; X86-NEXT:    # xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
5668 ; X86-NEXT:    retl # encoding: [0xc3]
5669 ;
5670 ; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_128:
5671 ; X64:       # %bb.0:
5672 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5673 ; X64-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa8,0xc2]
5674 ; X64-NEXT:    # xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
5675 ; X64-NEXT:    retq # encoding: [0xc3]
5676   %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2)
5677   %2 = bitcast i8 %x3 to <8 x i1>
5678   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
5679   %3 = select <2 x i1> %extract, <2 x double> %1, <2 x double> zeroinitializer
5680   ret <2 x double> %3
5681 }
5682
; "mask3" fmadd on <4 x double>: lanes 0-3 of the bitcast i8 mask select
; between the fma result and %x2 (the addend). The expected code is a
; vfmadd231pd accumulating into ymm2 under {%k1}, followed by a copy of ymm2
; to the return register ymm0.
5683 define <4 x double>@test_int_x86_avx512_mask3_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
5684 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_256:
5685 ; X86:       # %bb.0:
5686 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5687 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5688 ; X86-NEXT:    vfmadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb8,0xd1]
5689 ; X86-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) + ymm2
5690 ; X86-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
5691 ; X86-NEXT:    retl # encoding: [0xc3]
5692 ;
5693 ; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_256:
5694 ; X64:       # %bb.0:
5695 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5696 ; X64-NEXT:    vfmadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb8,0xd1]
5697 ; X64-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) + ymm2
5698 ; X64-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
5699 ; X64-NEXT:    retq # encoding: [0xc3]
5700   %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2)
5701   %2 = bitcast i8 %x3 to <8 x i1>
5702   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5703   %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %x2
5704   ret <4 x double> %3
5705 }
5706
; Zero-masked fmadd on <4 x double>: lanes whose mask bit (lanes 0-3 of the
; bitcast i8) is clear come from zeroinitializer. The expected code is a
; vfmadd213pd with {%k1} {z} writing ymm0 directly.
5707 define <4 x double>@test_int_x86_avx512_maskz_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
5708 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_256:
5709 ; X86:       # %bb.0:
5710 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5711 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5712 ; X86-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa8,0xc2]
5713 ; X86-NEXT:    # ymm0 {%k1} {z} = (ymm1 * ymm0) + ymm2
5714 ; X86-NEXT:    retl # encoding: [0xc3]
5715 ;
5716 ; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_256:
5717 ; X64:       # %bb.0:
5718 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5719 ; X64-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa8,0xc2]
5720 ; X64-NEXT:    # ymm0 {%k1} {z} = (ymm1 * ymm0) + ymm2
5721 ; X64-NEXT:    retq # encoding: [0xc3]
5722   %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2)
5723   %2 = bitcast i8 %x3 to <8 x i1>
5724   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5725   %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> zeroinitializer
5726   ret <4 x double> %3
5727 }
5728
; "mask3" fmadd on <4 x float>: lanes 0-3 of the bitcast i8 mask select
; between the fma result and %x2 (the addend). The expected code is a
; vfmadd231ps accumulating into xmm2 under {%k1}, followed by a copy of xmm2
; to the return register xmm0.
5729 define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
5730 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_128:
5731 ; X86:       # %bb.0:
5732 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5733 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5734 ; X86-NEXT:    vfmadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb8,0xd1]
5735 ; X86-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) + xmm2
5736 ; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
5737 ; X86-NEXT:    retl # encoding: [0xc3]
5738 ;
5739 ; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_128:
5740 ; X64:       # %bb.0:
5741 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5742 ; X64-NEXT:    vfmadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb8,0xd1]
5743 ; X64-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) + xmm2
5744 ; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
5745 ; X64-NEXT:    retq # encoding: [0xc3]
5746   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2)
5747   %2 = bitcast i8 %x3 to <8 x i1>
5748   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5749   %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %x2
5750   ret <4 x float> %3
5751 }
5752
; Zero-masked fmadd on <4 x float>: lanes whose mask bit (lanes 0-3 of the
; bitcast i8) is clear come from zeroinitializer. The expected code is a
; vfmadd213ps with {%k1} {z} writing xmm0 directly.
5753 define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
5754 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_128:
5755 ; X86:       # %bb.0:
5756 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5757 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5758 ; X86-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa8,0xc2]
5759 ; X86-NEXT:    # xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
5760 ; X86-NEXT:    retl # encoding: [0xc3]
5761 ;
5762 ; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_128:
5763 ; X64:       # %bb.0:
5764 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5765 ; X64-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa8,0xc2]
5766 ; X64-NEXT:    # xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
5767 ; X64-NEXT:    retq # encoding: [0xc3]
5768   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2)
5769   %2 = bitcast i8 %x3 to <8 x i1>
5770   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5771   %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> zeroinitializer
5772   ret <4 x float> %3
5773 }
5774
; "mask3" fmadd on <8 x float>: all eight bits of the i8 mask are used
; directly to select between the fma result and %x2 (the addend). The
; expected code is a vfmadd231ps accumulating into ymm2 under {%k1},
; followed by a copy of ymm2 to the return register ymm0.
5775 define <8 x float>@test_int_x86_avx512_mask3_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
5776 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_256:
5777 ; X86:       # %bb.0:
5778 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5779 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5780 ; X86-NEXT:    vfmadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb8,0xd1]
5781 ; X86-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) + ymm2
5782 ; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
5783 ; X86-NEXT:    retl # encoding: [0xc3]
5784 ;
5785 ; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_256:
5786 ; X64:       # %bb.0:
5787 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5788 ; X64-NEXT:    vfmadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb8,0xd1]
5789 ; X64-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) + ymm2
5790 ; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
5791 ; X64-NEXT:    retq # encoding: [0xc3]
5792   %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2)
5793   %2 = bitcast i8 %x3 to <8 x i1>
5794   %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %x2
5795   ret <8 x float> %3
5796 }
5797
; Zero-masked fmadd on <8 x float>: all eight bits of the i8 mask are used
; directly; lanes whose bit is clear come from zeroinitializer. The expected
; code is a vfmadd213ps with {%k1} {z} writing ymm0 directly.
5798 define <8 x float>@test_int_x86_avx512_maskz_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
5799 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_256:
5800 ; X86:       # %bb.0:
5801 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5802 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5803 ; X86-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa8,0xc2]
5804 ; X86-NEXT:    # ymm0 {%k1} {z} = (ymm1 * ymm0) + ymm2
5805 ; X86-NEXT:    retl # encoding: [0xc3]
5806 ;
5807 ; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_256:
5808 ; X64:       # %bb.0:
5809 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5810 ; X64-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa8,0xc2]
5811 ; X64-NEXT:    # ymm0 {%k1} {z} = (ymm1 * ymm0) + ymm2
5812 ; X64-NEXT:    retq # encoding: [0xc3]
5813   %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2)
5814   %2 = bitcast i8 %x3 to <8 x i1>
5815   %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> zeroinitializer
5816   ret <8 x float> %3
5817 }
5818
; "mask3" fmsub on <2 x double>: %x2 is negated by subtracting it from a
; -0.0 splat and the negated value is fed to llvm.fma as the addend. Lanes
; 0-1 of the bitcast i8 mask then select between that result and the
; original (un-negated) %x2. The expected code is a vfmsub231pd into xmm2
; under {%k1}, followed by a copy of xmm2 to xmm0.
5819 define <2 x double>@test_int_x86_avx512_mask3_vfmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
5820 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_128:
5821 ; X86:       # %bb.0:
5822 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5823 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5824 ; X86-NEXT:    vfmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xba,0xd1]
5825 ; X86-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) - xmm2
5826 ; X86-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
5827 ; X86-NEXT:    retl # encoding: [0xc3]
5828 ;
5829 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_128:
5830 ; X64:       # %bb.0:
5831 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5832 ; X64-NEXT:    vfmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xba,0xd1]
5833 ; X64-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) - xmm2
5834 ; X64-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
5835 ; X64-NEXT:    retq # encoding: [0xc3]
5836   %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x2
5837   %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %1)
5838   %3 = bitcast i8 %x3 to <8 x i1>
5839   %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <2 x i32> <i32 0, i32 1>
5840   %4 = select <2 x i1> %extract, <2 x double> %2, <2 x double> %x2
5841   ret <2 x double> %4
5842 }
5843
; "mask3" fmsub on <4 x double>: %x2 is negated by subtracting it from a
; -0.0 splat and the negated value is fed to llvm.fma as the addend. Lanes
; 0-3 of the bitcast i8 mask then select between that result and the
; original (un-negated) %x2. The expected code is a vfmsub231pd into ymm2
; under {%k1}, followed by a copy of ymm2 to ymm0.
5844 define <4 x double>@test_int_x86_avx512_mask3_vfmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
5845 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_256:
5846 ; X86:       # %bb.0:
5847 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5848 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5849 ; X86-NEXT:    vfmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xba,0xd1]
5850 ; X86-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) - ymm2
5851 ; X86-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
5852 ; X86-NEXT:    retl # encoding: [0xc3]
5853 ;
5854 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_256:
5855 ; X64:       # %bb.0:
5856 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5857 ; X64-NEXT:    vfmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xba,0xd1]
5858 ; X64-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) - ymm2
5859 ; X64-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
5860 ; X64-NEXT:    retq # encoding: [0xc3]
5861   %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
5862   %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %1)
5863   %3 = bitcast i8 %x3 to <8 x i1>
5864   %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5865   %4 = select <4 x i1> %extract, <4 x double> %2, <4 x double> %x2
5866   ret <4 x double> %4
5867 }
5868
; "mask3" fmsub on <4 x float>: %x2 is negated by subtracting it from a
; -0.0 splat and the negated value is fed to llvm.fma as the addend. Lanes
; 0-3 of the bitcast i8 mask then select between that result and the
; original (un-negated) %x2. The expected code is a vfmsub231ps into xmm2
; under {%k1}, followed by a copy of xmm2 to xmm0.
5869 define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
5870 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_128:
5871 ; X86:       # %bb.0:
5872 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5873 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5874 ; X86-NEXT:    vfmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xba,0xd1]
5875 ; X86-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) - xmm2
5876 ; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
5877 ; X86-NEXT:    retl # encoding: [0xc3]
5878 ;
5879 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_128:
5880 ; X64:       # %bb.0:
5881 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5882 ; X64-NEXT:    vfmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xba,0xd1]
5883 ; X64-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) - xmm2
5884 ; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
5885 ; X64-NEXT:    retq # encoding: [0xc3]
5886   %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
5887   %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %1)
5888   %3 = bitcast i8 %x3 to <8 x i1>
5889   %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5890   %4 = select <4 x i1> %extract, <4 x float> %2, <4 x float> %x2
5891   ret <4 x float> %4
5892 }
5893
; Merge-masked fused multiply-subtract on <8 x float>, "mask3" form.
; The IR computes fma(%x0, %x1, -%x2) and selects it per lane against %x2, so
; masked-off lanes keep the addend operand. All 8 bits of the i8 mask %x3 are
; used, so there is no lane-extracting shuffle. The assertions expect one
; vfmsub231ps under {%k1} accumulating into ymm2, then a copy of ymm2 into ymm0.
5894 define <8 x float>@test_int_x86_avx512_mask3_vfmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
5895 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_256:
5896 ; X86:       # %bb.0:
5897 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5898 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5899 ; X86-NEXT:    vfmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xba,0xd1]
5900 ; X86-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) - ymm2
5901 ; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
5902 ; X86-NEXT:    retl # encoding: [0xc3]
5903 ;
5904 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_256:
5905 ; X64:       # %bb.0:
5906 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5907 ; X64-NEXT:    vfmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xba,0xd1]
5908 ; X64-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) - ymm2
5909 ; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
5910 ; X64-NEXT:    retq # encoding: [0xc3]
  ; -0.0 - %x2 negates %x2, turning the plain fma into (x0 * x1) - x2.
5911   %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
5912   %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %1)
5913   %3 = bitcast i8 %x3 to <8 x i1>
5914   %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> %x2
5915   ret <8 x float> %4
5916 }
5917
; Unmasked negated multiply-add on <8 x float>.
; The IR computes fma(%a0, -%a1, %a2), i.e. -(a0 * a1) + a2. Both run lines
; share one set of assertions, which expect a single vfnmadd213ps writing ymm0
; and record that the instruction was compressed from EVEX to VEX encoding.
5918 define <8 x float> @test_vfnmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
5919 ; CHECK-LABEL: test_vfnmadd256_ps:
5920 ; CHECK:       # %bb.0:
5921 ; CHECK-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xac,0xc2]
5922 ; CHECK-NEXT:    # ymm0 = -(ymm1 * ymm0) + ymm2
5923 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  ; -0.0 - %a1 negates the second multiplicand.
5924   %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
5925   %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %1, <8 x float> %a2)
5926   ret <8 x float> %2
5927 }
5928
; Merge-masked negated multiply-add on <8 x float>.
; The IR computes fma(%a0, -%a1, %a2) and selects it per lane against %a0, so
; masked-off lanes keep the first multiplicand. All 8 bits of %mask are used.
; The assertions expect one vfnmadd132ps under {%k1} writing ymm0 in place,
; with no extra register copy.
5929 define <8 x float> @test_mask_vfnmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
5930 ; X86-LABEL: test_mask_vfnmadd256_ps:
5931 ; X86:       # %bb.0:
5932 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5933 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5934 ; X86-NEXT:    vfnmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9c,0xc1]
5935 ; X86-NEXT:    # ymm0 {%k1} = -(ymm0 * ymm1) + ymm2
5936 ; X86-NEXT:    retl # encoding: [0xc3]
5937 ;
5938 ; X64-LABEL: test_mask_vfnmadd256_ps:
5939 ; X64:       # %bb.0:
5940 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5941 ; X64-NEXT:    vfnmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9c,0xc1]
5942 ; X64-NEXT:    # ymm0 {%k1} = -(ymm0 * ymm1) + ymm2
5943 ; X64-NEXT:    retq # encoding: [0xc3]
  ; -0.0 - %a1 negates the second multiplicand.
5944   %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
5945   %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %1, <8 x float> %a2)
5946   %3 = bitcast i8 %mask to <8 x i1>
5947   %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> %a0
5948   ret <8 x float> %4
5949 }
5950
; Unmasked negated multiply-add on <4 x float>.
; The IR computes fma(%a0, -%a1, %a2), i.e. -(a0 * a1) + a2. Both run lines
; share one set of assertions, which expect a single vfnmadd213ps writing xmm0
; and record that the instruction was compressed from EVEX to VEX encoding.
5951 define <4 x float> @test_vfnmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
5952 ; CHECK-LABEL: test_vfnmadd128_ps:
5953 ; CHECK:       # %bb.0:
5954 ; CHECK-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xac,0xc2]
5955 ; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
5956 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  ; -0.0 - %a1 negates the second multiplicand.
5957   %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
5958   %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %1, <4 x float> %a2)
5959   ret <4 x float> %2
5960 }
5961
; Merge-masked negated multiply-add on <4 x float>.
; The IR computes fma(%a0, -%a1, %a2) and selects it per lane against %a0, so
; masked-off lanes keep the first multiplicand. Only the low 4 bits of %mask
; are used. The assertions expect one vfnmadd132ps under {%k1} writing xmm0 in
; place, with no extra register copy.
5962 define <4 x float> @test_mask_vfnmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
5963 ; X86-LABEL: test_mask_vfnmadd128_ps:
5964 ; X86:       # %bb.0:
5965 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5966 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5967 ; X86-NEXT:    vfnmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9c,0xc1]
5968 ; X86-NEXT:    # xmm0 {%k1} = -(xmm0 * xmm1) + xmm2
5969 ; X86-NEXT:    retl # encoding: [0xc3]
5970 ;
5971 ; X64-LABEL: test_mask_vfnmadd128_ps:
5972 ; X64:       # %bb.0:
5973 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5974 ; X64-NEXT:    vfnmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9c,0xc1]
5975 ; X64-NEXT:    # xmm0 {%k1} = -(xmm0 * xmm1) + xmm2
5976 ; X64-NEXT:    retq # encoding: [0xc3]
  ; -0.0 - %a1 negates the second multiplicand.
5977   %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
5978   %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %1, <4 x float> %a2)
  ; Widen the i8 mask to 8 x i1 and keep lanes 0-3 as the 4-lane predicate.
5979   %3 = bitcast i8 %mask to <8 x i1>
5980   %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5981   %4 = select <4 x i1> %extract, <4 x float> %2, <4 x float> %a0
5982   ret <4 x float> %4
5983 }
5984
; Unmasked negated multiply-add on <4 x double>.
; The IR computes fma(%a0, -%a1, %a2), i.e. -(a0 * a1) + a2. Both run lines
; share one set of assertions, which expect a single vfnmadd213pd writing ymm0
; and record that the instruction was compressed from EVEX to VEX encoding.
5985 define <4 x double> @test_vfnmadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
5986 ; CHECK-LABEL: test_vfnmadd256_pd:
5987 ; CHECK:       # %bb.0:
5988 ; CHECK-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xac,0xc2]
5989 ; CHECK-NEXT:    # ymm0 = -(ymm1 * ymm0) + ymm2
5990 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  ; -0.0 - %a1 negates the second multiplicand.
5991   %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
5992   %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %1, <4 x double> %a2)
5993   ret <4 x double> %2
5994 }
5995
; Merge-masked negated multiply-add on <4 x double>.
; The IR computes fma(%a0, -%a1, %a2) and selects it per lane against %a0, so
; masked-off lanes keep the first multiplicand. Only the low 4 bits of %mask
; are used. The assertions expect one vfnmadd132pd under {%k1} writing ymm0 in
; place, with no extra register copy.
5996 define <4 x double> @test_mask_vfnmadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
5997 ; X86-LABEL: test_mask_vfnmadd256_pd:
5998 ; X86:       # %bb.0:
5999 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6000 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6001 ; X86-NEXT:    vfnmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9c,0xc1]
6002 ; X86-NEXT:    # ymm0 {%k1} = -(ymm0 * ymm1) + ymm2
6003 ; X86-NEXT:    retl # encoding: [0xc3]
6004 ;
6005 ; X64-LABEL: test_mask_vfnmadd256_pd:
6006 ; X64:       # %bb.0:
6007 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6008 ; X64-NEXT:    vfnmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9c,0xc1]
6009 ; X64-NEXT:    # ymm0 {%k1} = -(ymm0 * ymm1) + ymm2
6010 ; X64-NEXT:    retq # encoding: [0xc3]
  ; -0.0 - %a1 negates the second multiplicand.
6011   %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
6012   %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %1, <4 x double> %a2)
  ; Widen the i8 mask to 8 x i1 and keep lanes 0-3 as the 4-lane predicate.
6013   %3 = bitcast i8 %mask to <8 x i1>
6014   %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6015   %4 = select <4 x i1> %extract, <4 x double> %2, <4 x double> %a0
6016   ret <4 x double> %4
6017 }
6018
; Unmasked negated multiply-add on <2 x double>.
; The IR computes fma(%a0, -%a1, %a2), i.e. -(a0 * a1) + a2. Both run lines
; share one set of assertions, which expect a single vfnmadd213pd writing xmm0
; and record that the instruction was compressed from EVEX to VEX encoding.
6019 define <2 x double> @test_vfnmadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
6020 ; CHECK-LABEL: test_vfnmadd128_pd:
6021 ; CHECK:       # %bb.0:
6022 ; CHECK-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xac,0xc2]
6023 ; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
6024 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  ; -0.0 - %a1 negates the second multiplicand.
6025   %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a1
6026   %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %1, <2 x double> %a2)
6027   ret <2 x double> %2
6028 }
6029
; Merge-masked negated multiply-add on <2 x double>.
; The IR computes fma(%a0, -%a1, %a2) and selects it per lane against %a0, so
; masked-off lanes keep the first multiplicand. Only the low 2 bits of %mask
; are used. The assertions expect one vfnmadd132pd under {%k1} writing xmm0 in
; place, with no extra register copy.
6030 define <2 x double> @test_mask_vfnmadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
6031 ; X86-LABEL: test_mask_vfnmadd128_pd:
6032 ; X86:       # %bb.0:
6033 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6034 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6035 ; X86-NEXT:    vfnmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9c,0xc1]
6036 ; X86-NEXT:    # xmm0 {%k1} = -(xmm0 * xmm1) + xmm2
6037 ; X86-NEXT:    retl # encoding: [0xc3]
6038 ;
6039 ; X64-LABEL: test_mask_vfnmadd128_pd:
6040 ; X64:       # %bb.0:
6041 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6042 ; X64-NEXT:    vfnmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9c,0xc1]
6043 ; X64-NEXT:    # xmm0 {%k1} = -(xmm0 * xmm1) + xmm2
6044 ; X64-NEXT:    retq # encoding: [0xc3]
  ; -0.0 - %a1 negates the second multiplicand.
6045   %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a1
6046   %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %1, <2 x double> %a2)
  ; Widen the i8 mask to 8 x i1 and keep lanes 0-1 as the 2-lane predicate.
6047   %3 = bitcast i8 %mask to <8 x i1>
6048   %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <2 x i32> <i32 0, i32 1>
6049   %4 = select <2 x i1> %extract, <2 x double> %2, <2 x double> %a0
6050   ret <2 x double> %4
6051 }
6052
; Unmasked negated multiply-subtract on <8 x float>.
; The IR computes fma(%a0, -%a1, -%a2), i.e. -(a0 * a1) - a2. Both run lines
; share one set of assertions, which expect a single vfnmsub213ps writing ymm0
; and record that the instruction was compressed from EVEX to VEX encoding.
6053 define <8 x float> @test_vfnmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
6054 ; CHECK-LABEL: test_vfnmsub256_ps:
6055 ; CHECK:       # %bb.0:
6056 ; CHECK-NEXT:    vfnmsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xae,0xc2]
6057 ; CHECK-NEXT:    # ymm0 = -(ymm1 * ymm0) - ymm2
6058 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  ; Two subtractions from -0.0 negate the second multiplicand and the addend.
6059   %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
6060   %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
6061   %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %1, <8 x float> %2)
6062   ret <8 x float> %3
6063 }
6064
; Merge-masked negated multiply-subtract on <8 x float>.
; The IR computes fma(%a0, -%a1, -%a2) and selects it per lane against %a0, so
; masked-off lanes keep the first multiplicand. All 8 bits of %mask are used.
; The assertions expect one vfnmsub132ps under {%k1} writing ymm0 in place,
; with no extra register copy.
6065 define <8 x float> @test_mask_vfnmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
6066 ; X86-LABEL: test_mask_vfnmsub256_ps:
6067 ; X86:       # %bb.0:
6068 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6069 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6070 ; X86-NEXT:    vfnmsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9e,0xc1]
6071 ; X86-NEXT:    # ymm0 {%k1} = -(ymm0 * ymm1) - ymm2
6072 ; X86-NEXT:    retl # encoding: [0xc3]
6073 ;
6074 ; X64-LABEL: test_mask_vfnmsub256_ps:
6075 ; X64:       # %bb.0:
6076 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6077 ; X64-NEXT:    vfnmsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9e,0xc1]
6078 ; X64-NEXT:    # ymm0 {%k1} = -(ymm0 * ymm1) - ymm2
6079 ; X64-NEXT:    retq # encoding: [0xc3]
  ; Two subtractions from -0.0 negate the second multiplicand and the addend.
6080   %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
6081   %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
6082   %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %1, <8 x float> %2)
6083   %4 = bitcast i8 %mask to <8 x i1>
6084   %5 = select <8 x i1> %4, <8 x float> %3, <8 x float> %a0
6085   ret <8 x float> %5
6086 }
6087
; Unmasked negated multiply-subtract on <4 x float>.
; The IR computes fma(%a0, -%a1, -%a2), i.e. -(a0 * a1) - a2. Both run lines
; share one set of assertions, which expect a single vfnmsub213ps writing xmm0
; and record that the instruction was compressed from EVEX to VEX encoding.
6088 define <4 x float> @test_vfnmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
6089 ; CHECK-LABEL: test_vfnmsub128_ps:
6090 ; CHECK:       # %bb.0:
6091 ; CHECK-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xae,0xc2]
6092 ; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) - xmm2
6093 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  ; Two subtractions from -0.0 negate the second multiplicand and the addend.
6094   %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
6095   %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
6096   %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %1, <4 x float> %2)
6097   ret <4 x float> %3
6098 }
6099
; Merge-masked negated multiply-subtract on <4 x float>.
; The IR computes fma(%a0, -%a1, -%a2) and selects it per lane against %a0, so
; masked-off lanes keep the first multiplicand. Only the low 4 bits of %mask
; are used. The assertions expect one vfnmsub132ps under {%k1} writing xmm0 in
; place, with no extra register copy.
6100 define <4 x float> @test_mask_vfnmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
6101 ; X86-LABEL: test_mask_vfnmsub128_ps:
6102 ; X86:       # %bb.0:
6103 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6104 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6105 ; X86-NEXT:    vfnmsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9e,0xc1]
6106 ; X86-NEXT:    # xmm0 {%k1} = -(xmm0 * xmm1) - xmm2
6107 ; X86-NEXT:    retl # encoding: [0xc3]
6108 ;
6109 ; X64-LABEL: test_mask_vfnmsub128_ps:
6110 ; X64:       # %bb.0:
6111 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6112 ; X64-NEXT:    vfnmsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9e,0xc1]
6113 ; X64-NEXT:    # xmm0 {%k1} = -(xmm0 * xmm1) - xmm2
6114 ; X64-NEXT:    retq # encoding: [0xc3]
  ; Two subtractions from -0.0 negate the second multiplicand and the addend.
6115   %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
6116   %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
6117   %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %1, <4 x float> %2)
  ; Widen the i8 mask to 8 x i1 and keep lanes 0-3 as the 4-lane predicate.
6118   %4 = bitcast i8 %mask to <8 x i1>
6119   %extract = shufflevector <8 x i1> %4, <8 x i1> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6120   %5 = select <4 x i1> %extract, <4 x float> %3, <4 x float> %a0
6121   ret <4 x float> %5
6122 }
6123
; Unmasked negated multiply-subtract on <4 x double>.
; The IR computes fma(%a0, -%a1, -%a2), i.e. -(a0 * a1) - a2. Both run lines
; share one set of assertions, which expect a single vfnmsub213pd writing ymm0
; and record that the instruction was compressed from EVEX to VEX encoding.
6124 define <4 x double> @test_vfnmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
6125 ; CHECK-LABEL: test_vfnmsub256_pd:
6126 ; CHECK:       # %bb.0:
6127 ; CHECK-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xae,0xc2]
6128 ; CHECK-NEXT:    # ymm0 = -(ymm1 * ymm0) - ymm2
6129 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  ; Two subtractions from -0.0 negate the second multiplicand and the addend.
6130   %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
6131   %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
6132   %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %1, <4 x double> %2)
6133   ret <4 x double> %3
6134 }
6135
; Merge-masked negated multiply-subtract on <4 x double>.
; The IR computes fma(%a0, -%a1, -%a2) and selects it per lane against %a0, so
; masked-off lanes keep the first multiplicand. Only the low 4 bits of %mask
; are used. The assertions expect one vfnmsub132pd under {%k1} writing ymm0 in
; place, with no extra register copy.
6136 define <4 x double> @test_mask_vfnmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
6137 ; X86-LABEL: test_mask_vfnmsub256_pd:
6138 ; X86:       # %bb.0:
6139 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6140 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6141 ; X86-NEXT:    vfnmsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9e,0xc1]
6142 ; X86-NEXT:    # ymm0 {%k1} = -(ymm0 * ymm1) - ymm2
6143 ; X86-NEXT:    retl # encoding: [0xc3]
6144 ;
6145 ; X64-LABEL: test_mask_vfnmsub256_pd:
6146 ; X64:       # %bb.0:
6147 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6148 ; X64-NEXT:    vfnmsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9e,0xc1]
6149 ; X64-NEXT:    # ymm0 {%k1} = -(ymm0 * ymm1) - ymm2
6150 ; X64-NEXT:    retq # encoding: [0xc3]
  ; Two subtractions from -0.0 negate the second multiplicand and the addend.
6151   %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
6152   %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
6153   %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %1, <4 x double> %2)
  ; Widen the i8 mask to 8 x i1 and keep lanes 0-3 as the 4-lane predicate.
6154   %4 = bitcast i8 %mask to <8 x i1>
6155   %extract = shufflevector <8 x i1> %4, <8 x i1> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6156   %5 = select <4 x i1> %extract, <4 x double> %3, <4 x double> %a0
6157   ret <4 x double> %5
6158 }
6159
; Unmasked negated multiply-subtract on <2 x double>.
; The IR computes fma(%a0, -%a1, -%a2), i.e. -(a0 * a1) - a2. Both run lines
; share one set of assertions, which expect a single vfnmsub213pd writing xmm0
; and record that the instruction was compressed from EVEX to VEX encoding.
6160 define <2 x double> @test_vfnmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
6161 ; CHECK-LABEL: test_vfnmsub128_pd:
6162 ; CHECK:       # %bb.0:
6163 ; CHECK-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xae,0xc2]
6164 ; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) - xmm2
6165 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  ; Two subtractions from -0.0 negate the second multiplicand and the addend.
6166   %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a1
6167   %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
6168   %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %1, <2 x double> %2)
6169   ret <2 x double> %3
6170 }
6171
; Merge-masked negated multiply-subtract on <2 x double>.
; The IR computes fma(%a0, -%a1, -%a2) and selects it per lane against %a0, so
; masked-off lanes keep the first multiplicand. Only the low 2 bits of %mask
; are used. The assertions expect one vfnmsub132pd under {%k1} writing xmm0 in
; place, with no extra register copy.
6172 define <2 x double> @test_mask_vfnmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
6173 ; X86-LABEL: test_mask_vfnmsub128_pd:
6174 ; X86:       # %bb.0:
6175 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6176 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6177 ; X86-NEXT:    vfnmsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9e,0xc1]
6178 ; X86-NEXT:    # xmm0 {%k1} = -(xmm0 * xmm1) - xmm2
6179 ; X86-NEXT:    retl # encoding: [0xc3]
6180 ;
6181 ; X64-LABEL: test_mask_vfnmsub128_pd:
6182 ; X64:       # %bb.0:
6183 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6184 ; X64-NEXT:    vfnmsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9e,0xc1]
6185 ; X64-NEXT:    # xmm0 {%k1} = -(xmm0 * xmm1) - xmm2
6186 ; X64-NEXT:    retq # encoding: [0xc3]
  ; Two subtractions from -0.0 negate the second multiplicand and the addend.
6187   %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a1
6188   %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
6189   %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %1, <2 x double> %2)
  ; Widen the i8 mask to 8 x i1 and keep lanes 0-1 as the 2-lane predicate.
6190   %4 = bitcast i8 %mask to <8 x i1>
6191   %extract = shufflevector <8 x i1> %4, <8 x i1> %4, <2 x i32> <i32 0, i32 1>
6192   %5 = select <2 x i1> %extract, <2 x double> %3, <2 x double> %a0
6193   ret <2 x double> %5
6194 }
6195
; Merge-masked negated multiply-subtract on <2 x double>, "mask3" form.
; The IR computes fma(-%x0, %x1, -%x2), i.e. -(x0 * x1) - x2, and selects it
; per lane against %x2, so masked-off lanes keep the addend operand. Unlike the
; test_mask_vfnmsub* functions in this file, the negation is applied to the
; first multiplicand. Only the low 2 bits of %x3 are used. The assertions
; expect one vfnmsub231pd under {%k1} accumulating into xmm2, then a copy of
; xmm2 into xmm0.
6196 define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
6197 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_128:
6198 ; X86:       # %bb.0:
6199 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6200 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6201 ; X86-NEXT:    vfnmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xbe,0xd1]
6202 ; X86-NEXT:    # xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
6203 ; X86-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
6204 ; X86-NEXT:    retl # encoding: [0xc3]
6205 ;
6206 ; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_128:
6207 ; X64:       # %bb.0:
6208 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6209 ; X64-NEXT:    vfnmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xbe,0xd1]
6210 ; X64-NEXT:    # xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
6211 ; X64-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
6212 ; X64-NEXT:    retq # encoding: [0xc3]
  ; Two subtractions from -0.0 negate the first multiplicand and the addend.
6213   %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x0
6214   %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x2
6215   %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %x1, <2 x double> %2)
  ; Widen the i8 mask to 8 x i1 and keep lanes 0-1 as the 2-lane predicate.
6216   %4 = bitcast i8 %x3 to <8 x i1>
6217   %extract = shufflevector <8 x i1> %4, <8 x i1> %4, <2 x i32> <i32 0, i32 1>
6218   %5 = select <2 x i1> %extract, <2 x double> %3, <2 x double> %x2
6219   ret <2 x double> %5
6220 }
6221
; Merge-masked negated multiply-subtract on <4 x double>, "mask3" form.
; The IR computes fma(-%x0, %x1, -%x2), i.e. -(x0 * x1) - x2, and selects it
; per lane against %x2, so masked-off lanes keep the addend operand. Only the
; low 4 bits of %x3 are used. The assertions expect one vfnmsub231pd under
; {%k1} accumulating into ymm2, then a copy of ymm2 into ymm0.
6222 define <4 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
6223 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_256:
6224 ; X86:       # %bb.0:
6225 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6226 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6227 ; X86-NEXT:    vfnmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xbe,0xd1]
6228 ; X86-NEXT:    # ymm2 {%k1} = -(ymm0 * ymm1) - ymm2
6229 ; X86-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
6230 ; X86-NEXT:    retl # encoding: [0xc3]
6231 ;
6232 ; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_256:
6233 ; X64:       # %bb.0:
6234 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6235 ; X64-NEXT:    vfnmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xbe,0xd1]
6236 ; X64-NEXT:    # ymm2 {%k1} = -(ymm0 * ymm1) - ymm2
6237 ; X64-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
6238 ; X64-NEXT:    retq # encoding: [0xc3]
  ; Two subtractions from -0.0 negate the first multiplicand and the addend.
6239   %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x0
6240   %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
6241   %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %x1, <4 x double> %2)
  ; Widen the i8 mask to 8 x i1 and keep lanes 0-3 as the 4-lane predicate.
6242   %4 = bitcast i8 %x3 to <8 x i1>
6243   %extract = shufflevector <8 x i1> %4, <8 x i1> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6244   %5 = select <4 x i1> %extract, <4 x double> %3, <4 x double> %x2
6245   ret <4 x double> %5
6246 }
6247
; Merge-masked negated multiply-subtract on <4 x float>, "mask3" form.
; The IR computes fma(-%x0, %x1, -%x2), i.e. -(x0 * x1) - x2, and selects it
; per lane against %x2, so masked-off lanes keep the addend operand. Only the
; low 4 bits of %x3 are used. The assertions expect one vfnmsub231ps under
; {%k1} accumulating into xmm2, then a copy of xmm2 into xmm0.
6248 define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
6249 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_128:
6250 ; X86:       # %bb.0:
6251 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6252 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6253 ; X86-NEXT:    vfnmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xbe,0xd1]
6254 ; X86-NEXT:    # xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
6255 ; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
6256 ; X86-NEXT:    retl # encoding: [0xc3]
6257 ;
6258 ; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_128:
6259 ; X64:       # %bb.0:
6260 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6261 ; X64-NEXT:    vfnmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xbe,0xd1]
6262 ; X64-NEXT:    # xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
6263 ; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
6264 ; X64-NEXT:    retq # encoding: [0xc3]
  ; Two subtractions from -0.0 negate the first multiplicand and the addend.
6265   %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x0
6266   %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
6267   %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %x1, <4 x float> %2)
  ; Widen the i8 mask to 8 x i1 and keep lanes 0-3 as the 4-lane predicate.
6268   %4 = bitcast i8 %x3 to <8 x i1>
6269   %extract = shufflevector <8 x i1> %4, <8 x i1> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6270   %5 = select <4 x i1> %extract, <4 x float> %3, <4 x float> %x2
6271   ret <4 x float> %5
6272 }
6273
; Merge-masked negated multiply-subtract on <8 x float>, "mask3" form.
; The IR computes fma(-%x0, %x1, -%x2), i.e. -(x0 * x1) - x2, and selects it
; per lane against %x2, so masked-off lanes keep the addend operand. All 8 bits
; of %x3 are used, so there is no lane-extracting shuffle. The assertions
; expect one vfnmsub231ps under {%k1} accumulating into ymm2, then a copy of
; ymm2 into ymm0.
6274 define <8 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
6275 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_256:
6276 ; X86:       # %bb.0:
6277 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6278 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6279 ; X86-NEXT:    vfnmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xbe,0xd1]
6280 ; X86-NEXT:    # ymm2 {%k1} = -(ymm0 * ymm1) - ymm2
6281 ; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
6282 ; X86-NEXT:    retl # encoding: [0xc3]
6283 ;
6284 ; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_256:
6285 ; X64:       # %bb.0:
6286 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6287 ; X64-NEXT:    vfnmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xbe,0xd1]
6288 ; X64-NEXT:    # ymm2 {%k1} = -(ymm0 * ymm1) - ymm2
6289 ; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
6290 ; X64-NEXT:    retq # encoding: [0xc3]
  ; Two subtractions from -0.0 negate the first multiplicand and the addend.
6291   %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x0
6292   %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
6293   %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %x1, <8 x float> %2)
6294   %4 = bitcast i8 %x3 to <8 x i1>
6295   %5 = select <8 x i1> %4, <8 x float> %3, <8 x float> %x2
6296   ret <8 x float> %5
6297 }
6298
; Unmasked alternating multiply-add/subtract on <8 x float>.
; The IR builds two fma results, a*b + c and a*b - c, and interleaves them with
; a shuffle: even result lanes come from the subtracting fma, odd lanes from
; the adding one. Both run lines share one set of assertions, which expect a
; single vfmaddsub213ps writing ymm0, compressed from EVEX to VEX encoding.
6299 define <8 x float> @test_fmaddsub256_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
6300 ; CHECK-LABEL: test_fmaddsub256_ps:
6301 ; CHECK:       # %bb.0:
6302 ; CHECK-NEXT:    vfmaddsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa6,0xc2]
6303 ; CHECK-NEXT:    # ymm0 = (ymm1 * ymm0) +/- ymm2
6304 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6305   %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c)
  ; -0.0 - %c negates the addend for the subtracting half.
6306   %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
6307   %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %2)
  ; Shuffle indices 0-7 pick from %3 (a*b - c), indices 8-15 from %1 (a*b + c).
6308   %4 = shufflevector <8 x float> %3, <8 x float> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
6309   ret <8 x float> %4
6310 }
6311
; Merge-masked alternating multiply-add/subtract on <8 x float>.
; The IR interleaves a*b - c (even lanes) with a*b + c (odd lanes) and selects
; the result per lane against %a, so masked-off lanes keep the first
; multiplicand. All 8 bits of %mask are used. The assertions expect one
; vfmaddsub132ps under {%k1} writing ymm0 in place, with no extra register copy.
6312 define <8 x float> @test_mask_fmaddsub256_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask) {
6313 ; X86-LABEL: test_mask_fmaddsub256_ps:
6314 ; X86:       # %bb.0:
6315 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6316 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6317 ; X86-NEXT:    vfmaddsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x96,0xc1]
6318 ; X86-NEXT:    # ymm0 {%k1} = (ymm0 * ymm1) +/- ymm2
6319 ; X86-NEXT:    retl # encoding: [0xc3]
6320 ;
6321 ; X64-LABEL: test_mask_fmaddsub256_ps:
6322 ; X64:       # %bb.0:
6323 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6324 ; X64-NEXT:    vfmaddsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x96,0xc1]
6325 ; X64-NEXT:    # ymm0 {%k1} = (ymm0 * ymm1) +/- ymm2
6326 ; X64-NEXT:    retq # encoding: [0xc3]
6327   %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c)
  ; -0.0 - %c negates the addend for the subtracting half.
6328   %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
6329   %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %2)
  ; Shuffle indices 0-7 pick from %3 (a*b - c), indices 8-15 from %1 (a*b + c).
6330   %4 = shufflevector <8 x float> %3, <8 x float> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
6331   %5 = bitcast i8 %mask to <8 x i1>
6332   %6 = select <8 x i1> %5, <8 x float> %4, <8 x float> %a
6333   ret <8 x float> %6
6334 }
6335
; Unmasked alternating multiply-add/subtract on <4 x float>.
; The IR builds two fma results, a*b + c and a*b - c, and interleaves them with
; a shuffle: lanes 0 and 2 come from the subtracting fma, lanes 1 and 3 from
; the adding one. Both run lines share one set of assertions, which expect a
; single vfmaddsub213ps writing xmm0, compressed from EVEX to VEX encoding.
6336 define <4 x float> @test_fmaddsub128_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
6337 ; CHECK-LABEL: test_fmaddsub128_ps:
6338 ; CHECK:       # %bb.0:
6339 ; CHECK-NEXT:    vfmaddsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa6,0xc2]
6340 ; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) +/- xmm2
6341 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6342   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
  ; -0.0 - %c negates the addend for the subtracting half.
6343   %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
6344   %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %2)
  ; Shuffle indices 0-3 pick from %3 (a*b - c), indices 4-7 from %1 (a*b + c).
6345   %4 = shufflevector <4 x float> %3, <4 x float> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
6346   ret <4 x float> %4
6347 }
6348
; Merge-masked alternating multiply-add/subtract on <4 x float>.
; The IR interleaves a*b - c (lanes 0 and 2) with a*b + c (lanes 1 and 3) and
; selects the result per lane against %a, so masked-off lanes keep the first
; multiplicand. Only the low 4 bits of %mask are used. The assertions expect
; one vfmaddsub132ps under {%k1} writing xmm0 in place, with no extra copy.
6349 define <4 x float> @test_mask_fmaddsub128_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
6350 ; X86-LABEL: test_mask_fmaddsub128_ps:
6351 ; X86:       # %bb.0:
6352 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6353 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6354 ; X86-NEXT:    vfmaddsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x96,0xc1]
6355 ; X86-NEXT:    # xmm0 {%k1} = (xmm0 * xmm1) +/- xmm2
6356 ; X86-NEXT:    retl # encoding: [0xc3]
6357 ;
6358 ; X64-LABEL: test_mask_fmaddsub128_ps:
6359 ; X64:       # %bb.0:
6360 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6361 ; X64-NEXT:    vfmaddsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x96,0xc1]
6362 ; X64-NEXT:    # xmm0 {%k1} = (xmm0 * xmm1) +/- xmm2
6363 ; X64-NEXT:    retq # encoding: [0xc3]
6364   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
  ; -0.0 - %c negates the addend for the subtracting half.
6365   %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
6366   %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %2)
  ; Shuffle indices 0-3 pick from %3 (a*b - c), indices 4-7 from %1 (a*b + c).
6367   %4 = shufflevector <4 x float> %3, <4 x float> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ; Widen the i8 mask to 8 x i1 and keep lanes 0-3 as the 4-lane predicate.
6368   %5 = bitcast i8 %mask to <8 x i1>
6369   %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6370   %6 = select <4 x i1> %extract, <4 x float> %4, <4 x float> %a
6371   ret <4 x float> %6
6372 }
6373
; Unmasked alternating multiply-add/subtract on <4 x double>.
; The IR builds two fma results, a0*a1 + a2 and a0*a1 - a2, and interleaves
; them with a shuffle: lanes 0 and 2 come from the subtracting fma, lanes 1 and
; 3 from the adding one. Both run lines share one set of assertions, which
; expect a single vfmaddsub213pd writing ymm0, compressed from EVEX to VEX.
6374 define <4 x double> @test_vfmaddsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
6375 ; CHECK-LABEL: test_vfmaddsub256_pd:
6376 ; CHECK:       # %bb.0:
6377 ; CHECK-NEXT:    vfmaddsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa6,0xc2]
6378 ; CHECK-NEXT:    # ymm0 = (ymm1 * ymm0) +/- ymm2
6379 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6380   %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ; -0.0 - %a2 negates the addend for the subtracting half.
6381   %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
6382   %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2)
  ; Shuffle indices 0-3 pick from %3 (subtract), indices 4-7 from %1 (add).
6383   %4 = shufflevector <4 x double> %3, <4 x double> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
6384   ret <4 x double> %4
6385 }
6386
; Merge-masked 256-bit double-precision fmaddsub expressed with generic IR:
; even lanes compute a0*a1 - a2, odd lanes compute a0*a1 + a2, and lanes whose
; mask bit is clear keep the corresponding lane of %a0. The assertions below
; expect one masked vfmaddsub132pd on ymm registers.
6387 define <4 x double> @test_mask_vfmaddsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
6388 ; X86-LABEL: test_mask_vfmaddsub256_pd:
6389 ; X86:       # %bb.0:
6390 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6391 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6392 ; X86-NEXT:    vfmaddsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x96,0xc1]
6393 ; X86-NEXT:    # ymm0 {%k1} = (ymm0 * ymm1) +/- ymm2
6394 ; X86-NEXT:    retl # encoding: [0xc3]
6395 ;
6396 ; X64-LABEL: test_mask_vfmaddsub256_pd:
6397 ; X64:       # %bb.0:
6398 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6399 ; X64-NEXT:    vfmaddsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x96,0xc1]
6400 ; X64-NEXT:    # ymm0 {%k1} = (ymm0 * ymm1) +/- ymm2
6401 ; X64-NEXT:    retq # encoding: [0xc3]
  ; %1 = a0*a1 + a2 in every lane.
6402   %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ; %2 negates %a2 (written as -0.0 - a2), so %3 = a0*a1 - a2 in every lane.
6403   %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
6404   %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2)
  ; Interleave: lanes 0 and 2 come from %3 (subtract), lanes 1 and 3 from %1 (add).
6405   %4 = shufflevector <4 x double> %3, <4 x double> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ; Only the low four bits of the i8 mask are used as the per-lane predicate.
6406   %5 = bitcast i8 %mask to <8 x i1>
6407   %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Masked-off lanes pass through %a0.
6408   %6 = select <4 x i1> %extract, <4 x double> %4, <4 x double> %a0
6409   ret <4 x double> %6
6410 }
6411
; Unmasked 128-bit double-precision fmaddsub expressed with generic IR:
; lane 0 computes a0*a1 - a2 and lane 1 computes a0*a1 + a2. The assertions
; below expect a single vfmaddsub213pd on xmm registers.
6412 define <2 x double> @test_vfmaddsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
6413 ; CHECK-LABEL: test_vfmaddsub128_pd:
6414 ; CHECK:       # %bb.0:
6415 ; CHECK-NEXT:    vfmaddsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa6,0xc2]
6416 ; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) +/- xmm2
6417 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  ; %1 = a0*a1 + a2 in both lanes.
6418   %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ; %2 negates %a2 (written as -0.0 - a2), so %3 = a0*a1 - a2 in both lanes.
6419   %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
6420   %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2)
  ; Interleave: lane 0 comes from %3 (subtract), lane 1 from %1 (add).
6421   %4 = shufflevector <2 x double> %3, <2 x double> %1, <2 x i32> <i32 0, i32 3>
6422   ret <2 x double> %4
6423 }
6424
; Merge-masked 128-bit double-precision fmaddsub expressed with generic IR:
; lane 0 computes a0*a1 - a2, lane 1 computes a0*a1 + a2, and lanes whose mask
; bit is clear keep the corresponding lane of %a0. The assertions below expect
; one masked vfmaddsub132pd on xmm registers.
6425 define <2 x double> @test_mask_vfmaddsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
6426 ; X86-LABEL: test_mask_vfmaddsub128_pd:
6427 ; X86:       # %bb.0:
6428 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6429 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6430 ; X86-NEXT:    vfmaddsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x96,0xc1]
6431 ; X86-NEXT:    # xmm0 {%k1} = (xmm0 * xmm1) +/- xmm2
6432 ; X86-NEXT:    retl # encoding: [0xc3]
6433 ;
6434 ; X64-LABEL: test_mask_vfmaddsub128_pd:
6435 ; X64:       # %bb.0:
6436 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6437 ; X64-NEXT:    vfmaddsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x96,0xc1]
6438 ; X64-NEXT:    # xmm0 {%k1} = (xmm0 * xmm1) +/- xmm2
6439 ; X64-NEXT:    retq # encoding: [0xc3]
  ; %1 = a0*a1 + a2 in both lanes.
6440   %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ; %2 negates %a2 (written as -0.0 - a2), so %3 = a0*a1 - a2 in both lanes.
6441   %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
6442   %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2)
  ; Interleave: lane 0 comes from %3 (subtract), lane 1 from %1 (add).
6443   %4 = shufflevector <2 x double> %3, <2 x double> %1, <2 x i32> <i32 0, i32 3>
  ; Only the low two bits of the i8 mask are used as the per-lane predicate.
6444   %5 = bitcast i8 %mask to <8 x i1>
6445   %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
  ; Masked-off lanes pass through %a0.
6446   %6 = select <2 x i1> %extract, <2 x double> %4, <2 x double> %a0
6447   ret <2 x double> %6
6448 }
6449
; "mask3" form of 128-bit double-precision fmaddsub: lane 0 computes
; x0*x1 - x2, lane 1 computes x0*x1 + x2, and lanes whose mask bit is clear
; keep the corresponding lane of the addend %x2. The assertions below expect a
; masked vfmaddsub231pd that accumulates into xmm2, followed by a register
; copy into xmm0.
6450 define <2 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
6451 ; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_128:
6452 ; X86:       # %bb.0:
6453 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6454 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6455 ; X86-NEXT:    vfmaddsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb6,0xd1]
6456 ; X86-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) +/- xmm2
6457 ; X86-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
6458 ; X86-NEXT:    retl # encoding: [0xc3]
6459 ;
6460 ; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_128:
6461 ; X64:       # %bb.0:
6462 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6463 ; X64-NEXT:    vfmaddsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb6,0xd1]
6464 ; X64-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) +/- xmm2
6465 ; X64-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
6466 ; X64-NEXT:    retq # encoding: [0xc3]
  ; %1 = x0*x1 + x2 in both lanes.
6467   %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2)
  ; %2 negates %x2 (written as -0.0 - x2), so %3 = x0*x1 - x2 in both lanes.
6468   %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x2
6469   %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %2)
  ; Interleave: lane 0 comes from %3 (subtract), lane 1 from %1 (add).
6470   %4 = shufflevector <2 x double> %3, <2 x double> %1, <2 x i32> <i32 0, i32 3>
  ; Only the low two bits of the i8 mask are used as the per-lane predicate.
6471   %5 = bitcast i8 %x3 to <8 x i1>
6472   %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
  ; Masked-off lanes pass through the addend %x2.
6473   %6 = select <2 x i1> %extract, <2 x double> %4, <2 x double> %x2
6474   ret <2 x double> %6
6475 }
6476
; Zero-masked 128-bit double-precision fmaddsub: lane 0 computes x0*x1 - x2,
; lane 1 computes x0*x1 + x2, and lanes whose mask bit is clear are set to
; zero. The assertions below expect one vfmaddsub213pd with {%k1} {z}.
6477 define <2 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
6478 ; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_128:
6479 ; X86:       # %bb.0:
6480 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6481 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6482 ; X86-NEXT:    vfmaddsub213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa6,0xc2]
6483 ; X86-NEXT:    # xmm0 {%k1} {z} = (xmm1 * xmm0) +/- xmm2
6484 ; X86-NEXT:    retl # encoding: [0xc3]
6485 ;
6486 ; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_128:
6487 ; X64:       # %bb.0:
6488 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6489 ; X64-NEXT:    vfmaddsub213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa6,0xc2]
6490 ; X64-NEXT:    # xmm0 {%k1} {z} = (xmm1 * xmm0) +/- xmm2
6491 ; X64-NEXT:    retq # encoding: [0xc3]
  ; %1 = x0*x1 + x2 in both lanes.
6492   %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2)
  ; %2 negates %x2 (written as -0.0 - x2), so %3 = x0*x1 - x2 in both lanes.
6493   %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x2
6494   %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %2)
  ; Interleave: lane 0 comes from %3 (subtract), lane 1 from %1 (add).
6495   %4 = shufflevector <2 x double> %3, <2 x double> %1, <2 x i32> <i32 0, i32 3>
  ; Only the low two bits of the i8 mask are used as the per-lane predicate.
6496   %5 = bitcast i8 %x3 to <8 x i1>
6497   %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
  ; Masked-off lanes become zero.
6498   %6 = select <2 x i1> %extract, <2 x double> %4, <2 x double> zeroinitializer
6499   ret <2 x double> %6
6500 }
6501
; "mask3" form of 256-bit double-precision fmaddsub: even lanes compute
; x0*x1 - x2, odd lanes compute x0*x1 + x2, and lanes whose mask bit is clear
; keep the corresponding lane of the addend %x2. The assertions below expect a
; masked vfmaddsub231pd that accumulates into ymm2, followed by a register
; copy into ymm0.
6502 define <4 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
6503 ; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_256:
6504 ; X86:       # %bb.0:
6505 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6506 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6507 ; X86-NEXT:    vfmaddsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb6,0xd1]
6508 ; X86-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) +/- ymm2
6509 ; X86-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
6510 ; X86-NEXT:    retl # encoding: [0xc3]
6511 ;
6512 ; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_256:
6513 ; X64:       # %bb.0:
6514 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6515 ; X64-NEXT:    vfmaddsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb6,0xd1]
6516 ; X64-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) +/- ymm2
6517 ; X64-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
6518 ; X64-NEXT:    retq # encoding: [0xc3]
  ; %1 = x0*x1 + x2 in every lane.
6519   %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2)
  ; %2 negates %x2 (written as -0.0 - x2), so %3 = x0*x1 - x2 in every lane.
6520   %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
6521   %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %2)
  ; Interleave: lanes 0 and 2 come from %3 (subtract), lanes 1 and 3 from %1 (add).
6522   %4 = shufflevector <4 x double> %3, <4 x double> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ; Only the low four bits of the i8 mask are used as the per-lane predicate.
6523   %5 = bitcast i8 %x3 to <8 x i1>
6524   %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Masked-off lanes pass through the addend %x2.
6525   %6 = select <4 x i1> %extract, <4 x double> %4, <4 x double> %x2
6526   ret <4 x double> %6
6527 }
6528
; Zero-masked 256-bit double-precision fmaddsub: even lanes compute
; x0*x1 - x2, odd lanes compute x0*x1 + x2, and lanes whose mask bit is clear
; are set to zero. The assertions below expect one vfmaddsub213pd on ymm
; registers with {%k1} {z}.
6529 define <4 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
6530 ; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_256:
6531 ; X86:       # %bb.0:
6532 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6533 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6534 ; X86-NEXT:    vfmaddsub213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa6,0xc2]
6535 ; X86-NEXT:    # ymm0 {%k1} {z} = (ymm1 * ymm0) +/- ymm2
6536 ; X86-NEXT:    retl # encoding: [0xc3]
6537 ;
6538 ; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_256:
6539 ; X64:       # %bb.0:
6540 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6541 ; X64-NEXT:    vfmaddsub213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa6,0xc2]
6542 ; X64-NEXT:    # ymm0 {%k1} {z} = (ymm1 * ymm0) +/- ymm2
6543 ; X64-NEXT:    retq # encoding: [0xc3]
  ; %1 = x0*x1 + x2 in every lane.
6544   %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2)
  ; %2 negates %x2 (written as -0.0 - x2), so %3 = x0*x1 - x2 in every lane.
6545   %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
6546   %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %2)
  ; Interleave: lanes 0 and 2 come from %3 (subtract), lanes 1 and 3 from %1 (add).
6547   %4 = shufflevector <4 x double> %3, <4 x double> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ; Only the low four bits of the i8 mask are used as the per-lane predicate.
6548   %5 = bitcast i8 %x3 to <8 x i1>
6549   %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Masked-off lanes become zero.
6550   %6 = select <4 x i1> %extract, <4 x double> %4, <4 x double> zeroinitializer
6551   ret <4 x double> %6
6552 }
6553
; "mask3" form of 128-bit single-precision fmaddsub: even lanes compute
; x0*x1 - x2, odd lanes compute x0*x1 + x2, and lanes whose mask bit is clear
; keep the corresponding lane of the addend %x2. The assertions below expect a
; masked vfmaddsub231ps that accumulates into xmm2, followed by a register
; copy into xmm0.
6554 define <4 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
6555 ; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_128:
6556 ; X86:       # %bb.0:
6557 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6558 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6559 ; X86-NEXT:    vfmaddsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb6,0xd1]
6560 ; X86-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) +/- xmm2
6561 ; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
6562 ; X86-NEXT:    retl # encoding: [0xc3]
6563 ;
6564 ; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_128:
6565 ; X64:       # %bb.0:
6566 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6567 ; X64-NEXT:    vfmaddsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb6,0xd1]
6568 ; X64-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) +/- xmm2
6569 ; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
6570 ; X64-NEXT:    retq # encoding: [0xc3]
  ; %1 = x0*x1 + x2 in every lane.
6571   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2)
  ; %2 negates %x2 (written as -0.0 - x2), so %3 = x0*x1 - x2 in every lane.
6572   %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
6573   %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %2)
  ; Interleave: lanes 0 and 2 come from %3 (subtract), lanes 1 and 3 from %1 (add).
6574   %4 = shufflevector <4 x float> %3, <4 x float> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ; Only the low four bits of the i8 mask are used as the per-lane predicate.
6575   %5 = bitcast i8 %x3 to <8 x i1>
6576   %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Masked-off lanes pass through the addend %x2.
6577   %6 = select <4 x i1> %extract, <4 x float> %4, <4 x float> %x2
6578   ret <4 x float> %6
6579 }
6580
; Zero-masked 128-bit single-precision fmaddsub: even lanes compute
; x0*x1 - x2, odd lanes compute x0*x1 + x2, and lanes whose mask bit is clear
; are set to zero. The assertions below expect one vfmaddsub213ps on xmm
; registers with {%k1} {z}.
6581 define <4 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
6582 ; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_128:
6583 ; X86:       # %bb.0:
6584 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6585 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6586 ; X86-NEXT:    vfmaddsub213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa6,0xc2]
6587 ; X86-NEXT:    # xmm0 {%k1} {z} = (xmm1 * xmm0) +/- xmm2
6588 ; X86-NEXT:    retl # encoding: [0xc3]
6589 ;
6590 ; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_128:
6591 ; X64:       # %bb.0:
6592 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6593 ; X64-NEXT:    vfmaddsub213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa6,0xc2]
6594 ; X64-NEXT:    # xmm0 {%k1} {z} = (xmm1 * xmm0) +/- xmm2
6595 ; X64-NEXT:    retq # encoding: [0xc3]
  ; %1 = x0*x1 + x2 in every lane.
6596   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2)
  ; %2 negates %x2 (written as -0.0 - x2), so %3 = x0*x1 - x2 in every lane.
6597   %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
6598   %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %2)
  ; Interleave: lanes 0 and 2 come from %3 (subtract), lanes 1 and 3 from %1 (add).
6599   %4 = shufflevector <4 x float> %3, <4 x float> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ; Only the low four bits of the i8 mask are used as the per-lane predicate.
6600   %5 = bitcast i8 %x3 to <8 x i1>
6601   %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Masked-off lanes become zero.
6602   %6 = select <4 x i1> %extract, <4 x float> %4, <4 x float> zeroinitializer
6603   ret <4 x float> %6
6604 }
6605
; "mask3" form of 256-bit single-precision fmaddsub: even lanes compute
; x0*x1 - x2, odd lanes compute x0*x1 + x2, and lanes whose mask bit is clear
; keep the corresponding lane of the addend %x2. The assertions below expect a
; masked vfmaddsub231ps that accumulates into ymm2, followed by a register
; copy into ymm0.
6606 define <8 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
6607 ; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_256:
6608 ; X86:       # %bb.0:
6609 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6610 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6611 ; X86-NEXT:    vfmaddsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb6,0xd1]
6612 ; X86-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) +/- ymm2
6613 ; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
6614 ; X86-NEXT:    retl # encoding: [0xc3]
6615 ;
6616 ; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_256:
6617 ; X64:       # %bb.0:
6618 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6619 ; X64-NEXT:    vfmaddsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb6,0xd1]
6620 ; X64-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) +/- ymm2
6621 ; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
6622 ; X64-NEXT:    retq # encoding: [0xc3]
  ; %1 = x0*x1 + x2 in every lane.
6623   %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2)
  ; %2 negates %x2 (written as -0.0 - x2), so %3 = x0*x1 - x2 in every lane.
6624   %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
6625   %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %2)
  ; Interleave: even lanes come from %3 (subtract), odd lanes from %1 (add).
6626   %4 = shufflevector <8 x float> %3, <8 x float> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ; All eight mask bits are used directly, one per lane; no extract shuffle is needed.
6627   %5 = bitcast i8 %x3 to <8 x i1>
  ; Masked-off lanes pass through the addend %x2.
6628   %6 = select <8 x i1> %5, <8 x float> %4, <8 x float> %x2
6629   ret <8 x float> %6
6630 }
6631
; Zero-masked 256-bit single-precision fmaddsub: even lanes compute
; x0*x1 - x2, odd lanes compute x0*x1 + x2, and lanes whose mask bit is clear
; are set to zero. The assertions below expect one vfmaddsub213ps on ymm
; registers with {%k1} {z}.
6632 define <8 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
6633 ; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_256:
6634 ; X86:       # %bb.0:
6635 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6636 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6637 ; X86-NEXT:    vfmaddsub213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa6,0xc2]
6638 ; X86-NEXT:    # ymm0 {%k1} {z} = (ymm1 * ymm0) +/- ymm2
6639 ; X86-NEXT:    retl # encoding: [0xc3]
6640 ;
6641 ; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_256:
6642 ; X64:       # %bb.0:
6643 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6644 ; X64-NEXT:    vfmaddsub213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa6,0xc2]
6645 ; X64-NEXT:    # ymm0 {%k1} {z} = (ymm1 * ymm0) +/- ymm2
6646 ; X64-NEXT:    retq # encoding: [0xc3]
  ; %1 = x0*x1 + x2 in every lane.
6647   %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2)
  ; %2 negates %x2 (written as -0.0 - x2), so %3 = x0*x1 - x2 in every lane.
6648   %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
6649   %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %2)
  ; Interleave: even lanes come from %3 (subtract), odd lanes from %1 (add).
6650   %4 = shufflevector <8 x float> %3, <8 x float> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ; All eight mask bits are used directly, one per lane; no extract shuffle is needed.
6651   %5 = bitcast i8 %x3 to <8 x i1>
  ; Masked-off lanes become zero.
6652   %6 = select <8 x i1> %5, <8 x float> %4, <8 x float> zeroinitializer
6653   ret <8 x float> %6
6654 }
6655
; "mask3" form of 128-bit double-precision fmsubadd. Compared with the
; fmaddsub tests the shuffle operands are swapped: lane 0 computes x0*x1 + x2
; and lane 1 computes x0*x1 - x2. Lanes whose mask bit is clear keep the
; corresponding lane of the addend %x2. The assertions below expect a masked
; vfmsubadd231pd that accumulates into xmm2, followed by a register copy into
; xmm0.
6656 define <2 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
6657 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_128:
6658 ; X86:       # %bb.0:
6659 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6660 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6661 ; X86-NEXT:    vfmsubadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb7,0xd1]
6662 ; X86-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) -/+ xmm2
6663 ; X86-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
6664 ; X86-NEXT:    retl # encoding: [0xc3]
6665 ;
6666 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_128:
6667 ; X64:       # %bb.0:
6668 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6669 ; X64-NEXT:    vfmsubadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb7,0xd1]
6670 ; X64-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) -/+ xmm2
6671 ; X64-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
6672 ; X64-NEXT:    retq # encoding: [0xc3]
  ; %1 = x0*x1 + x2 in both lanes.
6673   %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2)
  ; %2 negates %x2 (written as -0.0 - x2), so %3 = x0*x1 - x2 in both lanes.
6674   %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x2
6675   %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %2)
  ; Interleave: lane 0 comes from %1 (add), lane 1 from %3 (subtract).
6676   %4 = shufflevector <2 x double> %1, <2 x double> %3, <2 x i32> <i32 0, i32 3>
  ; Only the low two bits of the i8 mask are used as the per-lane predicate.
6677   %5 = bitcast i8 %x3 to <8 x i1>
6678   %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
  ; Masked-off lanes pass through the addend %x2.
6679   %6 = select <2 x i1> %extract, <2 x double> %4, <2 x double> %x2
6680   ret <2 x double> %6
6681 }
6682
; "mask3" form of 256-bit double-precision fmsubadd: even lanes compute
; x0*x1 + x2, odd lanes compute x0*x1 - x2, and lanes whose mask bit is clear
; keep the corresponding lane of the addend %x2. The assertions below expect a
; masked vfmsubadd231pd that accumulates into ymm2, followed by a register
; copy into ymm0.
6683 define <4 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
6684 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_256:
6685 ; X86:       # %bb.0:
6686 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6687 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6688 ; X86-NEXT:    vfmsubadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb7,0xd1]
6689 ; X86-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) -/+ ymm2
6690 ; X86-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
6691 ; X86-NEXT:    retl # encoding: [0xc3]
6692 ;
6693 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_256:
6694 ; X64:       # %bb.0:
6695 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6696 ; X64-NEXT:    vfmsubadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb7,0xd1]
6697 ; X64-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) -/+ ymm2
6698 ; X64-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
6699 ; X64-NEXT:    retq # encoding: [0xc3]
  ; %1 = x0*x1 + x2 in every lane.
6700   %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2)
  ; %2 negates %x2 (written as -0.0 - x2), so %3 = x0*x1 - x2 in every lane.
6701   %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
6702   %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %2)
  ; Interleave: lanes 0 and 2 come from %1 (add), lanes 1 and 3 from %3 (subtract).
6703   %4 = shufflevector <4 x double> %1, <4 x double> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ; Only the low four bits of the i8 mask are used as the per-lane predicate.
6704   %5 = bitcast i8 %x3 to <8 x i1>
6705   %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Masked-off lanes pass through the addend %x2.
6706   %6 = select <4 x i1> %extract, <4 x double> %4, <4 x double> %x2
6707   ret <4 x double> %6
6708 }
6709
; "mask3" form of 128-bit single-precision fmsubadd: even lanes compute
; x0*x1 + x2, odd lanes compute x0*x1 - x2, and lanes whose mask bit is clear
; keep the corresponding lane of the addend %x2. The assertions below expect a
; masked vfmsubadd231ps that accumulates into xmm2, followed by a register
; copy into xmm0.
6710 define <4 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
6711 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_128:
6712 ; X86:       # %bb.0:
6713 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6714 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6715 ; X86-NEXT:    vfmsubadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb7,0xd1]
6716 ; X86-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) -/+ xmm2
6717 ; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
6718 ; X86-NEXT:    retl # encoding: [0xc3]
6719 ;
6720 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_128:
6721 ; X64:       # %bb.0:
6722 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6723 ; X64-NEXT:    vfmsubadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb7,0xd1]
6724 ; X64-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) -/+ xmm2
6725 ; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
6726 ; X64-NEXT:    retq # encoding: [0xc3]
  ; %1 = x0*x1 + x2 in every lane.
6727   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2)
  ; %2 negates %x2 (written as -0.0 - x2), so %3 = x0*x1 - x2 in every lane.
6728   %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
6729   %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %2)
  ; Interleave: lanes 0 and 2 come from %1 (add), lanes 1 and 3 from %3 (subtract).
6730   %4 = shufflevector <4 x float> %1, <4 x float> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ; Only the low four bits of the i8 mask are used as the per-lane predicate.
6731   %5 = bitcast i8 %x3 to <8 x i1>
6732   %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Masked-off lanes pass through the addend %x2.
6733   %6 = select <4 x i1> %extract, <4 x float> %4, <4 x float> %x2
6734   ret <4 x float> %6
6735 }
6736
; "mask3" form of 256-bit single-precision fmsubadd: even lanes compute
; x0*x1 + x2, odd lanes compute x0*x1 - x2, and lanes whose mask bit is clear
; keep the corresponding lane of the addend %x2. The assertions below expect a
; masked vfmsubadd231ps that accumulates into ymm2, followed by a register
; copy into ymm0.
6737 define <8 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
6738 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_256:
6739 ; X86:       # %bb.0:
6740 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6741 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6742 ; X86-NEXT:    vfmsubadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb7,0xd1]
6743 ; X86-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) -/+ ymm2
6744 ; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
6745 ; X86-NEXT:    retl # encoding: [0xc3]
6746 ;
6747 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_256:
6748 ; X64:       # %bb.0:
6749 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6750 ; X64-NEXT:    vfmsubadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb7,0xd1]
6751 ; X64-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) -/+ ymm2
6752 ; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
6753 ; X64-NEXT:    retq # encoding: [0xc3]
  ; %1 = x0*x1 + x2 in every lane.
6754   %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2)
  ; %2 negates %x2 (written as -0.0 - x2), so %3 = x0*x1 - x2 in every lane.
6755   %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
6756   %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %2)
  ; Interleave: even lanes come from %1 (add), odd lanes from %3 (subtract).
6757   %4 = shufflevector <8 x float> %1, <8 x float> %3, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ; All eight mask bits are used directly, one per lane; no extract shuffle is needed.
6758   %5 = bitcast i8 %x3 to <8 x i1>
  ; Masked-off lanes pass through the addend %x2.
6759   %6 = select <8 x i1> %5, <8 x float> %4, <8 x float> %x2
6760   ret <8 x float> %6
6761 }
6762
; Merge-masked 128-bit single-precision fma whose addend is a full vector
; loaded from memory with no explicit alignment on the load. Lanes whose mask
; bit is clear keep the corresponding lane of %a0. The assertions below expect
; the load to be folded into the memory operand of a masked vfmadd213ps.
6763 define <4 x float> @test_mask_vfmadd128_ps_rmk(<4 x float> %a0, <4 x float> %a1, ptr %ptr_a2, i8 %mask) {
6764 ; X86-LABEL: test_mask_vfmadd128_ps_rmk:
6765 ; X86:       # %bb.0:
6766 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6767 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
6768 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
6769 ; X86-NEXT:    vfmadd213ps (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x00]
6770 ; X86-NEXT:    # xmm0 {%k1} = (xmm1 * xmm0) + mem
6771 ; X86-NEXT:    retl # encoding: [0xc3]
6772 ;
6773 ; X64-LABEL: test_mask_vfmadd128_ps_rmk:
6774 ; X64:       # %bb.0:
6775 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6776 ; X64-NEXT:    vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
6777 ; X64-NEXT:    # xmm0 {%k1} = (xmm1 * xmm0) + mem
6778 ; X64-NEXT:    retq # encoding: [0xc3]
  ; The addend comes from memory; no alignment is specified on the load.
6779   %a2 = load <4 x float>, ptr %ptr_a2
  ; %1 = a0*a1 + a2 in every lane.
6780   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ; Only the low four bits of the i8 mask are used as the per-lane predicate.
6781   %2 = bitcast i8 %mask to <8 x i1>
6782   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Masked-off lanes pass through %a0.
6783   %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %a0
6784   ret <4 x float> %3
6785 }
6786
; Same as test_mask_vfmadd128_ps_rmk except that the vector load carries an
; explicit `align 8`. The assertions below expect the identical folded, masked
; vfmadd213ps with a memory operand.
6787 define <4 x float> @test_mask_vfmadd128_ps_rmka(<4 x float> %a0, <4 x float> %a1, ptr %ptr_a2, i8 %mask) {
6788 ; X86-LABEL: test_mask_vfmadd128_ps_rmka:
6789 ; X86:       # %bb.0:
6790 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6791 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
6792 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
6793 ; X86-NEXT:    vfmadd213ps (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x00]
6794 ; X86-NEXT:    # xmm0 {%k1} = (xmm1 * xmm0) + mem
6795 ; X86-NEXT:    retl # encoding: [0xc3]
6796 ;
6797 ; X64-LABEL: test_mask_vfmadd128_ps_rmka:
6798 ; X64:       # %bb.0:
6799 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6800 ; X64-NEXT:    vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
6801 ; X64-NEXT:    # xmm0 {%k1} = (xmm1 * xmm0) + mem
6802 ; X64-NEXT:    retq # encoding: [0xc3]
  ; The addend comes from memory; the load is only 8-byte aligned, less than
  ; the 16-byte size of the vector.
6803   %a2 = load <4 x float>, ptr %ptr_a2, align 8
  ; %1 = a0*a1 + a2 in every lane.
6804   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ; Only the low four bits of the i8 mask are used as the per-lane predicate.
6805   %2 = bitcast i8 %mask to <8 x i1>
6806   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Masked-off lanes pass through %a0.
6807   %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %a0
6808   ret <4 x float> %3
6809 }
6810
; Unmasked 128-bit single-precision fma whose addend is a full vector loaded
; from memory with no explicit alignment on the load. Despite the "mask" in
; the name, this function takes no mask argument and performs no select. The
; assertions below expect the load to be folded into the memory operand of an
; unmasked vfmadd213ps.
6811 define <4 x float> @test_mask_vfmadd128_ps_rmkz(<4 x float> %a0, <4 x float> %a1, ptr %ptr_a2) {
6812 ; X86-LABEL: test_mask_vfmadd128_ps_rmkz:
6813 ; X86:       # %bb.0:
6814 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6815 ; X86-NEXT:    vfmadd213ps (%eax), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x00]
6816 ; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6817 ; X86-NEXT:    retl # encoding: [0xc3]
6818 ;
6819 ; X64-LABEL: test_mask_vfmadd128_ps_rmkz:
6820 ; X64:       # %bb.0:
6821 ; X64-NEXT:    vfmadd213ps (%rdi), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x07]
6822 ; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6823 ; X64-NEXT:    retq # encoding: [0xc3]
  ; The addend comes from memory; no alignment is specified on the load.
6824   %a2 = load <4 x float>, ptr %ptr_a2
  ; %1 = a0*a1 + a2 in every lane, returned unmodified.
6825   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
6826   ret <4 x float> %1
6827 }
6828
; Same as test_mask_vfmadd128_ps_rmkz except that the vector load carries an
; explicit `align 4`. There is no mask argument and no select. The assertions
; below expect the identical folded, unmasked vfmadd213ps with a memory
; operand.
6829 define <4 x float> @test_mask_vfmadd128_ps_rmkza(<4 x float> %a0, <4 x float> %a1, ptr %ptr_a2) {
6830 ; X86-LABEL: test_mask_vfmadd128_ps_rmkza:
6831 ; X86:       # %bb.0:
6832 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6833 ; X86-NEXT:    vfmadd213ps (%eax), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x00]
6834 ; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6835 ; X86-NEXT:    retl # encoding: [0xc3]
6836 ;
6837 ; X64-LABEL: test_mask_vfmadd128_ps_rmkza:
6838 ; X64:       # %bb.0:
6839 ; X64-NEXT:    vfmadd213ps (%rdi), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x07]
6840 ; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6841 ; X64-NEXT:    retq # encoding: [0xc3]
  ; The addend comes from memory; the load is only 4-byte aligned, less than
  ; the 16-byte size of the vector.
6842   %a2 = load <4 x float>, ptr %ptr_a2, align 4
  ; %1 = a0*a1 + a2 in every lane, returned unmodified.
6843   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
6844   ret <4 x float> %1
6845 }
6846
; Merge-masked 128-bit single-precision fma whose addend is one scalar float
; loaded from memory (no explicit alignment) and replicated into all four
; lanes. Lanes whose mask bit is clear keep the corresponding lane of %a0. The
; assertions below expect the splat to be folded into an embedded-broadcast
; ({1to4}) memory operand of a masked vfmadd213ps.
6847 define <4 x float> @test_mask_vfmadd128_ps_rmb(<4 x float> %a0, <4 x float> %a1, ptr %ptr_a2, i8 %mask) {
6848 ; X86-LABEL: test_mask_vfmadd128_ps_rmb:
6849 ; X86:       # %bb.0:
6850 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6851 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
6852 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
6853 ; X86-NEXT:    vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x00]
6854 ; X86-NEXT:    # xmm0 {%k1} = (xmm1 * xmm0) + mem
6855 ; X86-NEXT:    retl # encoding: [0xc3]
6856 ;
6857 ; X64-LABEL: test_mask_vfmadd128_ps_rmb:
6858 ; X64:       # %bb.0:
6859 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6860 ; X64-NEXT:    vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
6861 ; X64-NEXT:    # xmm0 {%k1} = (xmm1 * xmm0) + mem
6862 ; X64-NEXT:    retq # encoding: [0xc3]
  ; Load a single float and insert it into lanes 0..3 to build the splat.
  ; NOTE(review): the chain starts from `undef`; newer LLVM tests tend to use
  ; `poison` here - confirm before changing, since the assertions would need
  ; to be regenerated.
6863   %q = load float, ptr %ptr_a2
6864   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
6865   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
6866   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
6867   %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
  ; %1 = a0*a1 + splat(q) in every lane.
6868   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i)
  ; Only the low four bits of the i8 mask are used as the per-lane predicate.
6869   %2 = bitcast i8 %mask to <8 x i1>
6870   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Masked-off lanes pass through %a0.
6871   %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %a0
6872   ret <4 x float> %3
6873 }
6874
; Same as test_mask_vfmadd128_ps_rmb except that the scalar load carries an
; explicit `align 4`. The assertions below expect the identical masked
; vfmadd213ps with an embedded-broadcast ({1to4}) memory operand.
6875 define <4 x float> @test_mask_vfmadd128_ps_rmba(<4 x float> %a0, <4 x float> %a1, ptr %ptr_a2, i8 %mask) {
6876 ; X86-LABEL: test_mask_vfmadd128_ps_rmba:
6877 ; X86:       # %bb.0:
6878 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6879 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
6880 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
6881 ; X86-NEXT:    vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x00]
6882 ; X86-NEXT:    # xmm0 {%k1} = (xmm1 * xmm0) + mem
6883 ; X86-NEXT:    retl # encoding: [0xc3]
6884 ;
6885 ; X64-LABEL: test_mask_vfmadd128_ps_rmba:
6886 ; X64:       # %bb.0:
6887 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6888 ; X64-NEXT:    vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
6889 ; X64-NEXT:    # xmm0 {%k1} = (xmm1 * xmm0) + mem
6890 ; X64-NEXT:    retq # encoding: [0xc3]
  ; Load a single 4-byte-aligned float and insert it into lanes 0..3 to build
  ; the splat.
6891   %q = load float, ptr %ptr_a2, align 4
6892   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
6893   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
6894   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
6895   %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
  ; %1 = a0*a1 + splat(q) in every lane.
6896   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i)
  ; Only the low four bits of the i8 mask are used as the per-lane predicate.
6897   %2 = bitcast i8 %mask to <8 x i1>
6898   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Masked-off lanes pass through %a0.
6899   %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %a0
6900   ret <4 x float> %3
6901 }
6902
; Unmasked 128-bit single-precision FMA whose addend is a scalar float loaded
; from %ptr_a2 (no explicit alignment) and splatted to all four lanes.
; Despite the "mask" in the test name, the function takes no mask operand and
; the result of the fma call is returned directly. The expected output is a
; single vfmadd213ps with a {1to4} broadcast memory operand and no mask
; register.
; NOTE(review): the splat is seeded from 'undef'; every lane is overwritten, so
; switching the seed to 'poison' should not change codegen - confirm by
; regenerating the assertions before changing it.
6903 define <4 x float> @test_mask_vfmadd128_ps_rmbz(<4 x float> %a0, <4 x float> %a1, ptr %ptr_a2) {
6904 ; X86-LABEL: test_mask_vfmadd128_ps_rmbz:
6905 ; X86:       # %bb.0:
6906 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6907 ; X86-NEXT:    vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x00]
6908 ; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6909 ; X86-NEXT:    retl # encoding: [0xc3]
6910 ;
6911 ; X64-LABEL: test_mask_vfmadd128_ps_rmbz:
6912 ; X64:       # %bb.0:
6913 ; X64-NEXT:    vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
6914 ; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6915 ; X64-NEXT:    retq # encoding: [0xc3]
6916   %q = load float, ptr %ptr_a2 ; scalar addend, alignment left implicit
6917   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 ; lanes 0..3 are filled with %q one at a time
6918   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
6919   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
6920   %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
6921   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i) ; a0 * a1 + splat(q)
6922   ret <4 x float> %1
6923 }
6924
; Unmasked 128-bit single-precision FMA whose addend is a scalar float loaded
; from %ptr_a2 with an explicit 'align 4' and splatted to all four lanes.
; The IR body matches test_mask_vfmadd128_ps_rmbz apart from the explicit
; alignment on the load, and the expected instruction sequence is the same:
; one vfmadd213ps with a {1to4} broadcast memory operand and no mask register.
; Despite the "mask" in the test name, the function takes no mask operand.
; NOTE(review): the splat is seeded from 'undef'; every lane is overwritten, so
; switching the seed to 'poison' should not change codegen - confirm by
; regenerating the assertions before changing it.
6925 define <4 x float> @test_mask_vfmadd128_ps_rmbza(<4 x float> %a0, <4 x float> %a1, ptr %ptr_a2) {
6926 ; X86-LABEL: test_mask_vfmadd128_ps_rmbza:
6927 ; X86:       # %bb.0:
6928 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6929 ; X86-NEXT:    vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x00]
6930 ; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6931 ; X86-NEXT:    retl # encoding: [0xc3]
6932 ;
6933 ; X64-LABEL: test_mask_vfmadd128_ps_rmbza:
6934 ; X64:       # %bb.0:
6935 ; X64-NEXT:    vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
6936 ; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6937 ; X64-NEXT:    retq # encoding: [0xc3]
6938   %q = load float, ptr %ptr_a2, align 4 ; scalar addend, explicitly 4-byte aligned
6939   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 ; lanes 0..3 are filled with %q one at a time
6940   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
6941   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
6942   %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
6943   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i) ; a0 * a1 + splat(q)
6944   ret <4 x float> %1
6945 }
6946
; Merge-masked 128-bit double-precision FMA whose addend is a full
; <2 x double> vector loaded from %ptr_a2 (no explicit alignment).
; The low two bits of the i8 %mask drive the final select; lanes whose bit is
; clear keep %a0. The expected output folds the load into vfmadd213pd as a
; plain (non-broadcast) memory operand predicated on %k1.
6947 define <2 x double> @test_mask_vfmadd128_pd_rmk(<2 x double> %a0, <2 x double> %a1, ptr %ptr_a2, i8 %mask) {
6948 ; X86-LABEL: test_mask_vfmadd128_pd_rmk:
6949 ; X86:       # %bb.0:
6950 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6951 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
6952 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
6953 ; X86-NEXT:    vfmadd213pd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x00]
6954 ; X86-NEXT:    # xmm0 {%k1} = (xmm1 * xmm0) + mem
6955 ; X86-NEXT:    retl # encoding: [0xc3]
6956 ;
6957 ; X64-LABEL: test_mask_vfmadd128_pd_rmk:
6958 ; X64:       # %bb.0:
6959 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6960 ; X64-NEXT:    vfmadd213pd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x07]
6961 ; X64-NEXT:    # xmm0 {%k1} = (xmm1 * xmm0) + mem
6962 ; X64-NEXT:    retq # encoding: [0xc3]
6963   %a2 = load <2 x double>, ptr %ptr_a2 ; whole-vector addend
6964   %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; a0 * a1 + a2
6965   %2 = bitcast i8 %mask to <8 x i1> ; one i1 per mask bit
6966   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1> ; keep mask bits 0..1
6967   %3 = select <2 x i1> %extract, <2 x double> %1, <2 x double> %a0 ; masked-off lanes pass %a0 through
6968   ret <2 x double> %3
6969 }
6970
; Unmasked 128-bit double-precision FMA whose addend is a full <2 x double>
; vector loaded from %ptr_a2 (no explicit alignment).
; Despite the "mask" in the test name, the function takes no mask operand and
; returns the fma result directly. The expected vfmadd213pd takes the load as
; its memory operand and carries the "EVEX TO VEX Compression" annotation,
; i.e. the shorter VEX encoding is expected here.
6971 define <2 x double> @test_mask_vfmadd128_pd_rmkz(<2 x double> %a0, <2 x double> %a1, ptr %ptr_a2) {
6972 ; X86-LABEL: test_mask_vfmadd128_pd_rmkz:
6973 ; X86:       # %bb.0:
6974 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6975 ; X86-NEXT:    vfmadd213pd (%eax), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0x00]
6976 ; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6977 ; X86-NEXT:    retl # encoding: [0xc3]
6978 ;
6979 ; X64-LABEL: test_mask_vfmadd128_pd_rmkz:
6980 ; X64:       # %bb.0:
6981 ; X64-NEXT:    vfmadd213pd (%rdi), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0x07]
6982 ; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6983 ; X64-NEXT:    retq # encoding: [0xc3]
6984   %a2 = load <2 x double>, ptr %ptr_a2 ; whole-vector addend
6985   %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; a0 * a1 + a2
6986   ret <2 x double> %1
6987 }
6988
; Merge-masked 256-bit double-precision FMA whose addend is a full
; <4 x double> vector loaded from %ptr_a2 (no explicit alignment).
; The low four bits of the i8 %mask drive the final select; lanes whose bit is
; clear keep %a0. The expected output folds the load into a ymm vfmadd213pd as
; a plain (non-broadcast) memory operand predicated on %k1.
6989 define <4 x double> @test_mask_vfmadd256_pd_rmk(<4 x double> %a0, <4 x double> %a1, ptr %ptr_a2, i8 %mask) {
6990 ; X86-LABEL: test_mask_vfmadd256_pd_rmk:
6991 ; X86:       # %bb.0:
6992 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6993 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
6994 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
6995 ; X86-NEXT:    vfmadd213pd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x00]
6996 ; X86-NEXT:    # ymm0 {%k1} = (ymm1 * ymm0) + mem
6997 ; X86-NEXT:    retl # encoding: [0xc3]
6998 ;
6999 ; X64-LABEL: test_mask_vfmadd256_pd_rmk:
7000 ; X64:       # %bb.0:
7001 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
7002 ; X64-NEXT:    vfmadd213pd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x07]
7003 ; X64-NEXT:    # ymm0 {%k1} = (ymm1 * ymm0) + mem
7004 ; X64-NEXT:    retq # encoding: [0xc3]
7005   %a2 = load <4 x double>, ptr %ptr_a2 ; whole-vector addend
7006   %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; a0 * a1 + a2
7007   %2 = bitcast i8 %mask to <8 x i1> ; one i1 per mask bit
7008   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; keep mask bits 0..3
7009   %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %a0 ; masked-off lanes pass %a0 through
7010   ret <4 x double> %3
7011 }
7012
; Unmasked 256-bit double-precision FMA whose addend is a full <4 x double>
; vector loaded from %ptr_a2 (no explicit alignment).
; Despite the "mask" in the test name, the function takes no mask operand and
; returns the fma result directly. The expected ymm vfmadd213pd takes the load
; as its memory operand and carries the "EVEX TO VEX Compression" annotation,
; i.e. the shorter VEX encoding is expected here.
7013 define <4 x double> @test_mask_vfmadd256_pd_rmkz(<4 x double> %a0, <4 x double> %a1, ptr %ptr_a2) {
7014 ; X86-LABEL: test_mask_vfmadd256_pd_rmkz:
7015 ; X86:       # %bb.0:
7016 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7017 ; X86-NEXT:    vfmadd213pd (%eax), %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0x00]
7018 ; X86-NEXT:    # ymm0 = (ymm1 * ymm0) + mem
7019 ; X86-NEXT:    retl # encoding: [0xc3]
7020 ;
7021 ; X64-LABEL: test_mask_vfmadd256_pd_rmkz:
7022 ; X64:       # %bb.0:
7023 ; X64-NEXT:    vfmadd213pd (%rdi), %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0x07]
7024 ; X64-NEXT:    # ymm0 = (ymm1 * ymm0) + mem
7025 ; X64-NEXT:    retq # encoding: [0xc3]
7026   %a2 = load <4 x double>, ptr %ptr_a2 ; whole-vector addend
7027   %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; a0 * a1 + a2
7028   ret <4 x double> %1
7029 }
7030
; Shuffle-combine test: a 256-bit two-source permute (vpermi2var.d.256) whose
; data operands are the low and high 8-lane halves of the same 512-bit input
; %a, driven by a constant index vector.
; The expected output is a single 512-bit vpermps of %a using the same index
; constant [14,13,6,3,5,15,0,1], which is loaded from the constant pool into
; ymm1; the result is the low ymm half of zmm0.
; NOTE(review): this relies on vpermi2d index values 0-7 selecting from the
; first data operand and 8-15 from the second, so the indices address %a
; directly - confirm against the instruction reference.
7031 define <8 x i32> @combine_vpermi2d_vpermps(<16 x i32> noundef %a) {
7032 ; X86-LABEL: combine_vpermi2d_vpermps:
7033 ; X86:       # %bb.0:
7034 ; X86-NEXT:    vmovaps {{.*#+}} ymm1 = [14,13,6,3,5,15,0,1]
7035 ; X86-NEXT:    # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x0d,A,A,A,A]
7036 ; X86-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
7037 ; X86-NEXT:    vpermps %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x16,0xc0]
7038 ; X86-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
7039 ; X86-NEXT:    retl # encoding: [0xc3]
7040 ;
7041 ; X64-LABEL: combine_vpermi2d_vpermps:
7042 ; X64:       # %bb.0:
7043 ; X64-NEXT:    vmovaps {{.*#+}} ymm1 = [14,13,6,3,5,15,0,1]
7044 ; X64-NEXT:    # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x0d,A,A,A,A]
7045 ; X64-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
7046 ; X64-NEXT:    vpermps %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x16,0xc0]
7047 ; X64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
7048 ; X64-NEXT:    retq # encoding: [0xc3]
7049   %1 = shufflevector <16 x i32> %a, <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; low half of %a (lanes 0..7)
7050   %2 = shufflevector <16 x i32> %a, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; high half of %a (lanes 8..15)
7051   %3 = tail call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %1, <8 x i32> <i32 14, i32 13, i32 6, i32 3, i32 5, i32 15, i32 0, i32 1>, <8 x i32> %2) ; operands: low half, constant indices, high half
7052   ret <8 x i32> %3
7053 }
7054
; Intrinsic declarations.
; First the generic llvm.fma overloads for 128- and 256-bit float/double
; vectors, then the AVX-512 masked compress/expand intrinsics for 128- and
; 256-bit vectors. Each compress/expand intrinsic takes two vectors of the
; result type followed by an <N x i1> mask with one bit per lane; the compress
; tests at the top of this file pass them as (data, passthru, mask).
7055 declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
7056 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
7057 declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
7058 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
7059 declare <2 x double> @llvm.x86.avx512.mask.compress.v2f64(<2 x double>, <2 x double>, <2 x i1>)
7060 declare <4 x float> @llvm.x86.avx512.mask.compress.v4f32(<4 x float>, <4 x float>, <4 x i1>)
7061 declare <2 x i64> @llvm.x86.avx512.mask.compress.v2i64(<2 x i64>, <2 x i64>, <2 x i1>)
7062 declare <4 x i32> @llvm.x86.avx512.mask.compress.v4i32(<4 x i32>, <4 x i32>, <4 x i1>)
7063 declare <2 x double> @llvm.x86.avx512.mask.expand.v2f64(<2 x double>, <2 x double>, <2 x i1>)
7064 declare <4 x float> @llvm.x86.avx512.mask.expand.v4f32(<4 x float>, <4 x float>, <4 x i1>)
7065 declare <2 x i64> @llvm.x86.avx512.mask.expand.v2i64(<2 x i64>, <2 x i64>, <2 x i1>)
7066 declare <4 x i32> @llvm.x86.avx512.mask.expand.v4i32(<4 x i32>, <4 x i32>, <4 x i1>)
7067 declare <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double>, <4 x double>, <4 x i1>)
7068 declare <8 x float> @llvm.x86.avx512.mask.compress.v8f32(<8 x float>, <8 x float>, <8 x i1>)
7069 declare <4 x i64> @llvm.x86.avx512.mask.compress.v4i64(<4 x i64>, <4 x i64>, <4 x i1>)
7070 declare <8 x i32> @llvm.x86.avx512.mask.compress.v8i32(<8 x i32>, <8 x i32>, <8 x i1>)
7071 declare <4 x double> @llvm.x86.avx512.mask.expand.v4f64(<4 x double>, <4 x double>, <4 x i1>)
7072 declare <8 x float> @llvm.x86.avx512.mask.expand.v8f32(<8 x float>, <8 x float>, <8 x i1>)
7073 declare <4 x i64> @llvm.x86.avx512.mask.expand.v4i64(<4 x i64>, <4 x i64>, <4 x i1>)
7074 declare <8 x i32> @llvm.x86.avx512.mask.expand.v8i32(<8 x i32>, <8 x i32>, <8 x i1>)