test/CodeGen/X86/avx512vl-intrinsics.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
   3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
   4
   5 define <2 x double> @test_mask_compress_pd_128(<2 x double> %data, <2 x double> %passthru, i8 %mask) {
   6 ; X86-LABEL: test_mask_compress_pd_128:
   7 ; X86:       # %bb.0:
   8 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   9 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
  10 ; X86-NEXT:    vcompresspd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0xc1]
  11 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
  12 ; X86-NEXT:    retl # encoding: [0xc3]
  13 ;
  14 ; X64-LABEL: test_mask_compress_pd_128:
  15 ; X64:       # %bb.0:
  16 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
  17 ; X64-NEXT:    vcompresspd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0xc1]
  18 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
  19 ; X64-NEXT:    retq # encoding: [0xc3]
  20   %1 = bitcast i8 %mask to <8 x i1>
  21   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
  22   %2 = call <2 x double> @llvm.x86.avx512.mask.compress.v2f64(<2 x double> %data, <2 x double> %passthru, <2 x i1> %extract)
  23   ret <2 x double> %2
  24 }
  25
  26 define <2 x double> @test_maskz_compress_pd_128(<2 x double> %data, i8 %mask) {
  27 ; X86-LABEL: test_maskz_compress_pd_128:
  28 ; X86:       # %bb.0:
  29 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
  30 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
  31 ; X86-NEXT:    vcompresspd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8a,0xc0]
  32 ; X86-NEXT:    retl # encoding: [0xc3]
  33 ;
  34 ; X64-LABEL: test_maskz_compress_pd_128:
  35 ; X64:       # %bb.0:
  36 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
  37 ; X64-NEXT:    vcompresspd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8a,0xc0]
  38 ; X64-NEXT:    retq # encoding: [0xc3]
  39   %1 = bitcast i8 %mask to <8 x i1>
  40   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
  41   %2 = call <2 x double> @llvm.x86.avx512.mask.compress.v2f64(<2 x double> %data, <2 x double> zeroinitializer, <2 x i1> %extract)
  42   ret <2 x double> %2
  43 }
  44
  45 define <2 x double> @test_compress_pd_128(<2 x double> %data) {
  46 ; CHECK-LABEL: test_compress_pd_128:
  47 ; CHECK:       # %bb.0:
  48 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  49   %1 = call <2 x double> @llvm.x86.avx512.mask.compress.v2f64(<2 x double> %data, <2 x double> undef, <2 x i1> <i1 true, i1 true>)
  50   ret <2 x double> %1
  51 }
  52
  53 define <4 x float> @test_mask_compress_ps_128(<4 x float> %data, <4 x float> %passthru, i8 %mask) {
  54 ; X86-LABEL: test_mask_compress_ps_128:
  55 ; X86:       # %bb.0:
  56 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
  57 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
  58 ; X86-NEXT:    vcompressps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0xc1]
  59 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
  60 ; X86-NEXT:    retl # encoding: [0xc3]
  61 ;
  62 ; X64-LABEL: test_mask_compress_ps_128:
  63 ; X64:       # %bb.0:
  64 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
  65 ; X64-NEXT:    vcompressps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0xc1]
  66 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
  67 ; X64-NEXT:    retq # encoding: [0xc3]
  68   %1 = bitcast i8 %mask to <8 x i1>
  69   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  70   %2 = call <4 x float> @llvm.x86.avx512.mask.compress.v4f32(<4 x float> %data, <4 x float> %passthru, <4 x i1> %extract)
  71   ret <4 x float> %2
  72 }
  73
  74 define <4 x float> @test_maskz_compress_ps_128(<4 x float> %data, i8 %mask) {
  75 ; X86-LABEL: test_maskz_compress_ps_128:
  76 ; X86:       # %bb.0:
  77 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
  78 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
  79 ; X86-NEXT:    vcompressps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8a,0xc0]
  80 ; X86-NEXT:    retl # encoding: [0xc3]
  81 ;
  82 ; X64-LABEL: test_maskz_compress_ps_128:
  83 ; X64:       # %bb.0:
  84 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
  85 ; X64-NEXT:    vcompressps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8a,0xc0]
  86 ; X64-NEXT:    retq # encoding: [0xc3]
  87   %1 = bitcast i8 %mask to <8 x i1>
  88   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  89   %2 = call <4 x float> @llvm.x86.avx512.mask.compress.v4f32(<4 x float> %data, <4 x float> zeroinitializer, <4 x i1> %extract)
  90   ret <4 x float> %2
  91 }
  92
  93 define <4 x float> @test_compress_ps_128(<4 x float> %data) {
  94 ; CHECK-LABEL: test_compress_ps_128:
  95 ; CHECK:       # %bb.0:
  96 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  97   %1 = call <4 x float> @llvm.x86.avx512.mask.compress.v4f32(<4 x float> %data, <4 x float> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  98   ret <4 x float> %1
  99 }
 100
 101 define <2 x i64> @test_mask_compress_q_128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) {
 102 ; X86-LABEL: test_mask_compress_q_128:
 103 ; X86:       # %bb.0:
 104 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 105 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 106 ; X86-NEXT:    vpcompressq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0xc1]
 107 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 108 ; X86-NEXT:    retl # encoding: [0xc3]
 109 ;
 110 ; X64-LABEL: test_mask_compress_q_128:
 111 ; X64:       # %bb.0:
 112 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 113 ; X64-NEXT:    vpcompressq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0xc1]
 114 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 115 ; X64-NEXT:    retq # encoding: [0xc3]
 116   %1 = bitcast i8 %mask to <8 x i1>
 117   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
 118   %2 = call <2 x i64> @llvm.x86.avx512.mask.compress.v2i64(<2 x i64> %data, <2 x i64> %passthru, <2 x i1> %extract)
 119   ret <2 x i64> %2
 120 }
 121
 122 define <2 x i64> @test_maskz_compress_q_128(<2 x i64> %data, i8 %mask) {
 123 ; X86-LABEL: test_maskz_compress_q_128:
 124 ; X86:       # %bb.0:
 125 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 126 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 127 ; X86-NEXT:    vpcompressq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8b,0xc0]
 128 ; X86-NEXT:    retl # encoding: [0xc3]
 129 ;
 130 ; X64-LABEL: test_maskz_compress_q_128:
 131 ; X64:       # %bb.0:
 132 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 133 ; X64-NEXT:    vpcompressq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8b,0xc0]
 134 ; X64-NEXT:    retq # encoding: [0xc3]
 135   %1 = bitcast i8 %mask to <8 x i1>
 136   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
 137   %2 = call <2 x i64> @llvm.x86.avx512.mask.compress.v2i64(<2 x i64> %data, <2 x i64> zeroinitializer, <2 x i1> %extract)
 138   ret <2 x i64> %2
 139 }
 140
 141 define <2 x i64> @test_compress_q_128(<2 x i64> %data) {
 142 ; CHECK-LABEL: test_compress_q_128:
 143 ; CHECK:       # %bb.0:
 144 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 145   %1 = call <2 x i64> @llvm.x86.avx512.mask.compress.v2i64(<2 x i64> %data, <2 x i64> undef, <2 x i1> <i1 true, i1 true>)
 146   ret <2 x i64> %1
 147 }
 148
 149 define <4 x i32> @test_mask_compress_d_128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) {
 150 ; X86-LABEL: test_mask_compress_d_128:
 151 ; X86:       # %bb.0:
 152 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 153 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 154 ; X86-NEXT:    vpcompressd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0xc1]
 155 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 156 ; X86-NEXT:    retl # encoding: [0xc3]
 157 ;
 158 ; X64-LABEL: test_mask_compress_d_128:
 159 ; X64:       # %bb.0:
 160 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 161 ; X64-NEXT:    vpcompressd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0xc1]
 162 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 163 ; X64-NEXT:    retq # encoding: [0xc3]
 164   %1 = bitcast i8 %mask to <8 x i1>
 165   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 166   %2 = call <4 x i32> @llvm.x86.avx512.mask.compress.v4i32(<4 x i32> %data, <4 x i32> %passthru, <4 x i1> %extract)
 167   ret <4 x i32> %2
 168 }
 169
 170 define <4 x i32> @test_maskz_compress_d_128(<4 x i32> %data, i8 %mask) {
 171 ; X86-LABEL: test_maskz_compress_d_128:
 172 ; X86:       # %bb.0:
 173 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 174 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 175 ; X86-NEXT:    vpcompressd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0]
 176 ; X86-NEXT:    retl # encoding: [0xc3]
 177 ;
 178 ; X64-LABEL: test_maskz_compress_d_128:
 179 ; X64:       # %bb.0:
 180 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 181 ; X64-NEXT:    vpcompressd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0]
 182 ; X64-NEXT:    retq # encoding: [0xc3]
 183   %1 = bitcast i8 %mask to <8 x i1>
 184   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 185   %2 = call <4 x i32> @llvm.x86.avx512.mask.compress.v4i32(<4 x i32> %data, <4 x i32> zeroinitializer, <4 x i1> %extract)
 186   ret <4 x i32> %2
 187 }
 188
 189 define <4 x i32> @test_compress_d_128(<4 x i32> %data) {
 190 ; CHECK-LABEL: test_compress_d_128:
 191 ; CHECK:       # %bb.0:
 192 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 193   %1 = call <4 x i32> @llvm.x86.avx512.mask.compress.v4i32(<4 x i32> %data, <4 x i32> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 194   ret <4 x i32> %1
 195 }
 196
 197 define <2 x double> @test_expand_pd_128(<2 x double> %data) {
 198 ; CHECK-LABEL: test_expand_pd_128:
 199 ; CHECK:       # %bb.0:
 200 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 201   %1 = call <2 x double> @llvm.x86.avx512.mask.expand.v2f64(<2 x double> %data, <2 x double> undef, <2 x i1> <i1 true, i1 true>)
 202   ret <2 x double> %1
 203 }
 204
 205 define <2 x double> @test_mask_expand_pd_128(<2 x double> %data, <2 x double> %passthru, i8 %mask) {
 206 ; X86-LABEL: test_mask_expand_pd_128:
 207 ; X86:       # %bb.0:
 208 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 209 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 210 ; X86-NEXT:    vexpandpd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0xc8]
 211 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 212 ; X86-NEXT:    retl # encoding: [0xc3]
 213 ;
 214 ; X64-LABEL: test_mask_expand_pd_128:
 215 ; X64:       # %bb.0:
 216 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 217 ; X64-NEXT:    vexpandpd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0xc8]
 218 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 219 ; X64-NEXT:    retq # encoding: [0xc3]
 220   %1 = bitcast i8 %mask to <8 x i1>
 221   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
 222   %2 = call <2 x double> @llvm.x86.avx512.mask.expand.v2f64(<2 x double> %data, <2 x double> %passthru, <2 x i1> %extract)
 223   ret <2 x double> %2
 224 }
 225
 226 define <2 x double> @test_maskz_expand_pd_128(<2 x double> %data, i8 %mask) {
 227 ; X86-LABEL: test_maskz_expand_pd_128:
 228 ; X86:       # %bb.0:
 229 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 230 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 231 ; X86-NEXT:    vexpandpd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0xc0]
 232 ; X86-NEXT:    retl # encoding: [0xc3]
 233 ;
 234 ; X64-LABEL: test_maskz_expand_pd_128:
 235 ; X64:       # %bb.0:
 236 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 237 ; X64-NEXT:    vexpandpd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0xc0]
 238 ; X64-NEXT:    retq # encoding: [0xc3]
 239   %1 = bitcast i8 %mask to <8 x i1>
 240   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
 241   %2 = call <2 x double> @llvm.x86.avx512.mask.expand.v2f64(<2 x double> %data, <2 x double> zeroinitializer, <2 x i1> %extract)
 242   ret <2 x double> %2
 243 }
 244
 245 define <4 x float> @test_expand_ps_128(<4 x float> %data) {
 246 ; CHECK-LABEL: test_expand_ps_128:
 247 ; CHECK:       # %bb.0:
 248 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 249   %1 = call <4 x float> @llvm.x86.avx512.mask.expand.v4f32(<4 x float> %data, <4 x float> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 250   ret <4 x float> %1
 251 }
 252
 253 define <4 x float> @test_mask_expand_ps_128(<4 x float> %data, <4 x float> %passthru, i8 %mask) {
 254 ; X86-LABEL: test_mask_expand_ps_128:
 255 ; X86:       # %bb.0:
 256 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 257 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 258 ; X86-NEXT:    vexpandps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0xc8]
 259 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 260 ; X86-NEXT:    retl # encoding: [0xc3]
 261 ;
 262 ; X64-LABEL: test_mask_expand_ps_128:
 263 ; X64:       # %bb.0:
 264 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 265 ; X64-NEXT:    vexpandps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0xc8]
 266 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 267 ; X64-NEXT:    retq # encoding: [0xc3]
 268   %1 = bitcast i8 %mask to <8 x i1>
 269   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 270   %2 = call <4 x float> @llvm.x86.avx512.mask.expand.v4f32(<4 x float> %data, <4 x float> %passthru, <4 x i1> %extract)
 271   ret <4 x float> %2
 272 }
 273
 274 define <4 x float> @test_maskz_expand_ps_128(<4 x float> %data, i8 %mask) {
 275 ; X86-LABEL: test_maskz_expand_ps_128:
 276 ; X86:       # %bb.0:
 277 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 278 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 279 ; X86-NEXT:    vexpandps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0xc0]
 280 ; X86-NEXT:    retl # encoding: [0xc3]
 281 ;
 282 ; X64-LABEL: test_maskz_expand_ps_128:
 283 ; X64:       # %bb.0:
 284 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 285 ; X64-NEXT:    vexpandps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0xc0]
 286 ; X64-NEXT:    retq # encoding: [0xc3]
 287   %1 = bitcast i8 %mask to <8 x i1>
 288   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 289   %2 = call <4 x float> @llvm.x86.avx512.mask.expand.v4f32(<4 x float> %data, <4 x float> zeroinitializer, <4 x i1> %extract)
 290   ret <4 x float> %2
 291 }
 292
 293 define <2 x i64> @test_expand_q_128(<2 x i64> %data) {
 294 ; CHECK-LABEL: test_expand_q_128:
 295 ; CHECK:       # %bb.0:
 296 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 297   %1 = call <2 x i64> @llvm.x86.avx512.mask.expand.v2i64(<2 x i64> %data, <2 x i64> undef, <2 x i1> <i1 true, i1 true>)
 298   ret <2 x i64> %1
 299 }
 300
 301 define <2 x i64> @test_mask_expand_q_128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) {
 302 ; X86-LABEL: test_mask_expand_q_128:
 303 ; X86:       # %bb.0:
 304 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 305 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 306 ; X86-NEXT:    vpexpandq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0xc8]
 307 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 308 ; X86-NEXT:    retl # encoding: [0xc3]
 309 ;
 310 ; X64-LABEL: test_mask_expand_q_128:
 311 ; X64:       # %bb.0:
 312 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 313 ; X64-NEXT:    vpexpandq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0xc8]
 314 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 315 ; X64-NEXT:    retq # encoding: [0xc3]
 316   %1 = bitcast i8 %mask to <8 x i1>
 317   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
 318   %2 = call <2 x i64> @llvm.x86.avx512.mask.expand.v2i64(<2 x i64> %data, <2 x i64> %passthru, <2 x i1> %extract)
 319   ret <2 x i64> %2
 320 }
 321
 322 define <2 x i64> @test_maskz_expand_q_128(<2 x i64> %data, i8 %mask) {
 323 ; X86-LABEL: test_maskz_expand_q_128:
 324 ; X86:       # %bb.0:
 325 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 326 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 327 ; X86-NEXT:    vpexpandq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0xc0]
 328 ; X86-NEXT:    retl # encoding: [0xc3]
 329 ;
 330 ; X64-LABEL: test_maskz_expand_q_128:
 331 ; X64:       # %bb.0:
 332 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 333 ; X64-NEXT:    vpexpandq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0xc0]
 334 ; X64-NEXT:    retq # encoding: [0xc3]
 335   %1 = bitcast i8 %mask to <8 x i1>
 336   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
 337   %2 = call <2 x i64> @llvm.x86.avx512.mask.expand.v2i64(<2 x i64> %data, <2 x i64> zeroinitializer, <2 x i1> %extract)
 338   ret <2 x i64> %2
 339 }
 340
 341 define <4 x i32> @test_expand_d_128(<4 x i32> %data) {
 342 ; CHECK-LABEL: test_expand_d_128:
 343 ; CHECK:       # %bb.0:
 344 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 345   %1 = call <4 x i32> @llvm.x86.avx512.mask.expand.v4i32(<4 x i32> %data, <4 x i32> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 346   ret <4 x i32> %1
 347 }
 348
 349 define <4 x i32> @test_mask_expand_d_128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) {
 350 ; X86-LABEL: test_mask_expand_d_128:
 351 ; X86:       # %bb.0:
 352 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 353 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 354 ; X86-NEXT:    vpexpandd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0xc8]
 355 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 356 ; X86-NEXT:    retl # encoding: [0xc3]
 357 ;
 358 ; X64-LABEL: test_mask_expand_d_128:
 359 ; X64:       # %bb.0:
 360 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 361 ; X64-NEXT:    vpexpandd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0xc8]
 362 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 363 ; X64-NEXT:    retq # encoding: [0xc3]
 364   %1 = bitcast i8 %mask to <8 x i1>
 365   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 366   %2 = call <4 x i32> @llvm.x86.avx512.mask.expand.v4i32(<4 x i32> %data, <4 x i32> %passthru, <4 x i1> %extract)
 367   ret <4 x i32> %2
 368 }
 369
 370 define <4 x i32> @test_maskz_expand_d_128(<4 x i32> %data, i8 %mask) {
 371 ; X86-LABEL: test_maskz_expand_d_128:
 372 ; X86:       # %bb.0:
 373 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 374 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 375 ; X86-NEXT:    vpexpandd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0]
 376 ; X86-NEXT:    retl # encoding: [0xc3]
 377 ;
 378 ; X64-LABEL: test_maskz_expand_d_128:
 379 ; X64:       # %bb.0:
 380 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 381 ; X64-NEXT:    vpexpandd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0]
 382 ; X64-NEXT:    retq # encoding: [0xc3]
 383   %1 = bitcast i8 %mask to <8 x i1>
 384   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 385   %2 = call <4 x i32> @llvm.x86.avx512.mask.expand.v4i32(<4 x i32> %data, <4 x i32> zeroinitializer, <4 x i1> %extract)
 386   ret <4 x i32> %2
 387 }
 388
 389 define <4 x double> @test_mask_compress_pd_256(<4 x double> %data, <4 x double> %passthru, i8 %mask) {
 390 ; X86-LABEL: test_mask_compress_pd_256:
 391 ; X86:       # %bb.0:
 392 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 393 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 394 ; X86-NEXT:    vcompresspd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1]
 395 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 396 ; X86-NEXT:    retl # encoding: [0xc3]
 397 ;
 398 ; X64-LABEL: test_mask_compress_pd_256:
 399 ; X64:       # %bb.0:
 400 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 401 ; X64-NEXT:    vcompresspd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1]
 402 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 403 ; X64-NEXT:    retq # encoding: [0xc3]
 404   %1 = bitcast i8 %mask to <8 x i1>
 405   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 406   %2 = call <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double> %data, <4 x double> %passthru, <4 x i1> %extract)
 407   ret <4 x double> %2
 408 }
 409
 410 define <4 x double> @test_maskz_compress_pd_256(<4 x double> %data, i8 %mask) {
 411 ; X86-LABEL: test_maskz_compress_pd_256:
 412 ; X86:       # %bb.0:
 413 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 414 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 415 ; X86-NEXT:    vcompresspd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8a,0xc0]
 416 ; X86-NEXT:    retl # encoding: [0xc3]
 417 ;
 418 ; X64-LABEL: test_maskz_compress_pd_256:
 419 ; X64:       # %bb.0:
 420 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 421 ; X64-NEXT:    vcompresspd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8a,0xc0]
 422 ; X64-NEXT:    retq # encoding: [0xc3]
 423   %1 = bitcast i8 %mask to <8 x i1>
 424   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 425   %2 = call <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double> %data, <4 x double> zeroinitializer, <4 x i1> %extract)
 426   ret <4 x double> %2
 427 }
 428
 429 define <4 x double> @test_compress_pd_256(<4 x double> %data) {
 430 ; CHECK-LABEL: test_compress_pd_256:
 431 ; CHECK:       # %bb.0:
 432 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 433   %1 = call <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double> %data, <4 x double> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 434   ret <4 x double> %1
 435 }
 436
 437 define <8 x float> @test_mask_compress_ps_256(<8 x float> %data, <8 x float> %passthru, i8 %mask) {
 438 ; X86-LABEL: test_mask_compress_ps_256:
 439 ; X86:       # %bb.0:
 440 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 441 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 442 ; X86-NEXT:    vcompressps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0xc1]
 443 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 444 ; X86-NEXT:    retl # encoding: [0xc3]
 445 ;
 446 ; X64-LABEL: test_mask_compress_ps_256:
 447 ; X64:       # %bb.0:
 448 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 449 ; X64-NEXT:    vcompressps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0xc1]
 450 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 451 ; X64-NEXT:    retq # encoding: [0xc3]
 452   %1 = bitcast i8 %mask to <8 x i1>
 453   %2 = call <8 x float> @llvm.x86.avx512.mask.compress.v8f32(<8 x float> %data, <8 x float> %passthru, <8 x i1> %1)
 454   ret <8 x float> %2
 455 }
 456
 457 define <8 x float> @test_maskz_compress_ps_256(<8 x float> %data, i8 %mask) {
 458 ; X86-LABEL: test_maskz_compress_ps_256:
 459 ; X86:       # %bb.0:
 460 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 461 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 462 ; X86-NEXT:    vcompressps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8a,0xc0]
 463 ; X86-NEXT:    retl # encoding: [0xc3]
 464 ;
 465 ; X64-LABEL: test_maskz_compress_ps_256:
 466 ; X64:       # %bb.0:
 467 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 468 ; X64-NEXT:    vcompressps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8a,0xc0]
 469 ; X64-NEXT:    retq # encoding: [0xc3]
 470   %1 = bitcast i8 %mask to <8 x i1>
 471   %2 = call <8 x float> @llvm.x86.avx512.mask.compress.v8f32(<8 x float> %data, <8 x float> zeroinitializer, <8 x i1> %1)
 472   ret <8 x float> %2
 473 }
 474
 475 define <8 x float> @test_compress_ps_256(<8 x float> %data) {
 476 ; CHECK-LABEL: test_compress_ps_256:
 477 ; CHECK:       # %bb.0:
 478 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 479   %1 = call <8 x float> @llvm.x86.avx512.mask.compress.v8f32(<8 x float> %data, <8 x float> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
 480   ret <8 x float> %1
 481 }
 482
 483 define <4 x i64> @test_mask_compress_q_256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask) {
 484 ; X86-LABEL: test_mask_compress_q_256:
 485 ; X86:       # %bb.0:
 486 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 487 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 488 ; X86-NEXT:    vpcompressq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0xc1]
 489 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 490 ; X86-NEXT:    retl # encoding: [0xc3]
 491 ;
 492 ; X64-LABEL: test_mask_compress_q_256:
 493 ; X64:       # %bb.0:
 494 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 495 ; X64-NEXT:    vpcompressq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0xc1]
 496 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 497 ; X64-NEXT:    retq # encoding: [0xc3]
 498   %1 = bitcast i8 %mask to <8 x i1>
 499   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 500   %2 = call <4 x i64> @llvm.x86.avx512.mask.compress.v4i64(<4 x i64> %data, <4 x i64> %passthru, <4 x i1> %extract)
 501   ret <4 x i64> %2
 502 }
 503
 504 define <4 x i64> @test_maskz_compress_q_256(<4 x i64> %data, i8 %mask) {
 505 ; X86-LABEL: test_maskz_compress_q_256:
 506 ; X86:       # %bb.0:
 507 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 508 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 509 ; X86-NEXT:    vpcompressq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8b,0xc0]
 510 ; X86-NEXT:    retl # encoding: [0xc3]
 511 ;
 512 ; X64-LABEL: test_maskz_compress_q_256:
 513 ; X64:       # %bb.0:
 514 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 515 ; X64-NEXT:    vpcompressq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8b,0xc0]
 516 ; X64-NEXT:    retq # encoding: [0xc3]
 517   %1 = bitcast i8 %mask to <8 x i1>
 518   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 519   %2 = call <4 x i64> @llvm.x86.avx512.mask.compress.v4i64(<4 x i64> %data, <4 x i64> zeroinitializer, <4 x i1> %extract)
 520   ret <4 x i64> %2
 521 }
 522
 523 define <4 x i64> @test_compress_q_256(<4 x i64> %data) {
 524 ; CHECK-LABEL: test_compress_q_256:
 525 ; CHECK:       # %bb.0:
 526 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 527   %1 = call <4 x i64> @llvm.x86.avx512.mask.compress.v4i64(<4 x i64> %data, <4 x i64> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 528   ret <4 x i64> %1
 529 }
 530
 531 define <8 x i32> @test_mask_compress_d_256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask) {
 532 ; X86-LABEL: test_mask_compress_d_256:
 533 ; X86:       # %bb.0:
 534 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 535 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 536 ; X86-NEXT:    vpcompressd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0xc1]
 537 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 538 ; X86-NEXT:    retl # encoding: [0xc3]
 539 ;
 540 ; X64-LABEL: test_mask_compress_d_256:
 541 ; X64:       # %bb.0:
 542 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 543 ; X64-NEXT:    vpcompressd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0xc1]
 544 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 545 ; X64-NEXT:    retq # encoding: [0xc3]
 546   %1 = bitcast i8 %mask to <8 x i1>
 547   %2 = call <8 x i32> @llvm.x86.avx512.mask.compress.v8i32(<8 x i32> %data, <8 x i32> %passthru, <8 x i1> %1)
 548   ret <8 x i32> %2
 549 }
 550
 551 define <8 x i32> @test_maskz_compress_d_256(<8 x i32> %data, i8 %mask) {
 552 ; X86-LABEL: test_maskz_compress_d_256:
 553 ; X86:       # %bb.0:
 554 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 555 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 556 ; X86-NEXT:    vpcompressd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8b,0xc0]
 557 ; X86-NEXT:    retl # encoding: [0xc3]
 558 ;
 559 ; X64-LABEL: test_maskz_compress_d_256:
 560 ; X64:       # %bb.0:
 561 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 562 ; X64-NEXT:    vpcompressd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8b,0xc0]
 563 ; X64-NEXT:    retq # encoding: [0xc3]
 564   %1 = bitcast i8 %mask to <8 x i1>
 565   %2 = call <8 x i32> @llvm.x86.avx512.mask.compress.v8i32(<8 x i32> %data, <8 x i32> zeroinitializer, <8 x i1> %1)
 566   ret <8 x i32> %2
 567 }
 568
 569 define <8 x i32> @test_compress_d_256(<8 x i32> %data) {
 570 ; CHECK-LABEL: test_compress_d_256:
 571 ; CHECK:       # %bb.0:
 572 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 573   %1 = call <8 x i32> @llvm.x86.avx512.mask.compress.v8i32(<8 x i32> %data, <8 x i32> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
 574   ret <8 x i32> %1
 575 }
 576
 577 define <4 x double> @test_expand_pd_256(<4 x double> %data) {
 578 ; CHECK-LABEL: test_expand_pd_256:
 579 ; CHECK:       # %bb.0:
 580 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 581   %1 = call <4 x double> @llvm.x86.avx512.mask.expand.v4f64(<4 x double> %data, <4 x double> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 582   ret <4 x double> %1
 583 }
 584
 585 define <4 x double> @test_mask_expand_pd_256(<4 x double> %data, <4 x double> %passthru, i8 %mask) {
 586 ; X86-LABEL: test_mask_expand_pd_256:
 587 ; X86:       # %bb.0:
 588 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 589 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 590 ; X86-NEXT:    vexpandpd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8]
 591 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 592 ; X86-NEXT:    retl # encoding: [0xc3]
 593 ;
 594 ; X64-LABEL: test_mask_expand_pd_256:
 595 ; X64:       # %bb.0:
 596 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 597 ; X64-NEXT:    vexpandpd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8]
 598 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 599 ; X64-NEXT:    retq # encoding: [0xc3]
 600   %1 = bitcast i8 %mask to <8 x i1>
 601   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 602   %2 = call <4 x double> @llvm.x86.avx512.mask.expand.v4f64(<4 x double> %data, <4 x double> %passthru, <4 x i1> %extract)
 603   ret <4 x double> %2
 604 }
 605
 606 define <4 x double> @test_maskz_expand_pd_256(<4 x double> %data, i8 %mask) {
 607 ; X86-LABEL: test_maskz_expand_pd_256:
 608 ; X86:       # %bb.0:
 609 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 610 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 611 ; X86-NEXT:    vexpandpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0xc0]
 612 ; X86-NEXT:    retl # encoding: [0xc3]
 613 ;
 614 ; X64-LABEL: test_maskz_expand_pd_256:
 615 ; X64:       # %bb.0:
 616 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 617 ; X64-NEXT:    vexpandpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0xc0]
 618 ; X64-NEXT:    retq # encoding: [0xc3]
 619   %1 = bitcast i8 %mask to <8 x i1>
 620   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 621   %2 = call <4 x double> @llvm.x86.avx512.mask.expand.v4f64(<4 x double> %data, <4 x double> zeroinitializer, <4 x i1> %extract)
 622   ret <4 x double> %2
 623 }
 624
 625 define <8 x float> @test_expand_ps_256(<8 x float> %data) {
 626 ; CHECK-LABEL: test_expand_ps_256:
 627 ; CHECK:       # %bb.0:
 628 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 629   %1 = call <8 x float> @llvm.x86.avx512.mask.expand.v8f32(<8 x float> %data, <8 x float> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
 630   ret <8 x float> %1
 631 }
 632
 633 define <8 x float> @test_mask_expand_ps_256(<8 x float> %data, <8 x float> %passthru, i8 %mask) {
 634 ; X86-LABEL: test_mask_expand_ps_256:
 635 ; X86:       # %bb.0:
 636 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 637 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 638 ; X86-NEXT:    vexpandps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0xc8]
 639 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 640 ; X86-NEXT:    retl # encoding: [0xc3]
 641 ;
 642 ; X64-LABEL: test_mask_expand_ps_256:
 643 ; X64:       # %bb.0:
 644 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 645 ; X64-NEXT:    vexpandps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0xc8]
 646 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 647 ; X64-NEXT:    retq # encoding: [0xc3]
 648   %1 = bitcast i8 %mask to <8 x i1>
 649   %2 = call <8 x float> @llvm.x86.avx512.mask.expand.v8f32(<8 x float> %data, <8 x float> %passthru, <8 x i1> %1)
 650   ret <8 x float> %2
 651 }
 652
 653 define <8 x float> @test_maskz_expand_ps_256(<8 x float> %data, i8 %mask) {
 654 ; X86-LABEL: test_maskz_expand_ps_256:
 655 ; X86:       # %bb.0:
 656 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 657 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 658 ; X86-NEXT:    vexpandps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0xc0]
 659 ; X86-NEXT:    retl # encoding: [0xc3]
 660 ;
 661 ; X64-LABEL: test_maskz_expand_ps_256:
 662 ; X64:       # %bb.0:
 663 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 664 ; X64-NEXT:    vexpandps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0xc0]
 665 ; X64-NEXT:    retq # encoding: [0xc3]
 666   %1 = bitcast i8 %mask to <8 x i1>
 667   %2 = call <8 x float> @llvm.x86.avx512.mask.expand.v8f32(<8 x float> %data, <8 x float> zeroinitializer, <8 x i1> %1)
 668   ret <8 x float> %2
 669 }
 670
 671 define <4 x i64> @test_expand_q_256(<4 x i64> %data) {
 672 ; CHECK-LABEL: test_expand_q_256:
 673 ; CHECK:       # %bb.0:
 674 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 675   %1 = call <4 x i64> @llvm.x86.avx512.mask.expand.v4i64(<4 x i64> %data, <4 x i64> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 676   ret <4 x i64> %1
 677 }
 678
 679 define <4 x i64> @test_mask_expand_q_256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask) {
 680 ; X86-LABEL: test_mask_expand_q_256:
 681 ; X86:       # %bb.0:
 682 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 683 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 684 ; X86-NEXT:    vpexpandq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0xc8]
 685 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 686 ; X86-NEXT:    retl # encoding: [0xc3]
 687 ;
 688 ; X64-LABEL: test_mask_expand_q_256:
 689 ; X64:       # %bb.0:
 690 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 691 ; X64-NEXT:    vpexpandq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0xc8]
 692 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 693 ; X64-NEXT:    retq # encoding: [0xc3]
 694   %1 = bitcast i8 %mask to <8 x i1>
 695   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 696   %2 = call <4 x i64> @llvm.x86.avx512.mask.expand.v4i64(<4 x i64> %data, <4 x i64> %passthru, <4 x i1> %extract)
 697   ret <4 x i64> %2
 698 }
 699
 700 define <4 x i64> @test_maskz_expand_q_256(<4 x i64> %data, i8 %mask) {
 701 ; X86-LABEL: test_maskz_expand_q_256:
 702 ; X86:       # %bb.0:
 703 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 704 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 705 ; X86-NEXT:    vpexpandq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0xc0]
 706 ; X86-NEXT:    retl # encoding: [0xc3]
 707 ;
 708 ; X64-LABEL: test_maskz_expand_q_256:
 709 ; X64:       # %bb.0:
 710 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 711 ; X64-NEXT:    vpexpandq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0xc0]
 712 ; X64-NEXT:    retq # encoding: [0xc3]
 713   %1 = bitcast i8 %mask to <8 x i1>
 714   %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 715   %2 = call <4 x i64> @llvm.x86.avx512.mask.expand.v4i64(<4 x i64> %data, <4 x i64> zeroinitializer, <4 x i1> %extract)
 716   ret <4 x i64> %2
 717 }
 718
 719 define <8 x i32> @test_expand_d_256(<8 x i32> %data) {
 720 ; CHECK-LABEL: test_expand_d_256:
 721 ; CHECK:       # %bb.0:
 722 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 723   %1 = call <8 x i32> @llvm.x86.avx512.mask.expand.v8i32(<8 x i32> %data, <8 x i32> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
 724   ret <8 x i32> %1
 725 }
 726
 727 define <8 x i32> @test_mask_expand_d_256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask) {
 728 ; X86-LABEL: test_mask_expand_d_256:
 729 ; X86:       # %bb.0:
 730 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 731 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 732 ; X86-NEXT:    vpexpandd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0xc8]
 733 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 734 ; X86-NEXT:    retl # encoding: [0xc3]
 735 ;
 736 ; X64-LABEL: test_mask_expand_d_256:
 737 ; X64:       # %bb.0:
 738 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 739 ; X64-NEXT:    vpexpandd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0xc8]
 740 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 741 ; X64-NEXT:    retq # encoding: [0xc3]
 742   %1 = bitcast i8 %mask to <8 x i1>
 743   %2 = call <8 x i32> @llvm.x86.avx512.mask.expand.v8i32(<8 x i32> %data, <8 x i32> %passthru, <8 x i1> %1)
 744   ret <8 x i32> %2
 745 }
 746
 747 define <8 x i32> @test_maskz_expand_d_256(<8 x i32> %data, i8 %mask) {
 748 ; X86-LABEL: test_maskz_expand_d_256:
 749 ; X86:       # %bb.0:
 750 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 751 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 752 ; X86-NEXT:    vpexpandd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0xc0]
 753 ; X86-NEXT:    retl # encoding: [0xc3]
 754 ;
 755 ; X64-LABEL: test_maskz_expand_d_256:
 756 ; X64:       # %bb.0:
 757 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 758 ; X64-NEXT:    vpexpandd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0xc0]
 759 ; X64-NEXT:    retq # encoding: [0xc3]
 760   %1 = bitcast i8 %mask to <8 x i1>
 761   %2 = call <8 x i32> @llvm.x86.avx512.mask.expand.v8i32(<8 x i32> %data, <8 x i32> zeroinitializer, <8 x i1> %1)
 762   ret <8 x i32> %2
 763 }
 764
 765 define i8 @test_cmpps_256(<8 x float> %a, <8 x float> %b) {
 766 ; CHECK-LABEL: test_cmpps_256:
 767 ; CHECK:       # %bb.0:
 768 ; CHECK-NEXT:    vcmpleps %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc1,0x02]
 769 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 770 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 771 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 772 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 773   %res = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 2)
 774   %1 = bitcast <8 x i1> %res to i8
 775   ret i8 %1
 776 }
 777 declare <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float>, <8 x float>, i32)
 778
 779 define i8 @test_cmpps_128(<4 x float> %a, <4 x float> %b) {
 780 ; CHECK-LABEL: test_cmpps_128:
 781 ; CHECK:       # %bb.0:
 782 ; CHECK-NEXT:    vcmpleps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x02]
 783 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 784 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 785 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 786   %res = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 2)
 787   %1 = shufflevector <4 x i1> %res, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 788   %2 = bitcast <8 x i1> %1 to i8
 789   ret i8 %2
 790 }
 791 declare <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float>, <4 x float>, i32)
 792
 793 define i8 @test_cmppd_256(<4 x double> %a, <4 x double> %b) {
 794 ; CHECK-LABEL: test_cmppd_256:
 795 ; CHECK:       # %bb.0:
 796 ; CHECK-NEXT:    vcmplepd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc1,0x02]
 797 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 798 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 799 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 800 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 801   %res = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %a, <4 x double> %b, i32 2)
 802   %1 = shufflevector <4 x i1> %res, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 803   %2 = bitcast <8 x i1> %1 to i8
 804   ret i8 %2
 805 }
 806 declare <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double>, <4 x double>, i32)
 807
 808 define i8 @test_cmppd_128(<2 x double> %a, <2 x double> %b) {
 809 ; CHECK-LABEL: test_cmppd_128:
 810 ; CHECK:       # %bb.0:
 811 ; CHECK-NEXT:    vcmplepd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02]
 812 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 813 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 814 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 815   %res = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 2)
 816   %1 = shufflevector <2 x i1> %res, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
 817   %2 = bitcast <8 x i1> %1 to i8
 818   ret i8 %2
 819 }
 820 declare <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double>, <2 x double>, i32)
 821
 822 define <8 x float> @test_mm512_maskz_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
 823 ; X86-LABEL: test_mm512_maskz_max_ps_256:
 824 ; X86:       # %bb.0:
 825 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 826 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 827 ; X86-NEXT:    vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5f,0xc1]
 828 ; X86-NEXT:    retl # encoding: [0xc3]
 829 ;
 830 ; X64-LABEL: test_mm512_maskz_max_ps_256:
 831 ; X64:       # %bb.0:
 832 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 833 ; X64-NEXT:    vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5f,0xc1]
 834 ; X64-NEXT:    retq # encoding: [0xc3]
 835   %1 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1)
 836   %2 = bitcast i8 %mask to <8 x i1>
 837   %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> zeroinitializer
 838   ret <8 x float> %3
 839 }
 840
 841 define <8 x float> @test_mm512_mask_max_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
 842 ; X86-LABEL: test_mm512_mask_max_ps_256:
 843 ; X86:       # %bb.0:
 844 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 845 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 846 ; X86-NEXT:    vmaxps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5f,0xd1]
 847 ; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
 848 ; X86-NEXT:    retl # encoding: [0xc3]
 849 ;
 850 ; X64-LABEL: test_mm512_mask_max_ps_256:
 851 ; X64:       # %bb.0:
 852 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 853 ; X64-NEXT:    vmaxps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5f,0xd1]
 854 ; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
 855 ; X64-NEXT:    retq # encoding: [0xc3]
 856   %1 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1)
 857   %2 = bitcast i8 %mask to <8 x i1>
 858   %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %src
 859   ret <8 x float> %3
 860 }
 861
 862 define <8 x float> @test_mm512_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
 863 ; CHECK-LABEL: test_mm512_max_ps_256:
 864 ; CHECK:       # %bb.0:
 865 ; CHECK-NEXT:    vmaxps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5f,0xc1]
 866 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 867   %1 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1)
 868   ret <8 x float> %1
 869 }
 870 declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>)
 871
 872 define <4 x float> @test_mm512_maskz_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
 873 ; X86-LABEL: test_mm512_maskz_max_ps_128:
 874 ; X86:       # %bb.0:
 875 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 876 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 877 ; X86-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5f,0xc1]
 878 ; X86-NEXT:    retl # encoding: [0xc3]
 879 ;
 880 ; X64-LABEL: test_mm512_maskz_max_ps_128:
 881 ; X64:       # %bb.0:
 882 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 883 ; X64-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5f,0xc1]
 884 ; X64-NEXT:    retq # encoding: [0xc3]
 885   %1 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
 886   %2 = bitcast i8 %mask to <8 x i1>
 887   %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 888   %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> zeroinitializer
 889   ret <4 x float> %3
 890 }
 891
 892 define <4 x float> @test_mm512_mask_max_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
 893 ; X86-LABEL: test_mm512_mask_max_ps_128:
 894 ; X86:       # %bb.0:
 895 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 896 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 897 ; X86-NEXT:    vmaxps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5f,0xd1]
 898 ; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
 899 ; X86-NEXT:    retl # encoding: [0xc3]
 900 ;
 901 ; X64-LABEL: test_mm512_mask_max_ps_128:
 902 ; X64:       # %bb.0:
 903 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 904 ; X64-NEXT:    vmaxps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5f,0xd1]
 905 ; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
 906 ; X64-NEXT:    retq # encoding: [0xc3]
 907   %1 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
 908   %2 = bitcast i8 %mask to <8 x i1>
 909   %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 910   %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %src
 911   ret <4 x float> %3
 912 }
 913
 914 define <4 x float> @test_mm512_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
 915 ; CHECK-LABEL: test_mm512_max_ps_128:
 916 ; CHECK:       # %bb.0:
 917 ; CHECK-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5f,0xc1]
 918 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 919   %1 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
 920   ret <4 x float> %1
 921 }
 922 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>)
 923
 924 define <8 x float> @test_mm512_maskz_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
 925 ; X86-LABEL: test_mm512_maskz_min_ps_256:
 926 ; X86:       # %bb.0:
 927 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 928 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 929 ; X86-NEXT:    vminps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5d,0xc1]
 930 ; X86-NEXT:    retl # encoding: [0xc3]
 931 ;
 932 ; X64-LABEL: test_mm512_maskz_min_ps_256:
 933 ; X64:       # %bb.0:
 934 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 935 ; X64-NEXT:    vminps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5d,0xc1]
 936 ; X64-NEXT:    retq # encoding: [0xc3]
 937   %1 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1)
 938   %2 = bitcast i8 %mask to <8 x i1>
 939   %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> zeroinitializer
 940   ret <8 x float> %3
 941 }
 942
 943 define <8 x float> @test_mm512_mask_min_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
 944 ; X86-LABEL: test_mm512_mask_min_ps_256:
 945 ; X86:       # %bb.0:
 946 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 947 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 948 ; X86-NEXT:    vminps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5d,0xd1]
 949 ; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
 950 ; X86-NEXT:    retl # encoding: [0xc3]
 951 ;
 952 ; X64-LABEL: test_mm512_mask_min_ps_256:
 953 ; X64:       # %bb.0:
 954 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 955 ; X64-NEXT:    vminps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5d,0xd1]
 956 ; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
 957 ; X64-NEXT:    retq # encoding: [0xc3]
 958   %1 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1)
 959   %2 = bitcast i8 %mask to <8 x i1>
 960   %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %src
 961   ret <8 x float> %3
 962 }
 963
 964 define <8 x float> @test_mm512_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
 965 ; CHECK-LABEL: test_mm512_min_ps_256:
 966 ; CHECK:       # %bb.0:
 967 ; CHECK-NEXT:    vminps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5d,0xc1]
 968 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 969   %1 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1)
 970   ret <8 x float> %1
 971 }
 972 declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>)
 973
 974 define <4 x float> @test_mm512_maskz_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
 975 ; X86-LABEL: test_mm512_maskz_min_ps_128:
 976 ; X86:       # %bb.0:
 977 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 978 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 979 ; X86-NEXT:    vminps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5d,0xc1]
 980 ; X86-NEXT:    retl # encoding: [0xc3]
 981 ;
 982 ; X64-LABEL: test_mm512_maskz_min_ps_128:
 983 ; X64:       # %bb.0:
 984 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 985 ; X64-NEXT:    vminps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5d,0xc1]
 986 ; X64-NEXT:    retq # encoding: [0xc3]
 987   %1 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
 988   %2 = bitcast i8 %mask to <8 x i1>
 989   %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 990   %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> zeroinitializer
 991   ret <4 x float> %3
 992 }
 993
 994 define <4 x float> @test_mm512_mask_min_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
 995 ; X86-LABEL: test_mm512_mask_min_ps_128:
 996 ; X86:       # %bb.0:
 997 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 998 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 999 ; X86-NEXT:    vminps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5d,0xd1]
1000 ; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
1001 ; X86-NEXT:    retl # encoding: [0xc3]
1002 ;
1003 ; X64-LABEL: test_mm512_mask_min_ps_128:
1004 ; X64:       # %bb.0:
1005 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1006 ; X64-NEXT:    vminps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5d,0xd1]
1007 ; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
1008 ; X64-NEXT:    retq # encoding: [0xc3]
1009   %1 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
1010   %2 = bitcast i8 %mask to <8 x i1>
1011   %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1012   %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %src
1013   ret <4 x float> %3
1014 }
1015
1016 define <4 x float> @test_mm512_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
1017 ; CHECK-LABEL: test_mm512_min_ps_128:
1018 ; CHECK:       # %bb.0:
1019 ; CHECK-NEXT:    vminps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5d,0xc1]
1020 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1021   %1 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
1022   ret <4 x float> %1
1023 }
1024 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>)
1025
1026 define <4 x double> @test_getexp_pd_256(<4 x double> %a0) {
1027 ; CHECK-LABEL: test_getexp_pd_256:
1028 ; CHECK:       # %bb.0:
1029 ; CHECK-NEXT:    vgetexppd %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x42,0xc0]
1030 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1031   %res = call <4 x double> @llvm.x86.avx512.mask.getexp.pd.256(<4 x double> %a0,  <4 x double> zeroinitializer, i8 -1)
1032   ret <4 x double> %res
1033 }
1034
1035 declare <4 x double> @llvm.x86.avx512.mask.getexp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
1036
1037 define <8 x float> @test_getexp_ps_256(<8 x float> %a0) {
1038 ; CHECK-LABEL: test_getexp_ps_256:
1039 ; CHECK:       # %bb.0:
1040 ; CHECK-NEXT:    vgetexpps %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x42,0xc0]
1041 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1042   %res = call <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 -1)
1043   ret <8 x float> %res
1044 }
1045 declare <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
1046
1047 declare <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>)
1048
1049 define <4 x i32>@test_int_x86_avx512_mask_vpermi2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
1050 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_d_128:
1051 ; X86:       # %bb.0:
1052 ; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1053 ; X86-NEXT:    vpermt2d %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x7e,0xda]
1054 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1055 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1056 ; X86-NEXT:    vpermi2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x76,0xca]
1057 ; X86-NEXT:    vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
1058 ; X86-NEXT:    retl # encoding: [0xc3]
1059 ;
1060 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_d_128:
1061 ; X64:       # %bb.0:
1062 ; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1063 ; X64-NEXT:    vpermt2d %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x7e,0xda]
1064 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1065 ; X64-NEXT:    vpermi2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x76,0xca]
1066 ; X64-NEXT:    vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
1067 ; X64-NEXT:    retq # encoding: [0xc3]
1068   %1 = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
1069   %2 = bitcast i8 %x3 to <8 x i1>
1070   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1071   %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x1
1072   %4 = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
1073   %res2 = add <4 x i32> %3, %4
1074   ret <4 x i32> %res2
1075 }
1076
1077 define <4 x i32>@test_int_x86_avx512_mask_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
1078 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_d_128:
1079 ; X86:       # %bb.0:
1080 ; X86-NEXT:    vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
1081 ; X86-NEXT:    vpermt2d %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xda]
1082 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1083 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1084 ; X86-NEXT:    vpermt2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7e,0xca]
1085 ; X86-NEXT:    vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
1086 ; X86-NEXT:    retl # encoding: [0xc3]
1087 ;
1088 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_d_128:
1089 ; X64:       # %bb.0:
1090 ; X64-NEXT:    vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
1091 ; X64-NEXT:    vpermt2d %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xda]
1092 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1093 ; X64-NEXT:    vpermt2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7e,0xca]
1094 ; X64-NEXT:    vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
1095 ; X64-NEXT:    retq # encoding: [0xc3]
1096   %1 = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> %x2)
1097   %2 = bitcast i8 %x3 to <8 x i1>
1098   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1099   %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x1
1100   %4 = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> %x2)
1101   %res2 = add <4 x i32> %3, %4
1102   ret <4 x i32> %res2
1103 }
1104
1105 define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
1106 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128:
1107 ; X86:       # %bb.0:
1108 ; X86-NEXT:    vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
1109 ; X86-NEXT:    vpermt2d %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xda]
1110 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1111 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1112 ; X86-NEXT:    vpermt2d %xmm2, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7e,0xca]
1113 ; X86-NEXT:    vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
1114 ; X86-NEXT:    retl # encoding: [0xc3]
1115 ;
1116 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128:
1117 ; X64:       # %bb.0:
1118 ; X64-NEXT:    vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
1119 ; X64-NEXT:    vpermt2d %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xda]
1120 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1121 ; X64-NEXT:    vpermt2d %xmm2, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7e,0xca]
1122 ; X64-NEXT:    vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
1123 ; X64-NEXT:    retq # encoding: [0xc3]
1124   %1 = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> %x2)
1125   %2 = bitcast i8 %x3 to <8 x i1>
1126   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1127   %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> zeroinitializer
1128   %4 = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> %x2)
1129   %res2 = add <4 x i32> %3, %4
1130   ret <4 x i32> %res2
1131 }
1132
1133 declare <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>)
1134
1135 define <8 x i32>@test_int_x86_avx512_mask_vpermi2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
1136 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_d_256:
1137 ; X86:       # %bb.0:
1138 ; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1139 ; X86-NEXT:    vpermt2d %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x7e,0xda]
1140 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1141 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1142 ; X86-NEXT:    vpermi2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x76,0xca]
1143 ; X86-NEXT:    vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
1144 ; X86-NEXT:    retl # encoding: [0xc3]
1145 ;
1146 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_d_256:
1147 ; X64:       # %bb.0:
1148 ; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1149 ; X64-NEXT:    vpermt2d %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x7e,0xda]
1150 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1151 ; X64-NEXT:    vpermi2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x76,0xca]
1152 ; X64-NEXT:    vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
1153 ; X64-NEXT:    retq # encoding: [0xc3]
1154   %1 = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
1155   %2 = bitcast i8 %x3 to <8 x i1>
1156   %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x1
1157   %4 = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
1158   %res2 = add <8 x i32> %3, %4
1159   ret <8 x i32> %res2
1160 }
1161
1162 define <8 x i32>@test_int_x86_avx512_mask_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
1163 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_d_256:
1164 ; X86:       # %bb.0:
1165 ; X86-NEXT:    vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
1166 ; X86-NEXT:    vpermt2d %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xda]
1167 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1168 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1169 ; X86-NEXT:    vpermt2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7e,0xca]
1170 ; X86-NEXT:    vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
1171 ; X86-NEXT:    retl # encoding: [0xc3]
1172 ;
1173 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_d_256:
1174 ; X64:       # %bb.0:
1175 ; X64-NEXT:    vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
1176 ; X64-NEXT:    vpermt2d %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xda]
1177 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1178 ; X64-NEXT:    vpermt2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7e,0xca]
1179 ; X64-NEXT:    vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
1180 ; X64-NEXT:    retq # encoding: [0xc3]
1181   %1 = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> %x2)
1182   %2 = bitcast i8 %x3 to <8 x i1>
1183   %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x1
1184   %4 = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> %x2)
1185   %res2 = add <8 x i32> %3, %4
1186   ret <8 x i32> %res2
1187 }
1188
1189 define <8 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
1190 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_256:
1191 ; X86:       # %bb.0:
1192 ; X86-NEXT:    vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
1193 ; X86-NEXT:    vpermt2d %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xda]
1194 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1195 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1196 ; X86-NEXT:    vpermt2d %ymm2, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7e,0xca]
1197 ; X86-NEXT:    vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
1198 ; X86-NEXT:    retl # encoding: [0xc3]
1199 ;
1200 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_256:
1201 ; X64:       # %bb.0:
1202 ; X64-NEXT:    vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
1203 ; X64-NEXT:    vpermt2d %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xda]
1204 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1205 ; X64-NEXT:    vpermt2d %ymm2, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7e,0xca]
1206 ; X64-NEXT:    vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
1207 ; X64-NEXT:    retq # encoding: [0xc3]
1208   %1 = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> %x2)
1209   %2 = bitcast i8 %x3 to <8 x i1>
1210   %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer
1211   %4 = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> %x2)
1212   %res2 = add <8 x i32> %3, %4
1213   ret <8 x i32> %res2
1214 }
1215
1216 declare <2 x double> @llvm.x86.avx512.vpermi2var.pd.128(<2 x double>, <2 x i64>, <2 x double>)
1217
1218 define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
1219 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128:
1220 ; X86:       # %bb.0:
1221 ; X86-NEXT:    vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
1222 ; X86-NEXT:    vpermt2pd %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x7f,0xda]
1223 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1224 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1225 ; X86-NEXT:    vpermi2pd %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x77,0xca]
1226 ; X86-NEXT:    vaddpd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc3]
1227 ; X86-NEXT:    retl # encoding: [0xc3]
1228 ;
1229 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128:
1230 ; X64:       # %bb.0:
1231 ; X64-NEXT:    vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
1232 ; X64-NEXT:    vpermt2pd %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x7f,0xda]
1233 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1234 ; X64-NEXT:    vpermi2pd %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x77,0xca]
1235 ; X64-NEXT:    vaddpd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc3]
1236 ; X64-NEXT:    retq # encoding: [0xc3]
1237   %1 = call <2 x double> @llvm.x86.avx512.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2)
1238   %2 = bitcast <2 x i64> %x1 to <2 x double>
1239   %3 = bitcast i8 %x3 to <8 x i1>
1240   %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <2 x i32> <i32 0, i32 1>
1241   %4 = select <2 x i1> %extract, <2 x double> %1, <2 x double> %2
1242   %5 = call <2 x double> @llvm.x86.avx512.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2)
1243   %6 = bitcast <2 x i64> %x1 to <2 x double>
1244   %res2 = fadd <2 x double> %4, %5
1245   ret <2 x double> %res2
1246 }
1247
1248 declare <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>)
1249
1250 define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
1251 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256:
1252 ; X86:       # %bb.0:
1253 ; X86-NEXT:    vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
1254 ; X86-NEXT:    vpermt2pd %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x7f,0xda]
1255 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1256 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1257 ; X86-NEXT:    vpermi2pd %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x77,0xca]
1258 ; X86-NEXT:    vaddpd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc3]
1259 ; X86-NEXT:    retl # encoding: [0xc3]
1260 ;
1261 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256:
1262 ; X64:       # %bb.0:
1263 ; X64-NEXT:    vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
1264 ; X64-NEXT:    vpermt2pd %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x7f,0xda]
1265 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1266 ; X64-NEXT:    vpermi2pd %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x77,0xca]
1267 ; X64-NEXT:    vaddpd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc3]
1268 ; X64-NEXT:    retq # encoding: [0xc3]
1269   %1 = call <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2)
1270   %2 = bitcast <4 x i64> %x1 to <4 x double>
1271   %3 = bitcast i8 %x3 to <8 x i1>
1272   %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1273   %4 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %2
1274   %5 = call <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2)
1275   %6 = bitcast <4 x i64> %x1 to <4 x double>
1276   %res2 = fadd <4 x double> %4, %5
1277   ret <4 x double> %res2
1278 }
1279
1280 declare <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float>, <4 x i32>, <4 x float>)
1281
1282 define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {
1283 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128:
1284 ; X86:       # %bb.0:
1285 ; X86-NEXT:    vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
1286 ; X86-NEXT:    vpermt2ps %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x7f,0xda]
1287 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1288 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1289 ; X86-NEXT:    vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
1290 ; X86-NEXT:    vaddps %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc3]
1291 ; X86-NEXT:    retl # encoding: [0xc3]
1292 ;
1293 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128:
1294 ; X64:       # %bb.0:
1295 ; X64-NEXT:    vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
1296 ; X64-NEXT:    vpermt2ps %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x7f,0xda]
1297 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1298 ; X64-NEXT:    vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
1299 ; X64-NEXT:    vaddps %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc3]
1300 ; X64-NEXT:    retq # encoding: [0xc3]
1301   %1 = call <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2)
1302   %2 = bitcast <4 x i32> %x1 to <4 x float>
1303   %3 = bitcast i8 %x3 to <8 x i1>
1304   %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1305   %4 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %2
1306   %5 = call <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2)
1307   %6 = bitcast <4 x i32> %x1 to <4 x float>
1308   %res2 = fadd <4 x float> %4, %5
1309   ret <4 x float> %res2
1310 }
1311
1312 define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128_cast(<4 x float> %x0, <2 x i64> %x1, <4 x float> %x2, i8 %x3) {
1313 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128_cast:
1314 ; X86:       # %bb.0:
1315 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1316 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1317 ; X86-NEXT:    vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
1318 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1319 ; X86-NEXT:    retl # encoding: [0xc3]
1320 ;
1321 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128_cast:
1322 ; X64:       # %bb.0:
1323 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1324 ; X64-NEXT:    vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
1325 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1326 ; X64-NEXT:    retq # encoding: [0xc3]
1327   %x1cast = bitcast <2 x i64> %x1 to <4 x i32>
1328   %1 = call <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1cast, <4 x float> %x2)
1329   %2 = bitcast <4 x i32> %x1cast to <4 x float>
1330   %3 = bitcast i8 %x3 to <8 x i1>
1331   %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1332   %4 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %2
1333   ret <4 x float> %4
1334 }
1335
1336 declare <8 x float> @llvm.x86.avx512.vpermi2var.ps.256(<8 x float>, <8 x i32>, <8 x float>)
1337
1338 define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
1339 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256:
1340 ; X86:       # %bb.0:
1341 ; X86-NEXT:    vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
1342 ; X86-NEXT:    vpermt2ps %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x7f,0xda]
1343 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1344 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1345 ; X86-NEXT:    vpermi2ps %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x77,0xca]
1346 ; X86-NEXT:    vaddps %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc3]
1347 ; X86-NEXT:    retl # encoding: [0xc3]
1348 ;
1349 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256:
1350 ; X64:       # %bb.0:
1351 ; X64-NEXT:    vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
1352 ; X64-NEXT:    vpermt2ps %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x7f,0xda]
1353 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1354 ; X64-NEXT:    vpermi2ps %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x77,0xca]
1355 ; X64-NEXT:    vaddps %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc3]
1356 ; X64-NEXT:    retq # encoding: [0xc3]
1357   %1 = call <8 x float> @llvm.x86.avx512.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2)
1358   %2 = bitcast <8 x i32> %x1 to <8 x float>
1359   %3 = bitcast i8 %x3 to <8 x i1>
1360   %4 = select <8 x i1> %3, <8 x float> %1, <8 x float> %2
1361   %5 = call <8 x float> @llvm.x86.avx512.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2)
1362   %6 = bitcast <8 x i32> %x1 to <8 x float>
1363   %res2 = fadd <8 x float> %4, %5
1364   ret <8 x float> %res2
1365 }
1366
1367 declare <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>)
1368
1369 define <2 x i64>@test_int_x86_avx512_mask_vpermi2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
1370 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_q_128:
1371 ; X86:       # %bb.0:
1372 ; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1373 ; X86-NEXT:    vpermt2q %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x7e,0xda]
1374 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1375 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1376 ; X86-NEXT:    vpermi2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x76,0xca]
1377 ; X86-NEXT:    vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
1378 ; X86-NEXT:    retl # encoding: [0xc3]
1379 ;
1380 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_q_128:
1381 ; X64:       # %bb.0:
1382 ; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1383 ; X64-NEXT:    vpermt2q %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x7e,0xda]
1384 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1385 ; X64-NEXT:    vpermi2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x76,0xca]
1386 ; X64-NEXT:    vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
1387 ; X64-NEXT:    retq # encoding: [0xc3]
1388   %1 = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
1389   %2 = bitcast i8 %x3 to <8 x i1>
1390   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
1391   %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x1
1392   %4 = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
1393   %res2 = add <2 x i64> %3, %4
1394   ret <2 x i64> %res2
1395 }
1396
1397 define <2 x i64>@test_int_x86_avx512_mask_vpermt2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
1398 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_q_128:
1399 ; X86:       # %bb.0:
1400 ; X86-NEXT:    vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
1401 ; X86-NEXT:    vpermt2q %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7e,0xda]
1402 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1403 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1404 ; X86-NEXT:    vpermt2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7e,0xca]
1405 ; X86-NEXT:    vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
1406 ; X86-NEXT:    retl # encoding: [0xc3]
1407 ;
1408 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_q_128:
1409 ; X64:       # %bb.0:
1410 ; X64-NEXT:    vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
1411 ; X64-NEXT:    vpermt2q %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7e,0xda]
1412 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1413 ; X64-NEXT:    vpermt2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7e,0xca]
1414 ; X64-NEXT:    vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
1415 ; X64-NEXT:    retq # encoding: [0xc3]
1416   %1 = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> %x2)
1417   %2 = bitcast i8 %x3 to <8 x i1>
1418   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
1419   %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x1
1420   %4 = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> %x2)
1421   %res2 = add <2 x i64> %3, %4
1422   ret <2 x i64> %res2
1423 }
1424
1425 define <2 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
1426 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_128:
1427 ; X86:       # %bb.0:
1428 ; X86-NEXT:    vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
1429 ; X86-NEXT:    vpermt2q %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7e,0xda]
1430 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1431 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1432 ; X86-NEXT:    vpermt2q %xmm2, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x7e,0xca]
1433 ; X86-NEXT:    vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
1434 ; X86-NEXT:    retl # encoding: [0xc3]
1435 ;
1436 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_128:
1437 ; X64:       # %bb.0:
1438 ; X64-NEXT:    vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
1439 ; X64-NEXT:    vpermt2q %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7e,0xda]
1440 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1441 ; X64-NEXT:    vpermt2q %xmm2, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x7e,0xca]
1442 ; X64-NEXT:    vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
1443 ; X64-NEXT:    retq # encoding: [0xc3]
1444   %1 = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> %x2)
1445   %2 = bitcast i8 %x3 to <8 x i1>
1446   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
1447   %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> zeroinitializer
1448   %4 = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> %x2)
1449   %res2 = add <2 x i64> %3, %4
1450   ret <2 x i64> %res2
1451 }
1452
1453 declare <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>)
1454
1455 define <4 x i64>@test_int_x86_avx512_mask_vpermi2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
1456 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_q_256:
1457 ; X86:       # %bb.0:
1458 ; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1459 ; X86-NEXT:    vpermt2q %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x7e,0xda]
1460 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1461 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1462 ; X86-NEXT:    vpermi2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x76,0xca]
1463 ; X86-NEXT:    vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
1464 ; X86-NEXT:    retl # encoding: [0xc3]
1465 ;
1466 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_q_256:
1467 ; X64:       # %bb.0:
1468 ; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1469 ; X64-NEXT:    vpermt2q %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x7e,0xda]
1470 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1471 ; X64-NEXT:    vpermi2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x76,0xca]
1472 ; X64-NEXT:    vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
1473 ; X64-NEXT:    retq # encoding: [0xc3]
1474   %1 = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
1475   %2 = bitcast i8 %x3 to <8 x i1>
1476   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1477   %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x1
1478   %4 = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
1479   %res2 = add <4 x i64> %3, %4
1480   ret <4 x i64> %res2
1481 }
1482
1483 define <4 x i64>@test_int_x86_avx512_mask_vpermt2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
1484 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_q_256:
1485 ; X86:       # %bb.0:
1486 ; X86-NEXT:    vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
1487 ; X86-NEXT:    vpermt2q %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7e,0xda]
1488 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1489 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1490 ; X86-NEXT:    vpermt2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7e,0xca]
1491 ; X86-NEXT:    vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
1492 ; X86-NEXT:    retl # encoding: [0xc3]
1493 ;
1494 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_q_256:
1495 ; X64:       # %bb.0:
1496 ; X64-NEXT:    vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
1497 ; X64-NEXT:    vpermt2q %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7e,0xda]
1498 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1499 ; X64-NEXT:    vpermt2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7e,0xca]
1500 ; X64-NEXT:    vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
1501 ; X64-NEXT:    retq # encoding: [0xc3]
1502   %1 = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> %x2)
1503   %2 = bitcast i8 %x3 to <8 x i1>
1504   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1505   %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x1
1506   %4 = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> %x2)
1507   %res2 = add <4 x i64> %3, %4
1508   ret <4 x i64> %res2
1509 }
1510
1511 define <4 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
1512 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_256:
1513 ; X86:       # %bb.0:
1514 ; X86-NEXT:    vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
1515 ; X86-NEXT:    vpermt2q %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7e,0xda]
1516 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1517 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1518 ; X86-NEXT:    vpermt2q %ymm2, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x7e,0xca]
1519 ; X86-NEXT:    vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
1520 ; X86-NEXT:    retl # encoding: [0xc3]
1521 ;
1522 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_256:
1523 ; X64:       # %bb.0:
1524 ; X64-NEXT:    vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
1525 ; X64-NEXT:    vpermt2q %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7e,0xda]
1526 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1527 ; X64-NEXT:    vpermt2q %ymm2, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x7e,0xca]
1528 ; X64-NEXT:    vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
1529 ; X64-NEXT:    retq # encoding: [0xc3]
1530   %1 = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> %x2)
1531   %2 = bitcast i8 %x3 to <8 x i1>
1532   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1533   %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> zeroinitializer
1534   %4 = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> %x2)
1535   %res2 = add <4 x i64> %3, %4
1536   ret <4 x i64> %res2
1537 }
1538
1539 declare <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
1540
1541 define <2 x double>@test_int_x86_avx512_mask_scalef_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
1542 ; X86-LABEL: test_int_x86_avx512_mask_scalef_pd_128:
1543 ; X86:       # %bb.0:
1544 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1545 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1546 ; X86-NEXT:    vscalefpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x2c,0xd1]
1547 ; X86-NEXT:    vscalefpd %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x2c,0xc1]
1548 ; X86-NEXT:    vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
1549 ; X86-NEXT:    retl # encoding: [0xc3]
1550 ;
1551 ; X64-LABEL: test_int_x86_avx512_mask_scalef_pd_128:
1552 ; X64:       # %bb.0:
1553 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1554 ; X64-NEXT:    vscalefpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x2c,0xd1]
1555 ; X64-NEXT:    vscalefpd %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x2c,0xc1]
1556 ; X64-NEXT:    vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
1557 ; X64-NEXT:    retq # encoding: [0xc3]
1558   %res = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
1559   %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
1560   %res2 = fadd <2 x double> %res, %res1
1561   ret <2 x double> %res2
1562 }
1563
1564 declare <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
1565
1566 define <4 x double>@test_int_x86_avx512_mask_scalef_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
1567 ; X86-LABEL: test_int_x86_avx512_mask_scalef_pd_256:
1568 ; X86:       # %bb.0:
1569 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1570 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1571 ; X86-NEXT:    vscalefpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x2c,0xd1]
1572 ; X86-NEXT:    vscalefpd %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x2c,0xc1]
1573 ; X86-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
1574 ; X86-NEXT:    retl # encoding: [0xc3]
1575 ;
1576 ; X64-LABEL: test_int_x86_avx512_mask_scalef_pd_256:
1577 ; X64:       # %bb.0:
1578 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1579 ; X64-NEXT:    vscalefpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x2c,0xd1]
1580 ; X64-NEXT:    vscalefpd %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x2c,0xc1]
1581 ; X64-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
1582 ; X64-NEXT:    retq # encoding: [0xc3]
1583   %res = call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
1584   %res1 = call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
1585   %res2 = fadd <4 x double> %res, %res1
1586   ret <4 x double> %res2
1587 }
1588
1589 declare <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
1590
1591 define <4 x float>@test_int_x86_avx512_mask_scalef_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
1592 ; X86-LABEL: test_int_x86_avx512_mask_scalef_ps_128:
1593 ; X86:       # %bb.0:
1594 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1595 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1596 ; X86-NEXT:    vscalefps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2c,0xd1]
1597 ; X86-NEXT:    vscalefps %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x2c,0xc1]
1598 ; X86-NEXT:    vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
1599 ; X86-NEXT:    retl # encoding: [0xc3]
1600 ;
1601 ; X64-LABEL: test_int_x86_avx512_mask_scalef_ps_128:
1602 ; X64:       # %bb.0:
1603 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1604 ; X64-NEXT:    vscalefps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2c,0xd1]
1605 ; X64-NEXT:    vscalefps %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x2c,0xc1]
1606 ; X64-NEXT:    vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
1607 ; X64-NEXT:    retq # encoding: [0xc3]
1608   %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
1609   %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
1610   %res2 = fadd <4 x float> %res, %res1
1611   ret <4 x float> %res2
1612 }
1613
1614 declare <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
1615
1616 define <8 x float>@test_int_x86_avx512_mask_scalef_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
1617 ; X86-LABEL: test_int_x86_avx512_mask_scalef_ps_256:
1618 ; X86:       # %bb.0:
1619 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1620 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1621 ; X86-NEXT:    vscalefps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2c,0xd1]
1622 ; X86-NEXT:    vscalefps %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x2c,0xc1]
1623 ; X86-NEXT:    vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
1624 ; X86-NEXT:    retl # encoding: [0xc3]
1625 ;
1626 ; X64-LABEL: test_int_x86_avx512_mask_scalef_ps_256:
1627 ; X64:       # %bb.0:
1628 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1629 ; X64-NEXT:    vscalefps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2c,0xd1]
1630 ; X64-NEXT:    vscalefps %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x2c,0xc1]
1631 ; X64-NEXT:    vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
1632 ; X64-NEXT:    retq # encoding: [0xc3]
1633   %res = call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
1634   %res1 = call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
1635   %res2 = fadd <8 x float> %res, %res1
1636   ret <8 x float> %res2
1637 }
1638
1639 declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64>, <16 x i8>, i8)
1640
1641 define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
1642 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qb_128:
1643 ; X86:       # %bb.0:
1644 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1645 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1646 ; X86-NEXT:    vpmovqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x32,0xc2]
1647 ; X86-NEXT:    vpmovqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x32,0xc1]
1648 ; X86-NEXT:    vpmovqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x32,0xc0]
1649 ; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1650 ; X86-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
1651 ; X86-NEXT:    retl # encoding: [0xc3]
1652 ;
1653 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qb_128:
1654 ; X64:       # %bb.0:
1655 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1656 ; X64-NEXT:    vpmovqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x32,0xc2]
1657 ; X64-NEXT:    vpmovqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x32,0xc1]
1658 ; X64-NEXT:    vpmovqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x32,0xc0]
1659 ; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1660 ; X64-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
1661 ; X64-NEXT:    retq # encoding: [0xc3]
1662     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1)
1663     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2)
1664     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
1665     %res3 = add <16 x i8> %res0, %res1
1666     %res4 = add <16 x i8> %res3, %res2
1667     ret <16 x i8> %res4
1668 }
1669
1670 declare void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64>, i8)
1671
1672 define void @test_int_x86_avx512_mask_pmov_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
1673 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_128:
1674 ; X86:       # %bb.0:
1675 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1676 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1677 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1678 ; X86-NEXT:    vpmovqb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x32,0x00]
1679 ; X86-NEXT:    vpmovqb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x32,0x00]
1680 ; X86-NEXT:    retl # encoding: [0xc3]
1681 ;
1682 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_128:
1683 ; X64:       # %bb.0:
1684 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1685 ; X64-NEXT:    vpmovqb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x32,0x07]
1686 ; X64-NEXT:    vpmovqb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x32,0x07]
1687 ; X64-NEXT:    retq # encoding: [0xc3]
1688     call void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
1689     call void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
1690     ret void
1691 }
1692
1693 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64>, <16 x i8>, i8)
1694
1695 define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
1696 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qb_128:
1697 ; X86:       # %bb.0:
1698 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1699 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1700 ; X86-NEXT:    vpmovsqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x22,0xc2]
1701 ; X86-NEXT:    vpmovsqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x22,0xc1]
1702 ; X86-NEXT:    vpmovsqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x22,0xc0]
1703 ; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1704 ; X86-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
1705 ; X86-NEXT:    retl # encoding: [0xc3]
1706 ;
1707 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qb_128:
1708 ; X64:       # %bb.0:
1709 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1710 ; X64-NEXT:    vpmovsqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x22,0xc2]
1711 ; X64-NEXT:    vpmovsqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x22,0xc1]
1712 ; X64-NEXT:    vpmovsqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x22,0xc0]
1713 ; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1714 ; X64-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
1715 ; X64-NEXT:    retq # encoding: [0xc3]
1716     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1)
1717     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2)
1718     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
1719     %res3 = add <16 x i8> %res0, %res1
1720     %res4 = add <16 x i8> %res3, %res2
1721     ret <16 x i8> %res4
1722 }
1723
1724 declare void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64>, i8)
1725
1726 define void @test_int_x86_avx512_mask_pmovs_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
1727 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_128:
1728 ; X86:       # %bb.0:
1729 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1730 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1731 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1732 ; X86-NEXT:    vpmovsqb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x22,0x00]
1733 ; X86-NEXT:    vpmovsqb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x22,0x00]
1734 ; X86-NEXT:    retl # encoding: [0xc3]
1735 ;
1736 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_128:
1737 ; X64:       # %bb.0:
1738 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1739 ; X64-NEXT:    vpmovsqb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x22,0x07]
1740 ; X64-NEXT:    vpmovsqb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x22,0x07]
1741 ; X64-NEXT:    retq # encoding: [0xc3]
1742     call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
1743     call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
1744     ret void
1745 }
1746
1747 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64>, <16 x i8>, i8)
1748
1749 define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
1750 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qb_128:
1751 ; X86:       # %bb.0:
1752 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1753 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1754 ; X86-NEXT:    vpmovusqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x12,0xc2]
1755 ; X86-NEXT:    vpmovusqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x12,0xc1]
1756 ; X86-NEXT:    vpmovusqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x12,0xc0]
1757 ; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1758 ; X86-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
1759 ; X86-NEXT:    retl # encoding: [0xc3]
1760 ;
1761 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qb_128:
1762 ; X64:       # %bb.0:
1763 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1764 ; X64-NEXT:    vpmovusqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x12,0xc2]
1765 ; X64-NEXT:    vpmovusqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x12,0xc1]
1766 ; X64-NEXT:    vpmovusqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x12,0xc0]
1767 ; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1768 ; X64-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
1769 ; X64-NEXT:    retq # encoding: [0xc3]
1770     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1)
1771     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2)
1772     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
1773     %res3 = add <16 x i8> %res0, %res1
1774     %res4 = add <16 x i8> %res3, %res2
1775     ret <16 x i8> %res4
1776 }
1777
1778 declare void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64>, i8)
1779
1780 define void @test_int_x86_avx512_mask_pmovus_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
1781 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_128:
1782 ; X86:       # %bb.0:
1783 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1784 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1785 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1786 ; X86-NEXT:    vpmovusqb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x12,0x00]
1787 ; X86-NEXT:    vpmovusqb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x12,0x00]
1788 ; X86-NEXT:    retl # encoding: [0xc3]
1789 ;
1790 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_128:
1791 ; X64:       # %bb.0:
1792 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1793 ; X64-NEXT:    vpmovusqb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x12,0x07]
1794 ; X64-NEXT:    vpmovusqb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x12,0x07]
1795 ; X64-NEXT:    retq # encoding: [0xc3]
1796     call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
1797     call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
1798     ret void
1799 }
1800
1801 declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64>, <16 x i8>, i8)
1802
1803 define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
1804 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qb_256:
1805 ; X86:       # %bb.0:
1806 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1807 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1808 ; X86-NEXT:    vpmovqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x32,0xc2]
1809 ; X86-NEXT:    vpmovqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x32,0xc1]
1810 ; X86-NEXT:    vpmovqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x32,0xc0]
1811 ; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1812 ; X86-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
1813 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1814 ; X86-NEXT:    retl # encoding: [0xc3]
1815 ;
1816 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qb_256:
1817 ; X64:       # %bb.0:
1818 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1819 ; X64-NEXT:    vpmovqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x32,0xc2]
1820 ; X64-NEXT:    vpmovqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x32,0xc1]
1821 ; X64-NEXT:    vpmovqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x32,0xc0]
1822 ; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1823 ; X64-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
1824 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1825 ; X64-NEXT:    retq # encoding: [0xc3]
1826     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1)
1827     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2)
1828     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
1829     %res3 = add <16 x i8> %res0, %res1
1830     %res4 = add <16 x i8> %res3, %res2
1831     ret <16 x i8> %res4
1832 }
1833
1834 declare void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64>, i8)
1835
1836 define void @test_int_x86_avx512_mask_pmov_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
1837 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_256:
1838 ; X86:       # %bb.0:
1839 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1840 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1841 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1842 ; X86-NEXT:    vpmovqb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x32,0x00]
1843 ; X86-NEXT:    vpmovqb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x32,0x00]
1844 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1845 ; X86-NEXT:    retl # encoding: [0xc3]
1846 ;
1847 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_256:
1848 ; X64:       # %bb.0:
1849 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1850 ; X64-NEXT:    vpmovqb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x32,0x07]
1851 ; X64-NEXT:    vpmovqb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x32,0x07]
1852 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1853 ; X64-NEXT:    retq # encoding: [0xc3]
1854     call void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
1855     call void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
1856     ret void
1857 }
1858
1859 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64>, <16 x i8>, i8)
1860
1861 define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
1862 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qb_256:
1863 ; X86:       # %bb.0:
1864 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1865 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1866 ; X86-NEXT:    vpmovsqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x22,0xc2]
1867 ; X86-NEXT:    vpmovsqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x22,0xc1]
1868 ; X86-NEXT:    vpmovsqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x22,0xc0]
1869 ; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1870 ; X86-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
1871 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1872 ; X86-NEXT:    retl # encoding: [0xc3]
1873 ;
1874 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qb_256:
1875 ; X64:       # %bb.0:
1876 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1877 ; X64-NEXT:    vpmovsqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x22,0xc2]
1878 ; X64-NEXT:    vpmovsqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x22,0xc1]
1879 ; X64-NEXT:    vpmovsqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x22,0xc0]
1880 ; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1881 ; X64-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
1882 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1883 ; X64-NEXT:    retq # encoding: [0xc3]
1884     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1)
1885     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2)
1886     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
1887     %res3 = add <16 x i8> %res0, %res1
1888     %res4 = add <16 x i8> %res3, %res2
1889     ret <16 x i8> %res4
1890 }
1891
1892 declare void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64>, i8)
1893
1894 define void @test_int_x86_avx512_mask_pmovs_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
1895 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_256:
1896 ; X86:       # %bb.0:
1897 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1898 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1899 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1900 ; X86-NEXT:    vpmovsqb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x22,0x00]
1901 ; X86-NEXT:    vpmovsqb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x22,0x00]
1902 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1903 ; X86-NEXT:    retl # encoding: [0xc3]
1904 ;
1905 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_256:
1906 ; X64:       # %bb.0:
1907 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1908 ; X64-NEXT:    vpmovsqb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x22,0x07]
1909 ; X64-NEXT:    vpmovsqb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x22,0x07]
1910 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1911 ; X64-NEXT:    retq # encoding: [0xc3]
1912     call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
1913     call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
1914     ret void
1915 }
1916
1917 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64>, <16 x i8>, i8)
1918
1919 define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
1920 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qb_256:
1921 ; X86:       # %bb.0:
1922 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1923 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1924 ; X86-NEXT:    vpmovusqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x12,0xc2]
1925 ; X86-NEXT:    vpmovusqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x12,0xc1]
1926 ; X86-NEXT:    vpmovusqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x12,0xc0]
1927 ; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1928 ; X86-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
1929 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1930 ; X86-NEXT:    retl # encoding: [0xc3]
1931 ;
1932 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qb_256:
1933 ; X64:       # %bb.0:
1934 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1935 ; X64-NEXT:    vpmovusqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x12,0xc2]
1936 ; X64-NEXT:    vpmovusqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x12,0xc1]
1937 ; X64-NEXT:    vpmovusqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x12,0xc0]
1938 ; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1939 ; X64-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
1940 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1941 ; X64-NEXT:    retq # encoding: [0xc3]
1942     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1)
1943     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2)
1944     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
1945     %res3 = add <16 x i8> %res0, %res1
1946     %res4 = add <16 x i8> %res3, %res2
1947     ret <16 x i8> %res4
1948 }
1949
1950 declare void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64>, i8)
1951
1952 define void @test_int_x86_avx512_mask_pmovus_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
1953 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_256:
1954 ; X86:       # %bb.0:
1955 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1956 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1957 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1958 ; X86-NEXT:    vpmovusqb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x12,0x00]
1959 ; X86-NEXT:    vpmovusqb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x12,0x00]
1960 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1961 ; X86-NEXT:    retl # encoding: [0xc3]
1962 ;
1963 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_256:
1964 ; X64:       # %bb.0:
1965 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1966 ; X64-NEXT:    vpmovusqb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x12,0x07]
1967 ; X64-NEXT:    vpmovusqb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x12,0x07]
1968 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1969 ; X64-NEXT:    retq # encoding: [0xc3]
1970     call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
1971     call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
1972     ret void
1973 }
1974
1975 declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64>, <8 x i16>, i8)
1976
1977 define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) {
1978 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qw_128:
1979 ; X86:       # %bb.0:
1980 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1981 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1982 ; X86-NEXT:    vpmovqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x34,0xc2]
1983 ; X86-NEXT:    vpmovqw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x34,0xc1]
1984 ; X86-NEXT:    vpmovqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x34,0xc0]
1985 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
1986 ; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
1987 ; X86-NEXT:    retl # encoding: [0xc3]
1988 ;
1989 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qw_128:
1990 ; X64:       # %bb.0:
1991 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1992 ; X64-NEXT:    vpmovqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x34,0xc2]
1993 ; X64-NEXT:    vpmovqw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x34,0xc1]
1994 ; X64-NEXT:    vpmovqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x34,0xc0]
1995 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
1996 ; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
1997 ; X64-NEXT:    retq # encoding: [0xc3]
1998     %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1)
1999     %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2)
2000     %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
2001     %res3 = add <8 x i16> %res0, %res1
2002     %res4 = add <8 x i16> %res3, %res2
2003     ret <8 x i16> %res4
2004 }
2005
2006 declare void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64>, i8)
2007
2008 define void @test_int_x86_avx512_mask_pmov_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
2009 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_128:
2010 ; X86:       # %bb.0:
2011 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2012 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2013 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2014 ; X86-NEXT:    vpmovqw %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x34,0x00]
2015 ; X86-NEXT:    vpmovqw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x34,0x00]
2016 ; X86-NEXT:    retl # encoding: [0xc3]
2017 ;
2018 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_128:
2019 ; X64:       # %bb.0:
2020 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2021 ; X64-NEXT:    vpmovqw %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x34,0x07]
2022 ; X64-NEXT:    vpmovqw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x34,0x07]
2023 ; X64-NEXT:    retq # encoding: [0xc3]
2024     call void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
2025     call void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
2026     ret void
2027 }
2028
2029 declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64>, <8 x i16>, i8)
2030
2031 define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) {
2032 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qw_128:
2033 ; X86:       # %bb.0:
2034 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2035 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2036 ; X86-NEXT:    vpmovsqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x24,0xc2]
2037 ; X86-NEXT:    vpmovsqw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x24,0xc1]
2038 ; X86-NEXT:    vpmovsqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x24,0xc0]
2039 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2040 ; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
2041 ; X86-NEXT:    retl # encoding: [0xc3]
2042 ;
2043 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qw_128:
2044 ; X64:       # %bb.0:
2045 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2046 ; X64-NEXT:    vpmovsqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x24,0xc2]
2047 ; X64-NEXT:    vpmovsqw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x24,0xc1]
2048 ; X64-NEXT:    vpmovsqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x24,0xc0]
2049 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2050 ; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
2051 ; X64-NEXT:    retq # encoding: [0xc3]
2052     %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1)
2053     %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2)
2054     %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
2055     %res3 = add <8 x i16> %res0, %res1
2056     %res4 = add <8 x i16> %res3, %res2
2057     ret <8 x i16> %res4
2058 }
2059
2060 declare void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64>, i8)
2061
2062 define void @test_int_x86_avx512_mask_pmovs_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
2063 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_128:
2064 ; X86:       # %bb.0:
2065 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2066 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2067 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2068 ; X86-NEXT:    vpmovsqw %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x24,0x00]
2069 ; X86-NEXT:    vpmovsqw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x24,0x00]
2070 ; X86-NEXT:    retl # encoding: [0xc3]
2071 ;
2072 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_128:
2073 ; X64:       # %bb.0:
2074 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2075 ; X64-NEXT:    vpmovsqw %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x24,0x07]
2076 ; X64-NEXT:    vpmovsqw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x24,0x07]
2077 ; X64-NEXT:    retq # encoding: [0xc3]
2078     call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
2079     call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
2080     ret void
2081 }
2082
2083 declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64>, <8 x i16>, i8)
2084
2085 define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) {
2086 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qw_128:
2087 ; X86:       # %bb.0:
2088 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2089 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2090 ; X86-NEXT:    vpmovusqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x14,0xc2]
2091 ; X86-NEXT:    vpmovusqw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x14,0xc1]
2092 ; X86-NEXT:    vpmovusqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x14,0xc0]
2093 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2094 ; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
2095 ; X86-NEXT:    retl # encoding: [0xc3]
2096 ;
2097 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qw_128:
2098 ; X64:       # %bb.0:
2099 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2100 ; X64-NEXT:    vpmovusqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x14,0xc2]
2101 ; X64-NEXT:    vpmovusqw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x14,0xc1]
2102 ; X64-NEXT:    vpmovusqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x14,0xc0]
2103 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2104 ; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
2105 ; X64-NEXT:    retq # encoding: [0xc3]
2106     %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1)
2107     %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2)
2108     %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
2109     %res3 = add <8 x i16> %res0, %res1
2110     %res4 = add <8 x i16> %res3, %res2
2111     ret <8 x i16> %res4
2112 }
2113
2114 declare void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64>, i8)
2115
2116 define void @test_int_x86_avx512_mask_pmovus_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
2117 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_128:
2118 ; X86:       # %bb.0:
2119 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2120 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2121 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2122 ; X86-NEXT:    vpmovusqw %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x14,0x00]
2123 ; X86-NEXT:    vpmovusqw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x14,0x00]
2124 ; X86-NEXT:    retl # encoding: [0xc3]
2125 ;
2126 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_128:
2127 ; X64:       # %bb.0:
2128 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2129 ; X64-NEXT:    vpmovusqw %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x14,0x07]
2130 ; X64-NEXT:    vpmovusqw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x14,0x07]
2131 ; X64-NEXT:    retq # encoding: [0xc3]
2132     call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
2133     call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
2134     ret void
2135 }
2136
2137 declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64>, <8 x i16>, i8)
2138
2139 define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) {
2140 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qw_256:
2141 ; X86:       # %bb.0:
2142 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2143 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2144 ; X86-NEXT:    vpmovqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x34,0xc2]
2145 ; X86-NEXT:    vpmovqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x34,0xc1]
2146 ; X86-NEXT:    vpmovqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x34,0xc0]
2147 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2148 ; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
2149 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2150 ; X86-NEXT:    retl # encoding: [0xc3]
2151 ;
2152 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qw_256:
2153 ; X64:       # %bb.0:
2154 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2155 ; X64-NEXT:    vpmovqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x34,0xc2]
2156 ; X64-NEXT:    vpmovqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x34,0xc1]
2157 ; X64-NEXT:    vpmovqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x34,0xc0]
2158 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2159 ; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
2160 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2161 ; X64-NEXT:    retq # encoding: [0xc3]
2162     %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1)
2163     %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2)
2164     %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
2165     %res3 = add <8 x i16> %res0, %res1
2166     %res4 = add <8 x i16> %res3, %res2
2167     ret <8 x i16> %res4
2168 }
2169
2170 declare void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64>, i8)
2171
2172 define void @test_int_x86_avx512_mask_pmov_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
2173 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_256:
2174 ; X86:       # %bb.0:
2175 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2176 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2177 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2178 ; X86-NEXT:    vpmovqw %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x34,0x00]
2179 ; X86-NEXT:    vpmovqw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x34,0x00]
2180 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2181 ; X86-NEXT:    retl # encoding: [0xc3]
2182 ;
2183 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_256:
2184 ; X64:       # %bb.0:
2185 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2186 ; X64-NEXT:    vpmovqw %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x34,0x07]
2187 ; X64-NEXT:    vpmovqw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x34,0x07]
2188 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2189 ; X64-NEXT:    retq # encoding: [0xc3]
2190     call void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
2191     call void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
2192     ret void
2193 }
2194
2195 declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64>, <8 x i16>, i8)
2196
2197 define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) {
2198 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qw_256:
2199 ; X86:       # %bb.0:
2200 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2201 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2202 ; X86-NEXT:    vpmovsqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x24,0xc2]
2203 ; X86-NEXT:    vpmovsqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x24,0xc1]
2204 ; X86-NEXT:    vpmovsqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x24,0xc0]
2205 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2206 ; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
2207 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2208 ; X86-NEXT:    retl # encoding: [0xc3]
2209 ;
2210 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qw_256:
2211 ; X64:       # %bb.0:
2212 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2213 ; X64-NEXT:    vpmovsqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x24,0xc2]
2214 ; X64-NEXT:    vpmovsqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x24,0xc1]
2215 ; X64-NEXT:    vpmovsqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x24,0xc0]
2216 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2217 ; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
2218 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2219 ; X64-NEXT:    retq # encoding: [0xc3]
2220     %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1)
2221     %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2)
2222     %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
2223     %res3 = add <8 x i16> %res0, %res1
2224     %res4 = add <8 x i16> %res3, %res2
2225     ret <8 x i16> %res4
2226 }
2227
2228 declare void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64>, i8)
2229
2230 define void @test_int_x86_avx512_mask_pmovs_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
2231 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_256:
2232 ; X86:       # %bb.0:
2233 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2234 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2235 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2236 ; X86-NEXT:    vpmovsqw %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x24,0x00]
2237 ; X86-NEXT:    vpmovsqw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x24,0x00]
2238 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2239 ; X86-NEXT:    retl # encoding: [0xc3]
2240 ;
2241 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_256:
2242 ; X64:       # %bb.0:
2243 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2244 ; X64-NEXT:    vpmovsqw %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x24,0x07]
2245 ; X64-NEXT:    vpmovsqw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x24,0x07]
2246 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2247 ; X64-NEXT:    retq # encoding: [0xc3]
2248     call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
2249     call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
2250     ret void
2251 }
2252
2253 declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64>, <8 x i16>, i8)
2254
2255 define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) {
2256 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qw_256:
2257 ; X86:       # %bb.0:
2258 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2259 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2260 ; X86-NEXT:    vpmovusqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x14,0xc2]
2261 ; X86-NEXT:    vpmovusqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x14,0xc1]
2262 ; X86-NEXT:    vpmovusqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x14,0xc0]
2263 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2264 ; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
2265 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2266 ; X86-NEXT:    retl # encoding: [0xc3]
2267 ;
2268 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qw_256:
2269 ; X64:       # %bb.0:
2270 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2271 ; X64-NEXT:    vpmovusqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x14,0xc2]
2272 ; X64-NEXT:    vpmovusqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x14,0xc1]
2273 ; X64-NEXT:    vpmovusqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x14,0xc0]
2274 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2275 ; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
2276 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2277 ; X64-NEXT:    retq # encoding: [0xc3]
2278     %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1)
2279     %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2)
2280     %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
2281     %res3 = add <8 x i16> %res0, %res1
2282     %res4 = add <8 x i16> %res3, %res2
2283     ret <8 x i16> %res4
2284 }
2285
2286 declare void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64>, i8)
2287
2288 define void @test_int_x86_avx512_mask_pmovus_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
2289 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_256:
2290 ; X86:       # %bb.0:
2291 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2292 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2293 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2294 ; X86-NEXT:    vpmovusqw %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x14,0x00]
2295 ; X86-NEXT:    vpmovusqw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x14,0x00]
2296 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2297 ; X86-NEXT:    retl # encoding: [0xc3]
2298 ;
2299 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_256:
2300 ; X64:       # %bb.0:
2301 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2302 ; X64-NEXT:    vpmovusqw %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x14,0x07]
2303 ; X64-NEXT:    vpmovusqw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x14,0x07]
2304 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2305 ; X64-NEXT:    retq # encoding: [0xc3]
2306     call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
2307     call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
2308     ret void
2309 }
2310
2311 declare <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64>, <4 x i32>, i8)
2312
2313 define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) {
2314 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qd_128:
2315 ; X86:       # %bb.0:
2316 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2317 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2318 ; X86-NEXT:    vpmovqd %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x35,0xc2]
2319 ; X86-NEXT:    vpmovqd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x35,0xc1]
2320 ; X86-NEXT:    vpmovqd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x35,0xc0]
2321 ; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
2322 ; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
2323 ; X86-NEXT:    retl # encoding: [0xc3]
2324 ;
2325 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qd_128:
2326 ; X64:       # %bb.0:
2327 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2328 ; X64-NEXT:    vpmovqd %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x35,0xc2]
2329 ; X64-NEXT:    vpmovqd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x35,0xc1]
2330 ; X64-NEXT:    vpmovqd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x35,0xc0]
2331 ; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
2332 ; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
2333 ; X64-NEXT:    retq # encoding: [0xc3]
2334     %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
2335     %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2)
2336     %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
2337     %res3 = add <4 x i32> %res0, %res1
2338     %res4 = add <4 x i32> %res3, %res2
2339     ret <4 x i32> %res4
2340 }
2341
2342 declare void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64>, i8)
2343
2344 define void @test_int_x86_avx512_mask_pmov_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
2345 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_128:
2346 ; X86:       # %bb.0:
2347 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2348 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2349 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2350 ; X86-NEXT:    vpmovqd %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x35,0x00]
2351 ; X86-NEXT:    vpmovqd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x35,0x00]
2352 ; X86-NEXT:    retl # encoding: [0xc3]
2353 ;
2354 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_128:
2355 ; X64:       # %bb.0:
2356 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2357 ; X64-NEXT:    vpmovqd %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x35,0x07]
2358 ; X64-NEXT:    vpmovqd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x35,0x07]
2359 ; X64-NEXT:    retq # encoding: [0xc3]
2360     call void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
2361     call void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
2362     ret void
2363 }
2364
2365 declare <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64>, <4 x i32>, i8)
2366
2367 define <4 x i32>@test_int_x86_avx512_mask_pmovs_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) {
2368 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qd_128:
2369 ; X86:       # %bb.0:
2370 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2371 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2372 ; X86-NEXT:    vpmovsqd %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x25,0xc0]
2373 ; X86-NEXT:    vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
2374 ; X86-NEXT:    vmovdqa32 %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6f,0xd0]
2375 ; X86-NEXT:    vpaddd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xca]
2376 ; X86-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
2377 ; X86-NEXT:    retl # encoding: [0xc3]
2378 ;
2379 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qd_128:
2380 ; X64:       # %bb.0:
2381 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2382 ; X64-NEXT:    vpmovsqd %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x25,0xc0]
2383 ; X64-NEXT:    vmovdqa32 %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6f,0xd0]
2384 ; X64-NEXT:    vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
2385 ; X64-NEXT:    vpaddd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xca]
2386 ; X64-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
2387 ; X64-NEXT:    retq # encoding: [0xc3]
2388     %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
2389     %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2)
2390     %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
2391     %res3 = add <4 x i32> %res0, %res1
2392     %res4 = add <4 x i32> %res3, %res2
2393     ret <4 x i32> %res4
2394 }
2395
2396 declare void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64>, i8)
2397
2398 define void @test_int_x86_avx512_mask_pmovs_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
2399 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_128:
2400 ; X86:       # %bb.0:
2401 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2402 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2403 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2404 ; X86-NEXT:    vpmovsqd %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x25,0x00]
2405 ; X86-NEXT:    vpmovsqd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x25,0x00]
2406 ; X86-NEXT:    retl # encoding: [0xc3]
2407 ;
2408 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_128:
2409 ; X64:       # %bb.0:
2410 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2411 ; X64-NEXT:    vpmovsqd %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x25,0x07]
2412 ; X64-NEXT:    vpmovsqd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x25,0x07]
2413 ; X64-NEXT:    retq # encoding: [0xc3]
2414     call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
2415     call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
2416     ret void
2417 }
2418
2419 declare <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64>, <4 x i32>, i8)
2420
2421 define <4 x i32>@test_int_x86_avx512_mask_pmovus_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) {
2422 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qd_128:
2423 ; X86:       # %bb.0:
2424 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2425 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2426 ; X86-NEXT:    vpmovusqd %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x15,0xc2]
2427 ; X86-NEXT:    vpmovusqd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x15,0xc1]
2428 ; X86-NEXT:    vpmovusqd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x15,0xc0]
2429 ; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
2430 ; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
2431 ; X86-NEXT:    retl # encoding: [0xc3]
2432 ;
2433 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qd_128:
2434 ; X64:       # %bb.0:
2435 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2436 ; X64-NEXT:    vpmovusqd %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x15,0xc2]
2437 ; X64-NEXT:    vpmovusqd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x15,0xc1]
2438 ; X64-NEXT:    vpmovusqd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x15,0xc0]
2439 ; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
2440 ; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
2441 ; X64-NEXT:    retq # encoding: [0xc3]
2442     %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
2443     %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2)
2444     %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
2445     %res3 = add <4 x i32> %res0, %res1
2446     %res4 = add <4 x i32> %res3, %res2
2447     ret <4 x i32> %res4
2448 }
2449
2450 declare void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64>, i8)
2451
2452 define void @test_int_x86_avx512_mask_pmovus_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
2453 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_128:
2454 ; X86:       # %bb.0:
2455 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2456 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2457 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2458 ; X86-NEXT:    vpmovusqd %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x15,0x00]
2459 ; X86-NEXT:    vpmovusqd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x15,0x00]
2460 ; X86-NEXT:    retl # encoding: [0xc3]
2461 ;
2462 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_128:
2463 ; X64:       # %bb.0:
2464 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2465 ; X64-NEXT:    vpmovusqd %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x15,0x07]
2466 ; X64-NEXT:    vpmovusqd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x15,0x07]
2467 ; X64-NEXT:    retq # encoding: [0xc3]
2468     call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
2469     call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
2470     ret void
2471 }
2472
2473 define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
2474 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qd_256:
2475 ; X86:       # %bb.0:
2476 ; X86-NEXT:    vpmovqd %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x35,0xc2]
2477 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2478 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2479 ; X86-NEXT:    vpmovqd %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x35,0xc1]
2480 ; X86-NEXT:    vpmovqd %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x35,0xc0]
2481 ; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
2482 ; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
2483 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2484 ; X86-NEXT:    retl # encoding: [0xc3]
2485 ;
2486 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qd_256:
2487 ; X64:       # %bb.0:
2488 ; X64-NEXT:    vpmovqd %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x35,0xc2]
2489 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2490 ; X64-NEXT:    vpmovqd %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x35,0xc1]
2491 ; X64-NEXT:    vpmovqd %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x35,0xc0]
2492 ; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
2493 ; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
2494 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2495 ; X64-NEXT:    retq # encoding: [0xc3]
2496   %1 = trunc <4 x i64> %x0 to <4 x i32>
2497   %2 = trunc <4 x i64> %x0 to <4 x i32>
2498   %3 = bitcast i8 %x2 to <8 x i1>
2499   %extract1 = shufflevector <8 x i1> %3, <8 x i1> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2500   %4 = select <4 x i1> %extract1, <4 x i32> %2, <4 x i32> %x1
2501   %5 = trunc <4 x i64> %x0 to <4 x i32>
2502   %6 = bitcast i8 %x2 to <8 x i1>
2503   %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2504   %7 = select <4 x i1> %extract, <4 x i32> %5, <4 x i32> zeroinitializer
2505   %res3 = add <4 x i32> %1, %4
2506   %res4 = add <4 x i32> %res3, %7
2507   ret <4 x i32> %res4
2508 }
2509
2510 declare void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64>, i8)
2511
2512 define void @test_int_x86_avx512_mask_pmov_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
2513 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_256:
2514 ; X86:       # %bb.0:
2515 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2516 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2517 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2518 ; X86-NEXT:    vpmovqd %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x35,0x00]
2519 ; X86-NEXT:    vpmovqd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x35,0x00]
2520 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2521 ; X86-NEXT:    retl # encoding: [0xc3]
2522 ;
2523 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_256:
2524 ; X64:       # %bb.0:
2525 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2526 ; X64-NEXT:    vpmovqd %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x35,0x07]
2527 ; X64-NEXT:    vpmovqd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x35,0x07]
2528 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2529 ; X64-NEXT:    retq # encoding: [0xc3]
2530     call void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
2531     call void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
2532     ret void
2533 }
2534
2535 declare <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64>, <4 x i32>, i8)
2536
2537 define <4 x i32>@test_int_x86_avx512_mask_pmovs_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
2538 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qd_256:
2539 ; X86:       # %bb.0:
2540 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2541 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2542 ; X86-NEXT:    vpmovsqd %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x25,0xc1]
2543 ; X86-NEXT:    vpmovsqd %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x25,0xc2]
2544 ; X86-NEXT:    vpaddd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xca]
2545 ; X86-NEXT:    vpmovsqd %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x25,0xc0]
2546 ; X86-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
2547 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2548 ; X86-NEXT:    retl # encoding: [0xc3]
2549 ;
2550 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qd_256:
2551 ; X64:       # %bb.0:
2552 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2553 ; X64-NEXT:    vpmovsqd %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x25,0xc2]
2554 ; X64-NEXT:    vpmovsqd %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x25,0xc1]
2555 ; X64-NEXT:    vpaddd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xca]
2556 ; X64-NEXT:    vpmovsqd %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x25,0xc0]
2557 ; X64-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
2558 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2559 ; X64-NEXT:    retq # encoding: [0xc3]
2560     %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1)
2561     %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2)
2562     %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
2563     %res3 = add <4 x i32> %res0, %res1
2564     %res4 = add <4 x i32> %res3, %res2
2565     ret <4 x i32> %res4
2566 }
2567
2568 declare void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64>, i8)
2569
2570 define void @test_int_x86_avx512_mask_pmovs_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
2571 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_256:
2572 ; X86:       # %bb.0:
2573 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2574 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2575 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2576 ; X86-NEXT:    vpmovsqd %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x25,0x00]
2577 ; X86-NEXT:    vpmovsqd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x25,0x00]
2578 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2579 ; X86-NEXT:    retl # encoding: [0xc3]
2580 ;
2581 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_256:
2582 ; X64:       # %bb.0:
2583 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2584 ; X64-NEXT:    vpmovsqd %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x25,0x07]
2585 ; X64-NEXT:    vpmovsqd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x25,0x07]
2586 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2587 ; X64-NEXT:    retq # encoding: [0xc3]
2588     call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
2589     call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
2590     ret void
2591 }
2592
2593 declare <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64>, <4 x i32>, i8)
2594
2595 define <4 x i32>@test_int_x86_avx512_mask_pmovus_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
2596 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qd_256:
2597 ; X86:       # %bb.0:
2598 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2599 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2600 ; X86-NEXT:    vpmovusqd %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x15,0xc1]
2601 ; X86-NEXT:    vpmovusqd %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x15,0xc2]
2602 ; X86-NEXT:    vpaddd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xca]
2603 ; X86-NEXT:    vpmovusqd %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x15,0xc0]
2604 ; X86-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
2605 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2606 ; X86-NEXT:    retl # encoding: [0xc3]
2607 ;
2608 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qd_256:
2609 ; X64:       # %bb.0:
2610 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2611 ; X64-NEXT:    vpmovusqd %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x15,0xc2]
2612 ; X64-NEXT:    vpmovusqd %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x15,0xc1]
2613 ; X64-NEXT:    vpaddd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xca]
2614 ; X64-NEXT:    vpmovusqd %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x15,0xc0]
2615 ; X64-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
2616 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2617 ; X64-NEXT:    retq # encoding: [0xc3]
2618     %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1)
2619     %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2)
2620     %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
2621     %res3 = add <4 x i32> %res0, %res1
2622     %res4 = add <4 x i32> %res3, %res2
2623     ret <4 x i32> %res4
2624 }
2625
2626 declare void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64>, i8)
2627
2628 define void @test_int_x86_avx512_mask_pmovus_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
2629 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_256:
2630 ; X86:       # %bb.0:
2631 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2632 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2633 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2634 ; X86-NEXT:    vpmovusqd %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x15,0x00]
2635 ; X86-NEXT:    vpmovusqd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x15,0x00]
2636 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2637 ; X86-NEXT:    retl # encoding: [0xc3]
2638 ;
2639 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_256:
2640 ; X64:       # %bb.0:
2641 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2642 ; X64-NEXT:    vpmovusqd %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x15,0x07]
2643 ; X64-NEXT:    vpmovusqd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x15,0x07]
2644 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2645 ; X64-NEXT:    retq # encoding: [0xc3]
2646     call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
2647     call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
2648     ret void
2649 }
2650
2651 declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32>, <16 x i8>, i8)
2652
2653 define <16 x i8>@test_int_x86_avx512_mask_pmov_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
2654 ; X86-LABEL: test_int_x86_avx512_mask_pmov_db_128:
2655 ; X86:       # %bb.0:
2656 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2657 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2658 ; X86-NEXT:    vpmovdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x31,0xc2]
2659 ; X86-NEXT:    vpmovdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x31,0xc1]
2660 ; X86-NEXT:    vpmovdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x31,0xc0]
2661 ; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2662 ; X86-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
2663 ; X86-NEXT:    retl # encoding: [0xc3]
2664 ;
2665 ; X64-LABEL: test_int_x86_avx512_mask_pmov_db_128:
2666 ; X64:       # %bb.0:
2667 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2668 ; X64-NEXT:    vpmovdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x31,0xc2]
2669 ; X64-NEXT:    vpmovdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x31,0xc1]
2670 ; X64-NEXT:    vpmovdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x31,0xc0]
2671 ; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2672 ; X64-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
2673 ; X64-NEXT:    retq # encoding: [0xc3]
2674     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1)
2675     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2)
2676     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
2677     %res3 = add <16 x i8> %res0, %res1
2678     %res4 = add <16 x i8> %res3, %res2
2679     ret <16 x i8> %res4
2680 }
2681
2682 declare void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32>, i8)
2683
2684 define void @test_int_x86_avx512_mask_pmov_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
2685 ; X86-LABEL: test_int_x86_avx512_mask_pmov_db_mem_128:
2686 ; X86:       # %bb.0:
2687 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2688 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2689 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2690 ; X86-NEXT:    vpmovdb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x31,0x00]
2691 ; X86-NEXT:    vpmovdb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x31,0x00]
2692 ; X86-NEXT:    retl # encoding: [0xc3]
2693 ;
2694 ; X64-LABEL: test_int_x86_avx512_mask_pmov_db_mem_128:
2695 ; X64:       # %bb.0:
2696 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2697 ; X64-NEXT:    vpmovdb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x31,0x07]
2698 ; X64-NEXT:    vpmovdb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x31,0x07]
2699 ; X64-NEXT:    retq # encoding: [0xc3]
2700     call void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
2701     call void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
2702     ret void
2703 }
2704
2705 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32>, <16 x i8>, i8)
2706
2707 define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
2708 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_db_128:
2709 ; X86:       # %bb.0:
2710 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2711 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2712 ; X86-NEXT:    vpmovsdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x21,0xc2]
2713 ; X86-NEXT:    vpmovsdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x21,0xc1]
2714 ; X86-NEXT:    vpmovsdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x21,0xc0]
2715 ; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2716 ; X86-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
2717 ; X86-NEXT:    retl # encoding: [0xc3]
2718 ;
2719 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_db_128:
2720 ; X64:       # %bb.0:
2721 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2722 ; X64-NEXT:    vpmovsdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x21,0xc2]
2723 ; X64-NEXT:    vpmovsdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x21,0xc1]
2724 ; X64-NEXT:    vpmovsdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x21,0xc0]
2725 ; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2726 ; X64-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
2727 ; X64-NEXT:    retq # encoding: [0xc3]
2728     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1)
2729     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2)
2730     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
2731     %res3 = add <16 x i8> %res0, %res1
2732     %res4 = add <16 x i8> %res3, %res2
2733     ret <16 x i8> %res4
2734 }
2735
2736 declare void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32>, i8)
2737
2738 define void @test_int_x86_avx512_mask_pmovs_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
2739 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_128:
2740 ; X86:       # %bb.0:
2741 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2742 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2743 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2744 ; X86-NEXT:    vpmovsdb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x21,0x00]
2745 ; X86-NEXT:    vpmovsdb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x21,0x00]
2746 ; X86-NEXT:    retl # encoding: [0xc3]
2747 ;
2748 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_128:
2749 ; X64:       # %bb.0:
2750 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2751 ; X64-NEXT:    vpmovsdb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x21,0x07]
2752 ; X64-NEXT:    vpmovsdb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x21,0x07]
2753 ; X64-NEXT:    retq # encoding: [0xc3]
2754     call void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
2755     call void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
2756     ret void
2757 }
2758
2759 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32>, <16 x i8>, i8)
2760
2761 define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
2762 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_db_128:
2763 ; X86:       # %bb.0:
2764 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2765 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2766 ; X86-NEXT:    vpmovusdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x11,0xc2]
2767 ; X86-NEXT:    vpmovusdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x11,0xc1]
2768 ; X86-NEXT:    vpmovusdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x11,0xc0]
2769 ; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2770 ; X86-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
2771 ; X86-NEXT:    retl # encoding: [0xc3]
2772 ;
2773 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_db_128:
2774 ; X64:       # %bb.0:
2775 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2776 ; X64-NEXT:    vpmovusdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x11,0xc2]
2777 ; X64-NEXT:    vpmovusdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x11,0xc1]
2778 ; X64-NEXT:    vpmovusdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x11,0xc0]
2779 ; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2780 ; X64-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
2781 ; X64-NEXT:    retq # encoding: [0xc3]
2782     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1)
2783     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2)
2784     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
2785     %res3 = add <16 x i8> %res0, %res1
2786     %res4 = add <16 x i8> %res3, %res2
2787     ret <16 x i8> %res4
2788 }
2789
2790 declare void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32>, i8)
2791
2792 define void @test_int_x86_avx512_mask_pmovus_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
2793 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_128:
2794 ; X86:       # %bb.0:
2795 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2796 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2797 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2798 ; X86-NEXT:    vpmovusdb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x11,0x00]
2799 ; X86-NEXT:    vpmovusdb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x11,0x00]
2800 ; X86-NEXT:    retl # encoding: [0xc3]
2801 ;
2802 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_128:
2803 ; X64:       # %bb.0:
2804 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2805 ; X64-NEXT:    vpmovusdb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x11,0x07]
2806 ; X64-NEXT:    vpmovusdb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x11,0x07]
2807 ; X64-NEXT:    retq # encoding: [0xc3]
2808     call void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
2809     call void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
2810     ret void
2811 }
2812
2813 declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32>, <16 x i8>, i8)
2814
2815 define <16 x i8>@test_int_x86_avx512_mask_pmov_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
2816 ; X86-LABEL: test_int_x86_avx512_mask_pmov_db_256:
2817 ; X86:       # %bb.0:
2818 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2819 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2820 ; X86-NEXT:    vpmovdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x31,0xc2]
2821 ; X86-NEXT:    vpmovdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x31,0xc1]
2822 ; X86-NEXT:    vpmovdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x31,0xc0]
2823 ; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2824 ; X86-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
2825 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2826 ; X86-NEXT:    retl # encoding: [0xc3]
2827 ;
2828 ; X64-LABEL: test_int_x86_avx512_mask_pmov_db_256:
2829 ; X64:       # %bb.0:
2830 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2831 ; X64-NEXT:    vpmovdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x31,0xc2]
2832 ; X64-NEXT:    vpmovdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x31,0xc1]
2833 ; X64-NEXT:    vpmovdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x31,0xc0]
2834 ; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2835 ; X64-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
2836 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2837 ; X64-NEXT:    retq # encoding: [0xc3]
2838     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1)
2839     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2)
2840     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
2841     %res3 = add <16 x i8> %res0, %res1
2842     %res4 = add <16 x i8> %res3, %res2
2843     ret <16 x i8> %res4
2844 }
2845
2846 declare void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32>, i8)
2847
2848 define void @test_int_x86_avx512_mask_pmov_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
2849 ; X86-LABEL: test_int_x86_avx512_mask_pmov_db_mem_256:
2850 ; X86:       # %bb.0:
2851 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2852 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2853 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2854 ; X86-NEXT:    vpmovdb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x31,0x00]
2855 ; X86-NEXT:    vpmovdb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x31,0x00]
2856 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2857 ; X86-NEXT:    retl # encoding: [0xc3]
2858 ;
2859 ; X64-LABEL: test_int_x86_avx512_mask_pmov_db_mem_256:
2860 ; X64:       # %bb.0:
2861 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2862 ; X64-NEXT:    vpmovdb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x31,0x07]
2863 ; X64-NEXT:    vpmovdb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x31,0x07]
2864 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2865 ; X64-NEXT:    retq # encoding: [0xc3]
2866     call void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
2867     call void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
2868     ret void
2869 }
2870
2871 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32>, <16 x i8>, i8)
2872
2873 define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
2874 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_db_256:
2875 ; X86:       # %bb.0:
2876 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2877 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2878 ; X86-NEXT:    vpmovsdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x21,0xc2]
2879 ; X86-NEXT:    vpmovsdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x21,0xc1]
2880 ; X86-NEXT:    vpmovsdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x21,0xc0]
2881 ; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2882 ; X86-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
2883 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2884 ; X86-NEXT:    retl # encoding: [0xc3]
2885 ;
2886 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_db_256:
2887 ; X64:       # %bb.0:
2888 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2889 ; X64-NEXT:    vpmovsdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x21,0xc2]
2890 ; X64-NEXT:    vpmovsdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x21,0xc1]
2891 ; X64-NEXT:    vpmovsdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x21,0xc0]
2892 ; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2893 ; X64-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
2894 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2895 ; X64-NEXT:    retq # encoding: [0xc3]
2896     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1)
2897     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2)
2898     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
2899     %res3 = add <16 x i8> %res0, %res1
2900     %res4 = add <16 x i8> %res3, %res2
2901     ret <16 x i8> %res4
2902 }
2903
2904 declare void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32>, i8)
2905
2906 define void @test_int_x86_avx512_mask_pmovs_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
2907 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_256:
2908 ; X86:       # %bb.0:
2909 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2910 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2911 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2912 ; X86-NEXT:    vpmovsdb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x21,0x00]
2913 ; X86-NEXT:    vpmovsdb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x21,0x00]
2914 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2915 ; X86-NEXT:    retl # encoding: [0xc3]
2916 ;
2917 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_256:
2918 ; X64:       # %bb.0:
2919 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2920 ; X64-NEXT:    vpmovsdb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x21,0x07]
2921 ; X64-NEXT:    vpmovsdb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x21,0x07]
2922 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2923 ; X64-NEXT:    retq # encoding: [0xc3]
2924     call void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
2925     call void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
2926     ret void
2927 }
2928
2929 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32>, <16 x i8>, i8)
2930
2931 define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
2932 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_db_256:
2933 ; X86:       # %bb.0:
2934 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2935 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2936 ; X86-NEXT:    vpmovusdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x11,0xc2]
2937 ; X86-NEXT:    vpmovusdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x11,0xc1]
2938 ; X86-NEXT:    vpmovusdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x11,0xc0]
2939 ; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2940 ; X86-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
2941 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2942 ; X86-NEXT:    retl # encoding: [0xc3]
2943 ;
2944 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_db_256:
2945 ; X64:       # %bb.0:
2946 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2947 ; X64-NEXT:    vpmovusdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x11,0xc2]
2948 ; X64-NEXT:    vpmovusdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x11,0xc1]
2949 ; X64-NEXT:    vpmovusdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x11,0xc0]
2950 ; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2951 ; X64-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
2952 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2953 ; X64-NEXT:    retq # encoding: [0xc3]
2954     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1)
2955     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2)
2956     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
2957     %res3 = add <16 x i8> %res0, %res1
2958     %res4 = add <16 x i8> %res3, %res2
2959     ret <16 x i8> %res4
2960 }
2961
2962 declare void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32>, i8)
2963
2964 define void @test_int_x86_avx512_mask_pmovus_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
2965 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_256:
2966 ; X86:       # %bb.0:
2967 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2968 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2969 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2970 ; X86-NEXT:    vpmovusdb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x11,0x00]
2971 ; X86-NEXT:    vpmovusdb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x11,0x00]
2972 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2973 ; X86-NEXT:    retl # encoding: [0xc3]
2974 ;
2975 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_256:
2976 ; X64:       # %bb.0:
2977 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2978 ; X64-NEXT:    vpmovusdb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x11,0x07]
2979 ; X64-NEXT:    vpmovusdb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x11,0x07]
2980 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2981 ; X64-NEXT:    retq # encoding: [0xc3]
2982     call void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
2983     call void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
2984     ret void
2985 }
2986
2987 declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32>, <8 x i16>, i8)
2988
2989 define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) {
2990 ; X86-LABEL: test_int_x86_avx512_mask_pmov_dw_128:
2991 ; X86:       # %bb.0:
2992 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2993 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2994 ; X86-NEXT:    vpmovdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x33,0xc2]
2995 ; X86-NEXT:    vpmovdw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x33,0xc1]
2996 ; X86-NEXT:    vpmovdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x33,0xc0]
2997 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2998 ; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
2999 ; X86-NEXT:    retl # encoding: [0xc3]
3000 ;
3001 ; X64-LABEL: test_int_x86_avx512_mask_pmov_dw_128:
3002 ; X64:       # %bb.0:
3003 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3004 ; X64-NEXT:    vpmovdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x33,0xc2]
3005 ; X64-NEXT:    vpmovdw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x33,0xc1]
3006 ; X64-NEXT:    vpmovdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x33,0xc0]
3007 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3008 ; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
3009 ; X64-NEXT:    retq # encoding: [0xc3]
3010     %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1)
3011     %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2)
3012     %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
3013     %res3 = add <8 x i16> %res0, %res1
3014     %res4 = add <8 x i16> %res3, %res2
3015     ret <8 x i16> %res4
3016 }
3017
3018 declare void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32>, i8)
3019
3020 define void @test_int_x86_avx512_mask_pmov_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
3021 ; X86-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_128:
3022 ; X86:       # %bb.0:
3023 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3024 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3025 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3026 ; X86-NEXT:    vpmovdw %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x33,0x00]
3027 ; X86-NEXT:    vpmovdw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x33,0x00]
3028 ; X86-NEXT:    retl # encoding: [0xc3]
3029 ;
3030 ; X64-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_128:
3031 ; X64:       # %bb.0:
3032 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3033 ; X64-NEXT:    vpmovdw %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x33,0x07]
3034 ; X64-NEXT:    vpmovdw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x33,0x07]
3035 ; X64-NEXT:    retq # encoding: [0xc3]
3036     call void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
3037     call void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
3038     ret void
3039 }
3040
3041 declare <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32>, <8 x i16>, i8)
3042
3043 define <8 x i16>@test_int_x86_avx512_mask_pmovs_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) {
3044 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_dw_128:
3045 ; X86:       # %bb.0:
3046 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3047 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3048 ; X86-NEXT:    vpmovsdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x23,0xc2]
3049 ; X86-NEXT:    vpmovsdw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x23,0xc1]
3050 ; X86-NEXT:    vpmovsdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x23,0xc0]
3051 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3052 ; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
3053 ; X86-NEXT:    retl # encoding: [0xc3]
3054 ;
3055 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_dw_128:
3056 ; X64:       # %bb.0:
3057 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3058 ; X64-NEXT:    vpmovsdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x23,0xc2]
3059 ; X64-NEXT:    vpmovsdw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x23,0xc1]
3060 ; X64-NEXT:    vpmovsdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x23,0xc0]
3061 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3062 ; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
3063 ; X64-NEXT:    retq # encoding: [0xc3]
3064     %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1)
3065     %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2)
3066     %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
3067     %res3 = add <8 x i16> %res0, %res1
3068     %res4 = add <8 x i16> %res3, %res2
3069     ret <8 x i16> %res4
3070 }
3071
3072 declare void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32>, i8)
3073
3074 define void @test_int_x86_avx512_mask_pmovs_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
3075 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_128:
3076 ; X86:       # %bb.0:
3077 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3078 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3079 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3080 ; X86-NEXT:    vpmovsdw %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x23,0x00]
3081 ; X86-NEXT:    vpmovsdw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x23,0x00]
3082 ; X86-NEXT:    retl # encoding: [0xc3]
3083 ;
3084 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_128:
3085 ; X64:       # %bb.0:
3086 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3087 ; X64-NEXT:    vpmovsdw %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x23,0x07]
3088 ; X64-NEXT:    vpmovsdw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x23,0x07]
3089 ; X64-NEXT:    retq # encoding: [0xc3]
3090     call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
3091     call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
3092     ret void
3093 }
3094
3095 declare <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32>, <8 x i16>, i8)
3096
3097 define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) {
3098 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_dw_128:
3099 ; X86:       # %bb.0:
3100 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3101 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3102 ; X86-NEXT:    vpmovusdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x13,0xc2]
3103 ; X86-NEXT:    vpmovusdw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x13,0xc1]
3104 ; X86-NEXT:    vpmovusdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x13,0xc0]
3105 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3106 ; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
3107 ; X86-NEXT:    retl # encoding: [0xc3]
3108 ;
3109 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_dw_128:
3110 ; X64:       # %bb.0:
3111 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3112 ; X64-NEXT:    vpmovusdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x13,0xc2]
3113 ; X64-NEXT:    vpmovusdw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x13,0xc1]
3114 ; X64-NEXT:    vpmovusdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x13,0xc0]
3115 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3116 ; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
3117 ; X64-NEXT:    retq # encoding: [0xc3]
3118     %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1)
3119     %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2)
3120     %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
3121     %res3 = add <8 x i16> %res0, %res1
3122     %res4 = add <8 x i16> %res3, %res2
3123     ret <8 x i16> %res4
3124 }
3125
3126 declare void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32>, i8)
3127
3128 define void @test_int_x86_avx512_mask_pmovus_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
3129 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_128:
3130 ; X86:       # %bb.0:
3131 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3132 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3133 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3134 ; X86-NEXT:    vpmovusdw %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x13,0x00]
3135 ; X86-NEXT:    vpmovusdw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x13,0x00]
3136 ; X86-NEXT:    retl # encoding: [0xc3]
3137 ;
3138 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_128:
3139 ; X64:       # %bb.0:
3140 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3141 ; X64-NEXT:    vpmovusdw %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x13,0x07]
3142 ; X64-NEXT:    vpmovusdw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x13,0x07]
3143 ; X64-NEXT:    retq # encoding: [0xc3]
3144     call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
3145     call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
3146     ret void
3147 }
3148
3149 declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32>, <8 x i16>, i8)
3150
3151 define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) {
3152 ; X86-LABEL: test_int_x86_avx512_mask_pmov_dw_256:
3153 ; X86:       # %bb.0:
3154 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3155 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3156 ; X86-NEXT:    vpmovdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x33,0xc2]
3157 ; X86-NEXT:    vpmovdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x33,0xc1]
3158 ; X86-NEXT:    vpmovdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x33,0xc0]
3159 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3160 ; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
3161 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3162 ; X86-NEXT:    retl # encoding: [0xc3]
3163 ;
3164 ; X64-LABEL: test_int_x86_avx512_mask_pmov_dw_256:
3165 ; X64:       # %bb.0:
3166 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3167 ; X64-NEXT:    vpmovdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x33,0xc2]
3168 ; X64-NEXT:    vpmovdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x33,0xc1]
3169 ; X64-NEXT:    vpmovdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x33,0xc0]
3170 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3171 ; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
3172 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3173 ; X64-NEXT:    retq # encoding: [0xc3]
3174     %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)
3175     %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2)
3176     %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
3177     %res3 = add <8 x i16> %res0, %res1
3178     %res4 = add <8 x i16> %res3, %res2
3179     ret <8 x i16> %res4
3180 }
3181
3182 declare void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32>, i8)
3183
3184 define void @test_int_x86_avx512_mask_pmov_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
3185 ; X86-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_256:
3186 ; X86:       # %bb.0:
3187 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3188 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3189 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3190 ; X86-NEXT:    vpmovdw %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x33,0x00]
3191 ; X86-NEXT:    vpmovdw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x33,0x00]
3192 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3193 ; X86-NEXT:    retl # encoding: [0xc3]
3194 ;
3195 ; X64-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_256:
3196 ; X64:       # %bb.0:
3197 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3198 ; X64-NEXT:    vpmovdw %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x33,0x07]
3199 ; X64-NEXT:    vpmovdw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x33,0x07]
3200 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3201 ; X64-NEXT:    retq # encoding: [0xc3]
3202     call void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
3203     call void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
3204     ret void
3205 }
3206
3207 declare <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32>, <8 x i16>, i8)
3208
3209 define <8 x i16>@test_int_x86_avx512_mask_pmovs_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) {
3210 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_dw_256:
3211 ; X86:       # %bb.0:
3212 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3213 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3214 ; X86-NEXT:    vpmovsdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x23,0xc2]
3215 ; X86-NEXT:    vpmovsdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x23,0xc1]
3216 ; X86-NEXT:    vpmovsdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x23,0xc0]
3217 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3218 ; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
3219 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3220 ; X86-NEXT:    retl # encoding: [0xc3]
3221 ;
3222 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_dw_256:
3223 ; X64:       # %bb.0:
3224 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3225 ; X64-NEXT:    vpmovsdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x23,0xc2]
3226 ; X64-NEXT:    vpmovsdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x23,0xc1]
3227 ; X64-NEXT:    vpmovsdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x23,0xc0]
3228 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3229 ; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
3230 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3231 ; X64-NEXT:    retq # encoding: [0xc3]
3232     %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)
3233     %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2)
3234     %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
3235     %res3 = add <8 x i16> %res0, %res1
3236     %res4 = add <8 x i16> %res3, %res2
3237     ret <8 x i16> %res4
3238 }
3239
3240 declare void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32>, i8)
3241
3242 define void @test_int_x86_avx512_mask_pmovs_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
3243 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_256:
3244 ; X86:       # %bb.0:
3245 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3246 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3247 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3248 ; X86-NEXT:    vpmovsdw %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x23,0x00]
3249 ; X86-NEXT:    vpmovsdw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x23,0x00]
3250 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3251 ; X86-NEXT:    retl # encoding: [0xc3]
3252 ;
3253 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_256:
3254 ; X64:       # %bb.0:
3255 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3256 ; X64-NEXT:    vpmovsdw %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x23,0x07]
3257 ; X64-NEXT:    vpmovsdw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x23,0x07]
3258 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3259 ; X64-NEXT:    retq # encoding: [0xc3]
3260     call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
3261     call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
3262     ret void
3263 }
3264
3265 declare <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32>, <8 x i16>, i8)
3266
3267 define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) {
3268 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_dw_256:
3269 ; X86:       # %bb.0:
3270 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3271 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3272 ; X86-NEXT:    vpmovusdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x13,0xc2]
3273 ; X86-NEXT:    vpmovusdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x13,0xc1]
3274 ; X86-NEXT:    vpmovusdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x13,0xc0]
3275 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3276 ; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
3277 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3278 ; X86-NEXT:    retl # encoding: [0xc3]
3279 ;
3280 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_dw_256:
3281 ; X64:       # %bb.0:
3282 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3283 ; X64-NEXT:    vpmovusdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x13,0xc2]
3284 ; X64-NEXT:    vpmovusdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x13,0xc1]
3285 ; X64-NEXT:    vpmovusdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x13,0xc0]
3286 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3287 ; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
3288 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3289 ; X64-NEXT:    retq # encoding: [0xc3]
3290     %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)
3291     %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2)
3292     %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
3293     %res3 = add <8 x i16> %res0, %res1
3294     %res4 = add <8 x i16> %res3, %res2
3295     ret <8 x i16> %res4
3296 }
3297
3298 declare void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32>, i8)
3299
3300 define void @test_int_x86_avx512_mask_pmovus_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
3301 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_256:
3302 ; X86:       # %bb.0:
3303 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3304 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3305 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3306 ; X86-NEXT:    vpmovusdw %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x13,0x00]
3307 ; X86-NEXT:    vpmovusdw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x13,0x00]
3308 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3309 ; X86-NEXT:    retl # encoding: [0xc3]
3310 ;
3311 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_256:
3312 ; X64:       # %bb.0:
3313 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3314 ; X64-NEXT:    vpmovusdw %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x13,0x07]
3315 ; X64-NEXT:    vpmovusdw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x13,0x07]
3316 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3317 ; X64-NEXT:    retq # encoding: [0xc3]
3318     call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
3319     call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
3320     ret void
3321 }
3322
3323 declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double>, <4 x i32>, i8)
3324
3325 define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
3326 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_128:
3327 ; X86:       # %bb.0:
3328 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3329 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3330 ; X86-NEXT:    vcvtpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0xe6,0xc8]
3331 ; X86-NEXT:    vcvtpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
3332 ; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3333 ; X86-NEXT:    retl # encoding: [0xc3]
3334 ;
3335 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_128:
3336 ; X64:       # %bb.0:
3337 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3338 ; X64-NEXT:    vcvtpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0xe6,0xc8]
3339 ; X64-NEXT:    vcvtpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
3340 ; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3341 ; X64-NEXT:    retq # encoding: [0xc3]
3342   %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
3343   %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
3344   %res2 = add <4 x i32> %res, %res1
3345   ret <4 x i32> %res2
3346 }
3347
3348 define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_128_zext(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
3349 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_128_zext:
3350 ; X86:       # %bb.0:
3351 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3352 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3353 ; X86-NEXT:    vcvtpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0xe6,0xc8]
3354 ; X86-NEXT:    vcvtpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
3355 ; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3356 ; X86-NEXT:    retl # encoding: [0xc3]
3357 ;
3358 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_128_zext:
3359 ; X64:       # %bb.0:
3360 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3361 ; X64-NEXT:    vcvtpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0xe6,0xc8]
3362 ; X64-NEXT:    vcvtpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
3363 ; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3364 ; X64-NEXT:    retq # encoding: [0xc3]
3365   %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
3366   %res1 = shufflevector <4 x i32> %res, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3367   %res2 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
3368   %res3 = shufflevector <4 x i32> %res2, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3369   %res4 = add <4 x i32> %res1, %res3
3370   ret <4 x i32> %res4
3371 }
3372
3373 declare <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double>, <4 x float>, i8)
3374
3375 define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps(<2 x double> %x0, <4 x float> %x1, i8 %x2) {
3376 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2ps:
3377 ; X86:       # %bb.0:
3378 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3379 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3380 ; X86-NEXT:    vcvtpd2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x5a,0xc8]
3381 ; X86-NEXT:    vcvtpd2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0]
3382 ; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
3383 ; X86-NEXT:    retl # encoding: [0xc3]
3384 ;
3385 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2ps:
3386 ; X64:       # %bb.0:
3387 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3388 ; X64-NEXT:    vcvtpd2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x5a,0xc8]
3389 ; X64-NEXT:    vcvtpd2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0]
3390 ; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
3391 ; X64-NEXT:    retq # encoding: [0xc3]
3392   %res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double> %x0, <4 x float> %x1, i8 %x2)
3393   %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double> %x0, <4 x float> %x1, i8 -1)
3394   %res2 = fadd <4 x float> %res, %res1
3395   ret <4 x float> %res2
3396 }
3397
3398 define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps_zext(<2 x double> %x0, <4 x float> %x1, i8 %x2) {
3399 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_zext:
3400 ; X86:       # %bb.0:
3401 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3402 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3403 ; X86-NEXT:    vcvtpd2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x5a,0xc8]
3404 ; X86-NEXT:    vcvtpd2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0]
3405 ; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
3406 ; X86-NEXT:    retl # encoding: [0xc3]
3407 ;
3408 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_zext:
3409 ; X64:       # %bb.0:
3410 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3411 ; X64-NEXT:    vcvtpd2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x5a,0xc8]
3412 ; X64-NEXT:    vcvtpd2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0]
3413 ; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
3414 ; X64-NEXT:    retq # encoding: [0xc3]
3415   %res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double> %x0, <4 x float> %x1, i8 %x2)
3416   %res1 = shufflevector <4 x float> %res, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3417   %res2 = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double> %x0, <4 x float> %x1, i8 -1)
3418   %res3 = shufflevector <4 x float> %res2, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3419   %res4 = fadd <4 x float> %res1, %res3
3420   ret <4 x float> %res4
3421 }
3422
3423 declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double>, <4 x i32>, i8)
3424
3425 define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
3426 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_128:
3427 ; X86:       # %bb.0:
3428 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3429 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3430 ; X86-NEXT:    vcvtpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x79,0xc8]
3431 ; X86-NEXT:    vcvtpd2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x79,0xc0]
3432 ; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3433 ; X86-NEXT:    retl # encoding: [0xc3]
3434 ;
3435 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_128:
3436 ; X64:       # %bb.0:
3437 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3438 ; X64-NEXT:    vcvtpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x79,0xc8]
3439 ; X64-NEXT:    vcvtpd2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x79,0xc0]
3440 ; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3441 ; X64-NEXT:    retq # encoding: [0xc3]
3442   %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
3443   %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
3444   %res2 = add <4 x i32> %res, %res1
3445   ret <4 x i32> %res2
3446 }
3447
3448 define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_128_zext(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
3449 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_128_zext:
3450 ; X86:       # %bb.0:
3451 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3452 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3453 ; X86-NEXT:    vcvtpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x79,0xc8]
3454 ; X86-NEXT:    vcvtpd2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x79,0xc0]
3455 ; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3456 ; X86-NEXT:    retl # encoding: [0xc3]
3457 ;
3458 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_128_zext:
3459 ; X64:       # %bb.0:
3460 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3461 ; X64-NEXT:    vcvtpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x79,0xc8]
3462 ; X64-NEXT:    vcvtpd2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x79,0xc0]
3463 ; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3464 ; X64-NEXT:    retq # encoding: [0xc3]
3465   %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
3466   %res1 = shufflevector <4 x i32> %res, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3467   %res2 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
3468   %res3 = shufflevector <4 x i32> %res2, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3469   %res4 = add <4 x i32> %res1, %res3
3470   ret <4 x i32> %res4
3471 }
3472
3473 declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double>, <4 x i32>, i8)
3474
3475 define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
3476 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_256:
3477 ; X86:       # %bb.0:
3478 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3479 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3480 ; X86-NEXT:    vcvtpd2udq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x79,0xc8]
3481 ; X86-NEXT:    vcvtpd2udq %ymm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x28,0x79,0xc0]
3482 ; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3483 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3484 ; X86-NEXT:    retl # encoding: [0xc3]
3485 ;
3486 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_256:
3487 ; X64:       # %bb.0:
3488 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3489 ; X64-NEXT:    vcvtpd2udq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x79,0xc8]
3490 ; X64-NEXT:    vcvtpd2udq %ymm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x28,0x79,0xc0]
3491 ; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3492 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3493 ; X64-NEXT:    retq # encoding: [0xc3]
3494   %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
3495   %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
3496   %res2 = add <4 x i32> %res, %res1
3497   ret <4 x i32> %res2
3498 }
3499
3500 declare <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float>, <4 x i32>, i8)
3501
3502 define <4 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
3503 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_128:
3504 ; X86:       # %bb.0:
3505 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3506 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3507 ; X86-NEXT:    vcvtps2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x5b,0xc8]
3508 ; X86-NEXT:    vcvtps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5b,0xc0]
3509 ; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3510 ; X86-NEXT:    retl # encoding: [0xc3]
3511 ;
3512 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_128:
3513 ; X64:       # %bb.0:
3514 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3515 ; X64-NEXT:    vcvtps2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x5b,0xc8]
3516 ; X64-NEXT:    vcvtps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5b,0xc0]
3517 ; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3518 ; X64-NEXT:    retq # encoding: [0xc3]
3519   %res = call <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
3520   %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
3521   %res2 = add <4 x i32> %res, %res1
3522   ret <4 x i32> %res2
3523 }
3524
3525 declare <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float>, <8 x i32>, i8)
3526
3527 define <8 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
3528 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_256:
3529 ; X86:       # %bb.0:
3530 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3531 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3532 ; X86-NEXT:    vcvtps2dq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x5b,0xc8]
3533 ; X86-NEXT:    vcvtps2dq %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5b,0xc0]
3534 ; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
3535 ; X86-NEXT:    retl # encoding: [0xc3]
3536 ;
3537 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_256:
3538 ; X64:       # %bb.0:
3539 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3540 ; X64-NEXT:    vcvtps2dq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x5b,0xc8]
3541 ; X64-NEXT:    vcvtps2dq %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5b,0xc0]
3542 ; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
3543 ; X64-NEXT:    retq # encoding: [0xc3]
3544   %res = call <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
3545   %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
3546   %res2 = add <8 x i32> %res, %res1
3547   ret <8 x i32> %res2
3548 }
3549
3550 declare <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float>, <4 x i32>, i8)
3551
3552 define <4 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
3553 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_128:
3554 ; X86:       # %bb.0:
3555 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3556 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3557 ; X86-NEXT:    vcvtps2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x79,0xc8]
3558 ; X86-NEXT:    vcvtps2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x08,0x79,0xc0]
3559 ; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3560 ; X86-NEXT:    retl # encoding: [0xc3]
3561 ;
3562 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_128:
3563 ; X64:       # %bb.0:
3564 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3565 ; X64-NEXT:    vcvtps2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x79,0xc8]
3566 ; X64-NEXT:    vcvtps2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x08,0x79,0xc0]
3567 ; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3568 ; X64-NEXT:    retq # encoding: [0xc3]
3569   %res = call <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
3570   %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
3571   %res2 = add <4 x i32> %res, %res1
3572   ret <4 x i32> %res2
3573 }
3574
3575 declare <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float>, <8 x i32>, i8)
3576
3577 define <8 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
3578 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_256:
3579 ; X86:       # %bb.0:
3580 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3581 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3582 ; X86-NEXT:    vcvtps2udq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x79,0xc8]
3583 ; X86-NEXT:    vcvtps2udq %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x28,0x79,0xc0]
3584 ; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
3585 ; X86-NEXT:    retl # encoding: [0xc3]
3586 ;
3587 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_256:
3588 ; X64:       # %bb.0:
3589 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3590 ; X64-NEXT:    vcvtps2udq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x79,0xc8]
3591 ; X64-NEXT:    vcvtps2udq %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x28,0x79,0xc0]
3592 ; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
3593 ; X64-NEXT:    retq # encoding: [0xc3]
3594   %res = call <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
3595   %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
3596   %res2 = add <8 x i32> %res, %res1
3597   ret <8 x i32> %res2
3598 }
3599
3600 declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double>, <4 x i32>, i8)
3601
3602 define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
3603 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_128:
3604 ; X86:       # %bb.0:
3605 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3606 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3607 ; X86-NEXT:    vcvttpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe6,0xc8]
3608 ; X86-NEXT:    vcvttpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0]
3609 ; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3610 ; X86-NEXT:    retl # encoding: [0xc3]
3611 ;
3612 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_128:
3613 ; X64:       # %bb.0:
3614 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3615 ; X64-NEXT:    vcvttpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe6,0xc8]
3616 ; X64-NEXT:    vcvttpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0]
3617 ; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3618 ; X64-NEXT:    retq # encoding: [0xc3]
3619   %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
3620   %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
3621   %res2 = add <4 x i32> %res, %res1
3622   ret <4 x i32> %res2
3623 }
3624
3625 define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_128_zext(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
3626 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_128_zext:
3627 ; X86:       # %bb.0:
3628 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3629 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3630 ; X86-NEXT:    vcvttpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe6,0xc8]
3631 ; X86-NEXT:    vcvttpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0]
3632 ; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3633 ; X86-NEXT:    retl # encoding: [0xc3]
3634 ;
3635 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_128_zext:
3636 ; X64:       # %bb.0:
3637 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3638 ; X64-NEXT:    vcvttpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe6,0xc8]
3639 ; X64-NEXT:    vcvttpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0]
3640 ; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3641 ; X64-NEXT:    retq # encoding: [0xc3]
3642   %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
3643   %res1 = shufflevector <4 x i32> %res, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3644   %res2 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
3645   %res3 = shufflevector <4 x i32> %res2, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3646   %res4 = add <4 x i32> %res1, %res3
3647   ret <4 x i32> %res4
3648 }
3649
3650 declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double>, <4 x i32>, i8)
3651
3652 define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
3653 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_128:
3654 ; X86:       # %bb.0:
3655 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3656 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3657 ; X86-NEXT:    vcvttpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x78,0xc8]
3658 ; X86-NEXT:    vcvttpd2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x78,0xc0]
3659 ; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3660 ; X86-NEXT:    retl # encoding: [0xc3]
3661 ;
3662 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_128:
3663 ; X64:       # %bb.0:
3664 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3665 ; X64-NEXT:    vcvttpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x78,0xc8]
3666 ; X64-NEXT:    vcvttpd2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x78,0xc0]
3667 ; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3668 ; X64-NEXT:    retq # encoding: [0xc3]
3669   %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
3670   %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
3671   %res2 = add <4 x i32> %res, %res1
3672   ret <4 x i32> %res2
3673 }
3674
3675 define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_128_zext(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
3676 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_128_zext:
3677 ; X86:       # %bb.0:
3678 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3679 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3680 ; X86-NEXT:    vcvttpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x78,0xc8]
3681 ; X86-NEXT:    vcvttpd2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x78,0xc0]
3682 ; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3683 ; X86-NEXT:    retl # encoding: [0xc3]
3684 ;
3685 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_128_zext:
3686 ; X64:       # %bb.0:
3687 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3688 ; X64-NEXT:    vcvttpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x78,0xc8]
3689 ; X64-NEXT:    vcvttpd2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x78,0xc0]
3690 ; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3691 ; X64-NEXT:    retq # encoding: [0xc3]
3692   %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
3693   %res1 = shufflevector <4 x i32> %res, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3694   %res2 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
3695   %res3 = shufflevector <4 x i32> %res2, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3696   %res4 = add <4 x i32> %res1, %res3
3697   ret <4 x i32> %res4
3698 }
3699
3700 declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double>, <4 x i32>, i8)
3701
3702 define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
3703 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_256:
3704 ; X86:       # %bb.0:
3705 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3706 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3707 ; X86-NEXT:    vcvttpd2udq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x78,0xc8]
3708 ; X86-NEXT:    vcvttpd2udq %ymm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x28,0x78,0xc0]
3709 ; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3710 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3711 ; X86-NEXT:    retl # encoding: [0xc3]
3712 ;
3713 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_256:
3714 ; X64:       # %bb.0:
3715 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3716 ; X64-NEXT:    vcvttpd2udq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x78,0xc8]
3717 ; X64-NEXT:    vcvttpd2udq %ymm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x28,0x78,0xc0]
3718 ; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3719 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3720 ; X64-NEXT:    retq # encoding: [0xc3]
3721   %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
3722   %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
3723   %res2 = add <4 x i32> %res, %res1
3724   ret <4 x i32> %res2
3725 }
3726
3727 declare <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float>, <4 x i32>, i8)
3728
3729 define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
3730 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_128:
3731 ; X86:       # %bb.0:
3732 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3733 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3734 ; X86-NEXT:    vcvttps2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x78,0xc8]
3735 ; X86-NEXT:    vcvttps2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x08,0x78,0xc0]
3736 ; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3737 ; X86-NEXT:    retl # encoding: [0xc3]
3738 ;
3739 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_128:
3740 ; X64:       # %bb.0:
3741 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3742 ; X64-NEXT:    vcvttps2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x78,0xc8]
3743 ; X64-NEXT:    vcvttps2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x08,0x78,0xc0]
3744 ; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3745 ; X64-NEXT:    retq # encoding: [0xc3]
3746   %res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
3747   %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
3748   %res2 = add <4 x i32> %res, %res1
3749   ret <4 x i32> %res2
3750 }
3751
3752 declare <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float>, <8 x i32>, i8)
3753
3754 define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
3755 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_256:
3756 ; X86:       # %bb.0:
3757 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3758 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3759 ; X86-NEXT:    vcvttps2udq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x78,0xc8]
3760 ; X86-NEXT:    vcvttps2udq %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x28,0x78,0xc0]
3761 ; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
3762 ; X86-NEXT:    retl # encoding: [0xc3]
3763 ;
3764 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_256:
3765 ; X64:       # %bb.0:
3766 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3767 ; X64-NEXT:    vcvttps2udq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x78,0xc8]
3768 ; X64-NEXT:    vcvttps2udq %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x28,0x78,0xc0]
3769 ; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
3770 ; X64-NEXT:    retq # encoding: [0xc3]
3771   %res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
3772   %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
3773   %res2 = add <8 x i32> %res, %res1
3774   ret <8 x i32> %res2
3775 }
3776
3777 declare <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double>, i32, <2 x double>, i8)
3778
3779 define <2 x double>@test_int_x86_avx512_mask_rndscale_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
3780 ; X86-LABEL: test_int_x86_avx512_mask_rndscale_pd_128:
3781 ; X86:       # %bb.0:
3782 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3783 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3784 ; X86-NEXT:    vrndscalepd $4, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x09,0xc8,0x04]
3785 ; X86-NEXT:    vrndscalepd $88, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x09,0xc0,0x58]
3786 ; X86-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
3787 ; X86-NEXT:    retl # encoding: [0xc3]
3788 ;
3789 ; X64-LABEL: test_int_x86_avx512_mask_rndscale_pd_128:
3790 ; X64:       # %bb.0:
3791 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3792 ; X64-NEXT:    vrndscalepd $4, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x09,0xc8,0x04]
3793 ; X64-NEXT:    vrndscalepd $88, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x09,0xc0,0x58]
3794 ; X64-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
3795 ; X64-NEXT:    retq # encoding: [0xc3]
3796   %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %x0, i32 4, <2 x double> %x2, i8 %x3)
3797   %res1 = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %x0, i32 88, <2 x double> %x2, i8 -1)
3798   %res2 = fadd <2 x double> %res, %res1
3799   ret <2 x double> %res2
3800 }
3801
3802 declare <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double>, i32, <4 x double>, i8)
3803
3804 define <4 x double>@test_int_x86_avx512_mask_rndscale_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
3805 ; X86-LABEL: test_int_x86_avx512_mask_rndscale_pd_256:
3806 ; X86:       # %bb.0:
3807 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3808 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3809 ; X86-NEXT:    vrndscalepd $4, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x09,0xc8,0x04]
3810 ; X86-NEXT:    vrndscalepd $88, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x09,0xc0,0x58]
3811 ; X86-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
3812 ; X86-NEXT:    retl # encoding: [0xc3]
3813 ;
3814 ; X64-LABEL: test_int_x86_avx512_mask_rndscale_pd_256:
3815 ; X64:       # %bb.0:
3816 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3817 ; X64-NEXT:    vrndscalepd $4, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x09,0xc8,0x04]
3818 ; X64-NEXT:    vrndscalepd $88, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x09,0xc0,0x58]
3819 ; X64-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
3820 ; X64-NEXT:    retq # encoding: [0xc3]
3821   %res = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %x0, i32 4, <4 x double> %x2, i8 %x3)
3822   %res1 = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %x0, i32 88, <4 x double> %x2, i8 -1)
3823   %res2 = fadd <4 x double> %res, %res1
3824   ret <4 x double> %res2
3825 }
3826
3827 declare <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float>, i32, <4 x float>, i8)
3828
3829 define <4 x float>@test_int_x86_avx512_mask_rndscale_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
3830 ; X86-LABEL: test_int_x86_avx512_mask_rndscale_ps_128:
3831 ; X86:       # %bb.0:
3832 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3833 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3834 ; X86-NEXT:    vrndscaleps $88, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x08,0xc8,0x58]
3835 ; X86-NEXT:    vroundps $4, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x08,0xc0,0x04]
3836 ; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
3837 ; X86-NEXT:    retl # encoding: [0xc3]
3838 ;
3839 ; X64-LABEL: test_int_x86_avx512_mask_rndscale_ps_128:
3840 ; X64:       # %bb.0:
3841 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3842 ; X64-NEXT:    vrndscaleps $88, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x08,0xc8,0x58]
3843 ; X64-NEXT:    vroundps $4, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x08,0xc0,0x04]
3844 ; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
3845 ; X64-NEXT:    retq # encoding: [0xc3]
3846   %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %x0, i32 88, <4 x float> %x2, i8 %x3)
3847   %res1 = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %x0, i32 4, <4 x float> %x2, i8 -1)
3848   %res2 = fadd <4 x float> %res, %res1
3849   ret <4 x float> %res2
3850 }
3851
3852 declare <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float>, i32, <8 x float>, i8)
3853
3854 define <8 x float>@test_int_x86_avx512_mask_rndscale_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
3855 ; X86-LABEL: test_int_x86_avx512_mask_rndscale_ps_256:
3856 ; X86:       # %bb.0:
3857 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3858 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3859 ; X86-NEXT:    vrndscaleps $5, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x08,0xc8,0x05]
3860 ; X86-NEXT:    vrndscaleps $66, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x08,0xc0,0x42]
3861 ; X86-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
3862 ; X86-NEXT:    retl # encoding: [0xc3]
3863 ;
3864 ; X64-LABEL: test_int_x86_avx512_mask_rndscale_ps_256:
3865 ; X64:       # %bb.0:
3866 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3867 ; X64-NEXT:    vrndscaleps $5, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x08,0xc8,0x05]
3868 ; X64-NEXT:    vrndscaleps $66, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x08,0xc0,0x42]
3869 ; X64-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
3870 ; X64-NEXT:    retq # encoding: [0xc3]
3871   %res = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %x0, i32 5, <8 x float> %x2, i8 %x3)
3872   %res1 = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %x0, i32 66, <8 x float> %x2, i8 -1)
3873   %res2 = fadd <8 x float> %res, %res1
3874   ret <8 x float> %res2
3875 }
3876
3877 declare <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double>, i32, <2 x double>, i8)
3878
3879 define <2 x double>@test_int_x86_avx512_mask_getmant_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
3880 ; X86-LABEL: test_int_x86_avx512_mask_getmant_pd_128:
3881 ; X86:       # %bb.0:
3882 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3883 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3884 ; X86-NEXT:    vgetmantpd $11, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x26,0xc8,0x0b]
3885 ; X86-NEXT:    vgetmantpd $11, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x26,0xd0,0x0b]
3886 ; X86-NEXT:    vgetmantpd $11, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x26,0xc0,0x0b]
3887 ; X86-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
3888 ; X86-NEXT:    vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
3889 ; X86-NEXT:    retl # encoding: [0xc3]
3890 ;
3891 ; X64-LABEL: test_int_x86_avx512_mask_getmant_pd_128:
3892 ; X64:       # %bb.0:
3893 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3894 ; X64-NEXT:    vgetmantpd $11, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x26,0xd0,0x0b]
3895 ; X64-NEXT:    vgetmantpd $11, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x26,0xc8,0x0b]
3896 ; X64-NEXT:    vgetmantpd $11, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x26,0xc0,0x0b]
3897 ; X64-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
3898 ; X64-NEXT:    vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
3899 ; X64-NEXT:    retq # encoding: [0xc3]
3900   %res = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 11, <2 x double> %x2, i8 %x3)
3901   %res2 = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 11, <2 x double> zeroinitializer, i8 %x3)
3902   %res1 = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 11, <2 x double> %x2, i8 -1)
3903   %res3 = fadd <2 x double> %res, %res1
3904   %res4 = fadd <2 x double> %res2, %res3
3905   ret <2 x double> %res4
3906 }
3907
3908 declare <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double>, i32, <4 x double>, i8)
3909
3910 define <4 x double>@test_int_x86_avx512_mask_getmant_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
3911 ; X86-LABEL: test_int_x86_avx512_mask_getmant_pd_256:
3912 ; X86:       # %bb.0:
3913 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3914 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3915 ; X86-NEXT:    vgetmantpd $11, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x26,0xc8,0x0b]
3916 ; X86-NEXT:    vgetmantpd $11, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x26,0xc0,0x0b]
3917 ; X86-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
3918 ; X86-NEXT:    retl # encoding: [0xc3]
3919 ;
3920 ; X64-LABEL: test_int_x86_avx512_mask_getmant_pd_256:
3921 ; X64:       # %bb.0:
3922 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3923 ; X64-NEXT:    vgetmantpd $11, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x26,0xc8,0x0b]
3924 ; X64-NEXT:    vgetmantpd $11, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x26,0xc0,0x0b]
3925 ; X64-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
3926 ; X64-NEXT:    retq # encoding: [0xc3]
3927   %res = call <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double> %x0, i32 11, <4 x double> %x2, i8 %x3)
3928   %res1 = call <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double> %x0, i32 11, <4 x double> %x2, i8 -1)
3929   %res2 = fadd <4 x double> %res, %res1
3930   ret <4 x double> %res2
3931 }
3932
3933 declare <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float>, i32, <4 x float>, i8)
3934
3935 define <4 x float>@test_int_x86_avx512_mask_getmant_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
3936 ; X86-LABEL: test_int_x86_avx512_mask_getmant_ps_128:
3937 ; X86:       # %bb.0:
3938 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3939 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3940 ; X86-NEXT:    vgetmantps $11, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x26,0xc8,0x0b]
3941 ; X86-NEXT:    vgetmantps $11, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x26,0xc0,0x0b]
3942 ; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
3943 ; X86-NEXT:    retl # encoding: [0xc3]
3944 ;
3945 ; X64-LABEL: test_int_x86_avx512_mask_getmant_ps_128:
3946 ; X64:       # %bb.0:
3947 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3948 ; X64-NEXT:    vgetmantps $11, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x26,0xc8,0x0b]
3949 ; X64-NEXT:    vgetmantps $11, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x26,0xc0,0x0b]
3950 ; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
3951 ; X64-NEXT:    retq # encoding: [0xc3]
3952   %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float> %x0, i32 11, <4 x float> %x2, i8 %x3)
3953   %res1 = call <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float> %x0, i32 11, <4 x float> %x2, i8 -1)
3954   %res2 = fadd <4 x float> %res, %res1
3955   ret <4 x float> %res2
3956 }
3957
3958 declare <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float>, i32, <8 x float>, i8)
3959
3960 define <8 x float>@test_int_x86_avx512_mask_getmant_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
3961 ; X86-LABEL: test_int_x86_avx512_mask_getmant_ps_256:
3962 ; X86:       # %bb.0:
3963 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3964 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3965 ; X86-NEXT:    vgetmantps $11, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x26,0xc8,0x0b]
3966 ; X86-NEXT:    vgetmantps $11, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x26,0xc0,0x0b]
3967 ; X86-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
3968 ; X86-NEXT:    retl # encoding: [0xc3]
3969 ;
3970 ; X64-LABEL: test_int_x86_avx512_mask_getmant_ps_256:
3971 ; X64:       # %bb.0:
3972 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3973 ; X64-NEXT:    vgetmantps $11, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x26,0xc8,0x0b]
3974 ; X64-NEXT:    vgetmantps $11, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x26,0xc0,0x0b]
3975 ; X64-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
3976 ; X64-NEXT:    retq # encoding: [0xc3]
3977   %res = call <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 %x3)
3978   %res1 = call <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 -1)
3979   %res2 = fadd <8 x float> %res, %res1
3980   ret <8 x float> %res2
3981 }
3982
3983 declare <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32)
3984
3985 define <4 x i32>@test_int_x86_avx512_mask_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
3986 ; X86-LABEL: test_int_x86_avx512_mask_pternlog_d_128:
3987 ; X86:       # %bb.0:
3988 ; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
3989 ; X86-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
3990 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3991 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3992 ; X86-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x25,0xc2,0x21]
3993 ; X86-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
3994 ; X86-NEXT:    retl # encoding: [0xc3]
3995 ;
3996 ; X64-LABEL: test_int_x86_avx512_mask_pternlog_d_128:
3997 ; X64:       # %bb.0:
3998 ; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
3999 ; X64-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
4000 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4001 ; X64-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x25,0xc2,0x21]
4002 ; X64-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
4003 ; X64-NEXT:    retq # encoding: [0xc3]
4004   %1 = call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33)
4005   %2 = bitcast i8 %x4 to <8 x i1>
4006   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4007   %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x0
4008   %4 = call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33)
4009   %res2 = add <4 x i32> %3, %4
4010   ret <4 x i32> %res2
4011 }
4012
4013 declare <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)
4014
4015 define <4 x i32>@test_int_x86_avx512_maskz_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
4016 ; X86-LABEL: test_int_x86_avx512_maskz_pternlog_d_128:
4017 ; X86:       # %bb.0:
4018 ; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
4019 ; X86-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
4020 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4021 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4022 ; X86-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x25,0xc2,0x21]
4023 ; X86-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
4024 ; X86-NEXT:    retl # encoding: [0xc3]
4025 ;
4026 ; X64-LABEL: test_int_x86_avx512_maskz_pternlog_d_128:
4027 ; X64:       # %bb.0:
4028 ; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
4029 ; X64-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
4030 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4031 ; X64-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x25,0xc2,0x21]
4032 ; X64-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
4033 ; X64-NEXT:    retq # encoding: [0xc3]
4034   %1 = call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33)
4035   %2 = bitcast i8 %x4 to <8 x i1>
4036   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4037   %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> zeroinitializer
4038   %4 = call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33)
4039   %res2 = add <4 x i32> %3, %4
4040   ret <4 x i32> %res2
4041 }
4042
4043 declare <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32)
4044
4045 define <8 x i32>@test_int_x86_avx512_mask_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
4046 ; X86-LABEL: test_int_x86_avx512_mask_pternlog_d_256:
4047 ; X86:       # %bb.0:
4048 ; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
4049 ; X86-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
4050 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4051 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4052 ; X86-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x25,0xc2,0x21]
4053 ; X86-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
4054 ; X86-NEXT:    retl # encoding: [0xc3]
4055 ;
4056 ; X64-LABEL: test_int_x86_avx512_mask_pternlog_d_256:
4057 ; X64:       # %bb.0:
4058 ; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
4059 ; X64-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
4060 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4061 ; X64-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x25,0xc2,0x21]
4062 ; X64-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
4063 ; X64-NEXT:    retq # encoding: [0xc3]
4064   %1 = call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33)
4065   %2 = bitcast i8 %x4 to <8 x i1>
4066   %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x0
4067   %4 = call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33)
4068   %res2 = add <8 x i32> %3, %4
4069   ret <8 x i32> %res2
4070 }
4071
4072 declare <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)
4073
4074 define <8 x i32>@test_int_x86_avx512_maskz_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
4075 ; X86-LABEL: test_int_x86_avx512_maskz_pternlog_d_256:
4076 ; X86:       # %bb.0:
4077 ; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
4078 ; X86-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
4079 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4080 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4081 ; X86-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x25,0xc2,0x21]
4082 ; X86-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
4083 ; X86-NEXT:    retl # encoding: [0xc3]
4084 ;
4085 ; X64-LABEL: test_int_x86_avx512_maskz_pternlog_d_256:
4086 ; X64:       # %bb.0:
4087 ; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
4088 ; X64-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
4089 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4090 ; X64-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x25,0xc2,0x21]
4091 ; X64-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
4092 ; X64-NEXT:    retq # encoding: [0xc3]
4093   %1 = call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33)
4094   %2 = bitcast i8 %x4 to <8 x i1>
4095   %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer
4096   %4 = call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33)
4097   %res2 = add <8 x i32> %3, %4
4098   ret <8 x i32> %res2
4099 }
4100
4101 declare <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32)
4102
4103 define <2 x i64>@test_int_x86_avx512_mask_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
4104 ; X86-LABEL: test_int_x86_avx512_mask_pternlog_q_128:
4105 ; X86:       # %bb.0:
4106 ; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
4107 ; X86-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
4108 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4109 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4110 ; X86-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x09,0x25,0xc2,0x21]
4111 ; X86-NEXT:    vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
4112 ; X86-NEXT:    retl # encoding: [0xc3]
4113 ;
4114 ; X64-LABEL: test_int_x86_avx512_mask_pternlog_q_128:
4115 ; X64:       # %bb.0:
4116 ; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
4117 ; X64-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
4118 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4119 ; X64-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x09,0x25,0xc2,0x21]
4120 ; X64-NEXT:    vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
4121 ; X64-NEXT:    retq # encoding: [0xc3]
4122   %1 = call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33)
4123   %2 = bitcast i8 %x4 to <8 x i1>
4124   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
4125   %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x0
4126   %4 = call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33)
4127   %res2 = add <2 x i64> %3, %4
4128   ret <2 x i64> %res2
4129 }
4130
4131 define <2 x i64>@test_int_x86_avx512_maskz_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
4132 ; X86-LABEL: test_int_x86_avx512_maskz_pternlog_q_128:
4133 ; X86:       # %bb.0:
4134 ; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
4135 ; X86-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
4136 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4137 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4138 ; X86-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x25,0xc2,0x21]
4139 ; X86-NEXT:    vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
4140 ; X86-NEXT:    retl # encoding: [0xc3]
4141 ;
4142 ; X64-LABEL: test_int_x86_avx512_maskz_pternlog_q_128:
4143 ; X64:       # %bb.0:
4144 ; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
4145 ; X64-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
4146 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4147 ; X64-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x25,0xc2,0x21]
4148 ; X64-NEXT:    vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
4149 ; X64-NEXT:    retq # encoding: [0xc3]
4150   %1 = call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33)
4151   %2 = bitcast i8 %x4 to <8 x i1>
4152   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
4153   %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> zeroinitializer
4154   %4 = call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33)
4155   %res2 = add <2 x i64> %3, %4
4156   ret <2 x i64> %res2
4157 }
4158
4159 declare <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32)
4160
4161 define <4 x i64>@test_int_x86_avx512_mask_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
4162 ; X86-LABEL: test_int_x86_avx512_mask_pternlog_q_256:
4163 ; X86:       # %bb.0:
4164 ; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
4165 ; X86-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
4166 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4167 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4168 ; X86-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x29,0x25,0xc2,0x21]
4169 ; X86-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
4170 ; X86-NEXT:    retl # encoding: [0xc3]
4171 ;
4172 ; X64-LABEL: test_int_x86_avx512_mask_pternlog_q_256:
4173 ; X64:       # %bb.0:
4174 ; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
4175 ; X64-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
4176 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4177 ; X64-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x29,0x25,0xc2,0x21]
4178 ; X64-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
4179 ; X64-NEXT:    retq # encoding: [0xc3]
4180   %1 = call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33)
4181   %2 = bitcast i8 %x4 to <8 x i1>
4182   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4183   %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x0
4184   %4 = call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33)
4185   %res2 = add <4 x i64> %3, %4
4186   ret <4 x i64> %res2
4187 }
4188
4189 define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
4190 ; X86-LABEL: test_int_x86_avx512_maskz_pternlog_q_256:
4191 ; X86:       # %bb.0:
4192 ; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
4193 ; X86-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
4194 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4195 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4196 ; X86-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xc2,0x21]
4197 ; X86-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
4198 ; X86-NEXT:    retl # encoding: [0xc3]
4199 ;
4200 ; X64-LABEL: test_int_x86_avx512_maskz_pternlog_q_256:
4201 ; X64:       # %bb.0:
4202 ; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
4203 ; X64-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
4204 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4205 ; X64-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xc2,0x21]
4206 ; X64-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
4207 ; X64-NEXT:    retq # encoding: [0xc3]
4208   %1 = call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33)
4209   %2 = bitcast i8 %x4 to <8 x i1>
4210   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4211   %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> zeroinitializer
4212   %4 = call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33)
4213   %res2 = add <4 x i64> %3, %4
4214   ret <4 x i64> %res2
4215 }
4216
4217 define <4 x float> @test_x86_vcvtph2ps_128(<8 x i16> %a0) {
4218 ; CHECK-LABEL: test_x86_vcvtph2ps_128:
4219 ; CHECK:       # %bb.0:
4220 ; CHECK-NEXT:    vcvtph2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0]
4221 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4222   %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> zeroinitializer, i8 -1)
4223   ret <4 x float> %res
4224 }
4225
4226 define <4 x float> @test_x86_vcvtph2ps_128_rrk(<8 x i16> %a0,<4 x float> %a1, i8 %mask) {
4227 ; X86-LABEL: test_x86_vcvtph2ps_128_rrk:
4228 ; X86:       # %bb.0:
4229 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4230 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4231 ; X86-NEXT:    vcvtph2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x13,0xc8]
4232 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
4233 ; X86-NEXT:    retl # encoding: [0xc3]
4234 ;
4235 ; X64-LABEL: test_x86_vcvtph2ps_128_rrk:
4236 ; X64:       # %bb.0:
4237 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4238 ; X64-NEXT:    vcvtph2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x13,0xc8]
4239 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
4240 ; X64-NEXT:    retq # encoding: [0xc3]
4241   %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> %a1, i8 %mask)
4242   ret <4 x float> %res
4243 }
4244
4245
4246 define <4 x float> @test_x86_vcvtph2ps_128_rrkz(<8 x i16> %a0, i8 %mask) {
4247 ; X86-LABEL: test_x86_vcvtph2ps_128_rrkz:
4248 ; X86:       # %bb.0:
4249 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4250 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4251 ; X86-NEXT:    vcvtph2ps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x13,0xc0]
4252 ; X86-NEXT:    retl # encoding: [0xc3]
4253 ;
4254 ; X64-LABEL: test_x86_vcvtph2ps_128_rrkz:
4255 ; X64:       # %bb.0:
4256 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4257 ; X64-NEXT:    vcvtph2ps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x13,0xc0]
4258 ; X64-NEXT:    retq # encoding: [0xc3]
4259   %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> zeroinitializer, i8 %mask)
4260   ret <4 x float> %res
4261 }
4262
4263 declare <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16>, <4 x float>, i8) nounwind readonly
4264
4265 define <8 x float> @test_x86_vcvtph2ps_256(<8 x i16> %a0) {
4266 ; CHECK-LABEL: test_x86_vcvtph2ps_256:
4267 ; CHECK:       # %bb.0:
4268 ; CHECK-NEXT:    vcvtph2ps %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x13,0xc0]
4269 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4270   %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> zeroinitializer, i8 -1)
4271   ret <8 x float> %res
4272 }
4273
4274 define <8 x float> @test_x86_vcvtph2ps_256_rrk(<8 x i16> %a0,<8 x float> %a1, i8 %mask) {
4275 ; X86-LABEL: test_x86_vcvtph2ps_256_rrk:
4276 ; X86:       # %bb.0:
4277 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4278 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4279 ; X86-NEXT:    vcvtph2ps %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x13,0xc8]
4280 ; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
4281 ; X86-NEXT:    retl # encoding: [0xc3]
4282 ;
4283 ; X64-LABEL: test_x86_vcvtph2ps_256_rrk:
4284 ; X64:       # %bb.0:
4285 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4286 ; X64-NEXT:    vcvtph2ps %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x13,0xc8]
4287 ; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
4288 ; X64-NEXT:    retq # encoding: [0xc3]
4289   %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> %a1, i8 %mask)
4290   ret <8 x float> %res
4291 }
4292
4293 define <8 x float> @test_x86_vcvtph2ps_256_rrkz(<8 x i16> %a0, i8 %mask) {
4294 ; X86-LABEL: test_x86_vcvtph2ps_256_rrkz:
4295 ; X86:       # %bb.0:
4296 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4297 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4298 ; X86-NEXT:    vcvtph2ps %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x13,0xc0]
4299 ; X86-NEXT:    retl # encoding: [0xc3]
4300 ;
4301 ; X64-LABEL: test_x86_vcvtph2ps_256_rrkz:
4302 ; X64:       # %bb.0:
4303 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4304 ; X64-NEXT:    vcvtph2ps %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x13,0xc0]
4305 ; X64-NEXT:    retq # encoding: [0xc3]
4306   %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> zeroinitializer, i8 %mask)
4307   ret <8 x float> %res
4308 }
4309
4310 declare <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16>, <8 x float>, i8) nounwind readonly
4311
4312 define <8 x i16> @test_x86_vcvtps2ph_128(<4 x float> %a0, i8 %mask, <8 x i16> %src) {
4313 ; X86-LABEL: test_x86_vcvtps2ph_128:
4314 ; X86:       # %bb.0:
4315 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4316 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4317 ; X86-NEXT:    vcvtps2ph $2, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc2,0x02]
4318 ; X86-NEXT:    vcvtps2ph $2, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x1d,0xc3,0x02]
4319 ; X86-NEXT:    vcvtps2ph $2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1d,0xc1,0x02]
4320 ; X86-NEXT:    vpaddw %xmm1, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfd,0xc1]
4321 ; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
4322 ; X86-NEXT:    retl # encoding: [0xc3]
4323 ;
4324 ; X64-LABEL: test_x86_vcvtps2ph_128:
4325 ; X64:       # %bb.0:
4326 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4327 ; X64-NEXT:    vcvtps2ph $2, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc2,0x02]
4328 ; X64-NEXT:    vcvtps2ph $2, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x1d,0xc3,0x02]
4329 ; X64-NEXT:    vcvtps2ph $2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1d,0xc1,0x02]
4330 ; X64-NEXT:    vpaddw %xmm1, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfd,0xc1]
4331 ; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
4332 ; X64-NEXT:    retq # encoding: [0xc3]
4333   %res1 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1)
4334   %res2 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 %mask)
4335   %res3 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 2, <8 x i16> %src, i8 %mask)
4336   %res0 = add <8 x i16> %res1, %res2
4337   %res = add <8 x i16> %res3, %res0
4338   ret <8 x i16> %res
4339 }
4340
4341 declare <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float>, i32, <8 x i16>, i8) nounwind readonly
4342
4343 define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0, i8 %mask, <8 x i16> %src) {
4344 ; X86-LABEL: test_x86_vcvtps2ph_256:
4345 ; X86:       # %bb.0:
4346 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4347 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4348 ; X86-NEXT:    vcvtps2ph $2, %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc2,0x02]
4349 ; X86-NEXT:    vcvtps2ph $2, %ymm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x1d,0xc3,0x02]
4350 ; X86-NEXT:    vcvtps2ph $2, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1d,0xc1,0x02]
4351 ; X86-NEXT:    vpaddw %xmm1, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfd,0xc1]
4352 ; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
4353 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
4354 ; X86-NEXT:    retl # encoding: [0xc3]
4355 ;
4356 ; X64-LABEL: test_x86_vcvtps2ph_256:
4357 ; X64:       # %bb.0:
4358 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4359 ; X64-NEXT:    vcvtps2ph $2, %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc2,0x02]
4360 ; X64-NEXT:    vcvtps2ph $2, %ymm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x1d,0xc3,0x02]
4361 ; X64-NEXT:    vcvtps2ph $2, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1d,0xc1,0x02]
4362 ; X64-NEXT:    vpaddw %xmm1, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfd,0xc1]
4363 ; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
4364 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
4365 ; X64-NEXT:    retq # encoding: [0xc3]
4366   %res1 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1)
4367   %res2 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 %mask)
4368   %res3 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %a0, i32 2, <8 x i16> %src, i8 %mask)
4369   %res0 = add <8 x i16> %res1, %res2
4370   %res = add <8 x i16> %res3, %res0
4371   ret <8 x i16> %res
4372 }
4373
4374 declare <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float>, i32, <8 x i16>, i8) nounwind readonly
4375
4376 define <8 x float> @test_rsqrt_ps_256_rr(<8 x float> %a0) {
4377 ; CHECK-LABEL: test_rsqrt_ps_256_rr:
4378 ; CHECK:       # %bb.0:
4379 ; CHECK-NEXT:    vrsqrt14ps %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x4e,0xc0]
4380 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4381   %res = call <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 -1)
4382   ret <8 x float> %res
4383 }
4384
4385 define <8 x float> @test_rsqrt_ps_256_rrkz(<8 x float> %a0, i8 %mask) {
4386 ; X86-LABEL: test_rsqrt_ps_256_rrkz:
4387 ; X86:       # %bb.0:
4388 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4389 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4390 ; X86-NEXT:    vrsqrt14ps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x4e,0xc0]
4391 ; X86-NEXT:    retl # encoding: [0xc3]
4392 ;
4393 ; X64-LABEL: test_rsqrt_ps_256_rrkz:
4394 ; X64:       # %bb.0:
4395 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4396 ; X64-NEXT:    vrsqrt14ps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x4e,0xc0]
4397 ; X64-NEXT:    retq # encoding: [0xc3]
4398   %res = call <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
4399   ret <8 x float> %res
4400 }
4401
4402 define <8 x float> @test_rsqrt_ps_256_rrk(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
4403 ; X86-LABEL: test_rsqrt_ps_256_rrk:
4404 ; X86:       # %bb.0:
4405 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4406 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4407 ; X86-NEXT:    vrsqrt14ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x4e,0xc8]
4408 ; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
4409 ; X86-NEXT:    retl # encoding: [0xc3]
4410 ;
4411 ; X64-LABEL: test_rsqrt_ps_256_rrk:
4412 ; X64:       # %bb.0:
4413 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4414 ; X64-NEXT:    vrsqrt14ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x4e,0xc8]
4415 ; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
4416 ; X64-NEXT:    retq # encoding: [0xc3]
4417   %res = call <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float> %a0, <8 x float> %a1, i8 %mask)
4418   ret <8 x float> %res
4419 }
4420
4421 define <4 x float> @test_rsqrt_ps_128_rr(<4 x float> %a0) {
4422 ; CHECK-LABEL: test_rsqrt_ps_128_rr:
4423 ; CHECK:       # %bb.0:
4424 ; CHECK-NEXT:    vrsqrt14ps %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x4e,0xc0]
4425 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4426   %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 -1)
4427   ret <4 x float> %res
4428 }
4429
4430 define <4 x float> @test_rsqrt_ps_128_rrkz(<4 x float> %a0, i8 %mask) {
4431 ; X86-LABEL: test_rsqrt_ps_128_rrkz:
4432 ; X86:       # %bb.0:
4433 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4434 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4435 ; X86-NEXT:    vrsqrt14ps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x4e,0xc0]
4436 ; X86-NEXT:    retl # encoding: [0xc3]
4437 ;
4438 ; X64-LABEL: test_rsqrt_ps_128_rrkz:
4439 ; X64:       # %bb.0:
4440 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4441 ; X64-NEXT:    vrsqrt14ps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x4e,0xc0]
4442 ; X64-NEXT:    retq # encoding: [0xc3]
4443   %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask)
4444   ret <4 x float> %res
4445 }
4446
4447 define <4 x float> @test_rsqrt_ps_128_rrk(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
4448 ; X86-LABEL: test_rsqrt_ps_128_rrk:
4449 ; X86:       # %bb.0:
4450 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4451 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4452 ; X86-NEXT:    vrsqrt14ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x4e,0xc8]
4453 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
4454 ; X86-NEXT:    retl # encoding: [0xc3]
4455 ;
4456 ; X64-LABEL: test_rsqrt_ps_128_rrk:
4457 ; X64:       # %bb.0:
4458 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4459 ; X64-NEXT:    vrsqrt14ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x4e,0xc8]
4460 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
4461 ; X64-NEXT:    retq # encoding: [0xc3]
4462   %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask)
4463   ret <4 x float> %res
4464 }
4465
4466 declare <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
4467 declare <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float>, <4 x float>, i8) nounwind readnone
4468
4469 define <8 x float> @test_rcp_ps_256_rr(<8 x float> %a0) {
4470 ; CHECK-LABEL: test_rcp_ps_256_rr:
4471 ; CHECK:       # %bb.0:
4472 ; CHECK-NEXT:    vrcp14ps %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x4c,0xc0]
4473 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4474   %res = call <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 -1)
4475   ret <8 x float> %res
4476 }
4477
4478 define <8 x float> @test_rcp_ps_256_rrkz(<8 x float> %a0, i8 %mask) {
4479 ; X86-LABEL: test_rcp_ps_256_rrkz:
4480 ; X86:       # %bb.0:
4481 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4482 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4483 ; X86-NEXT:    vrcp14ps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x4c,0xc0]
4484 ; X86-NEXT:    retl # encoding: [0xc3]
4485 ;
4486 ; X64-LABEL: test_rcp_ps_256_rrkz:
4487 ; X64:       # %bb.0:
4488 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4489 ; X64-NEXT:    vrcp14ps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x4c,0xc0]
4490 ; X64-NEXT:    retq # encoding: [0xc3]
4491   %res = call <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
4492   ret <8 x float> %res
4493 }
4494
4495 define <8 x float> @test_rcp_ps_256_rrk(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
4496 ; X86-LABEL: test_rcp_ps_256_rrk:
4497 ; X86:       # %bb.0:
4498 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4499 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4500 ; X86-NEXT:    vrcp14ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x4c,0xc8]
4501 ; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
4502 ; X86-NEXT:    retl # encoding: [0xc3]
4503 ;
4504 ; X64-LABEL: test_rcp_ps_256_rrk:
4505 ; X64:       # %bb.0:
4506 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4507 ; X64-NEXT:    vrcp14ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x4c,0xc8]
4508 ; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
4509 ; X64-NEXT:    retq # encoding: [0xc3]
4510   %res = call <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float> %a0, <8 x float> %a1, i8 %mask)
4511   ret <8 x float> %res
4512 }
4513
4514 define <4 x float> @test_rcp_ps_128_rr(<4 x float> %a0) {
4515 ; CHECK-LABEL: test_rcp_ps_128_rr:
4516 ; CHECK:       # %bb.0:
4517 ; CHECK-NEXT:    vrcp14ps %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x4c,0xc0]
4518 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4519   %res = call <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 -1)
4520   ret <4 x float> %res
4521 }
4522
4523 define <4 x float> @test_rcp_ps_128_rrkz(<4 x float> %a0, i8 %mask) {
4524 ; X86-LABEL: test_rcp_ps_128_rrkz:
4525 ; X86:       # %bb.0:
4526 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4527 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4528 ; X86-NEXT:    vrcp14ps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x4c,0xc0]
4529 ; X86-NEXT:    retl # encoding: [0xc3]
4530 ;
4531 ; X64-LABEL: test_rcp_ps_128_rrkz:
4532 ; X64:       # %bb.0:
4533 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4534 ; X64-NEXT:    vrcp14ps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x4c,0xc0]
4535 ; X64-NEXT:    retq # encoding: [0xc3]
4536   %res = call <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask)
4537   ret <4 x float> %res
4538 }
4539
4540 define <4 x float> @test_rcp_ps_128_rrk(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
4541 ; X86-LABEL: test_rcp_ps_128_rrk:
4542 ; X86:       # %bb.0:
4543 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4544 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4545 ; X86-NEXT:    vrcp14ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x4c,0xc8]
4546 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
4547 ; X86-NEXT:    retl # encoding: [0xc3]
4548 ;
4549 ; X64-LABEL: test_rcp_ps_128_rrk:
4550 ; X64:       # %bb.0:
4551 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4552 ; X64-NEXT:    vrcp14ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x4c,0xc8]
4553 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
4554 ; X64-NEXT:    retq # encoding: [0xc3]
4555   %res = call <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask)
4556   ret <4 x float> %res
4557 }
4558
4559 declare <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
4560 declare <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float>, <4 x float>, i8) nounwind readnone
4561
4562 define <4 x double> @test_rsqrt_pd_256_rr(<4 x double> %a0) {
4563 ; CHECK-LABEL: test_rsqrt_pd_256_rr:
4564 ; CHECK:       # %bb.0:
4565 ; CHECK-NEXT:    vrsqrt14pd %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x4e,0xc0]
4566 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4567   %res = call <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 -1)
4568   ret <4 x double> %res
4569 }
4570
4571 define <4 x double> @test_rsqrt_pd_256_rrkz(<4 x double> %a0, i8 %mask) {
4572 ; X86-LABEL: test_rsqrt_pd_256_rrkz:
4573 ; X86:       # %bb.0:
4574 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4575 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4576 ; X86-NEXT:    vrsqrt14pd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x4e,0xc0]
4577 ; X86-NEXT:    retl # encoding: [0xc3]
4578 ;
4579 ; X64-LABEL: test_rsqrt_pd_256_rrkz:
4580 ; X64:       # %bb.0:
4581 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4582 ; X64-NEXT:    vrsqrt14pd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x4e,0xc0]
4583 ; X64-NEXT:    retq # encoding: [0xc3]
4584   %res = call <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
4585   ret <4 x double> %res
4586 }
4587
4588 define <4 x double> @test_rsqrt_pd_256_rrk(<4 x double> %a0, <4 x double> %a1, i8 %mask) {
4589 ; X86-LABEL: test_rsqrt_pd_256_rrk:
4590 ; X86:       # %bb.0:
4591 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4592 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4593 ; X86-NEXT:    vrsqrt14pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x4e,0xc8]
4594 ; X86-NEXT:    vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
4595 ; X86-NEXT:    retl # encoding: [0xc3]
4596 ;
4597 ; X64-LABEL: test_rsqrt_pd_256_rrk:
4598 ; X64:       # %bb.0:
4599 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4600 ; X64-NEXT:    vrsqrt14pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x4e,0xc8]
4601 ; X64-NEXT:    vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
4602 ; X64-NEXT:    retq # encoding: [0xc3]
4603   %res = call <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %mask)
4604   ret <4 x double> %res
4605 }
4606
4607 define <2 x double> @test_rsqrt_pd_128_rr(<2 x double> %a0) {
4608 ; CHECK-LABEL: test_rsqrt_pd_128_rr:
4609 ; CHECK:       # %bb.0:
4610 ; CHECK-NEXT:    vrsqrt14pd %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x4e,0xc0]
4611 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4612   %res = call <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double> %a0, <2 x double> zeroinitializer, i8 -1)
4613   ret <2 x double> %res
4614 }
4615
4616 define <2 x double> @test_rsqrt_pd_128_rrkz(<2 x double> %a0, i8 %mask) {
4617 ; X86-LABEL: test_rsqrt_pd_128_rrkz:
4618 ; X86:       # %bb.0:
4619 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4620 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4621 ; X86-NEXT:    vrsqrt14pd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x4e,0xc0]
4622 ; X86-NEXT:    retl # encoding: [0xc3]
4623 ;
4624 ; X64-LABEL: test_rsqrt_pd_128_rrkz:
4625 ; X64:       # %bb.0:
4626 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4627 ; X64-NEXT:    vrsqrt14pd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x4e,0xc0]
4628 ; X64-NEXT:    retq # encoding: [0xc3]
4629   %res = call <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double> %a0, <2 x double> zeroinitializer, i8 %mask)
4630   ret <2 x double> %res
4631 }
4632
4633 define <2 x double> @test_rsqrt_pd_128_rrk(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
4634 ; X86-LABEL: test_rsqrt_pd_128_rrk:
4635 ; X86:       # %bb.0:
4636 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4637 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4638 ; X86-NEXT:    vrsqrt14pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x4e,0xc8]
4639 ; X86-NEXT:    vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
4640 ; X86-NEXT:    retl # encoding: [0xc3]
4641 ;
4642 ; X64-LABEL: test_rsqrt_pd_128_rrk:
4643 ; X64:       # %bb.0:
4644 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4645 ; X64-NEXT:    vrsqrt14pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x4e,0xc8]
4646 ; X64-NEXT:    vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
4647 ; X64-NEXT:    retq # encoding: [0xc3]
4648   %res = call <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double> %a0, <2 x double> %a1, i8 %mask)
4649   ret <2 x double> %res
4650 }
4651
4652 declare <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
4653 declare <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double>, <2 x double>, i8) nounwind readnone
4654
4655 define <4 x double> @test_rcp_pd_256_rr(<4 x double> %a0) {
4656 ; CHECK-LABEL: test_rcp_pd_256_rr:
4657 ; CHECK:       # %bb.0:
4658 ; CHECK-NEXT:    vrcp14pd %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x4c,0xc0]
4659 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4660   %res = call <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 -1)
4661   ret <4 x double> %res
4662 }
4663
4664 define <4 x double> @test_rcp_pd_256_rrkz(<4 x double> %a0, i8 %mask) {
4665 ; X86-LABEL: test_rcp_pd_256_rrkz:
4666 ; X86:       # %bb.0:
4667 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4668 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4669 ; X86-NEXT:    vrcp14pd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x4c,0xc0]
4670 ; X86-NEXT:    retl # encoding: [0xc3]
4671 ;
4672 ; X64-LABEL: test_rcp_pd_256_rrkz:
4673 ; X64:       # %bb.0:
4674 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4675 ; X64-NEXT:    vrcp14pd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x4c,0xc0]
4676 ; X64-NEXT:    retq # encoding: [0xc3]
4677   %res = call <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
4678   ret <4 x double> %res
4679 }
4680
4681 define <4 x double> @test_rcp_pd_256_rrk(<4 x double> %a0, <4 x double> %a1, i8 %mask) {
4682 ; X86-LABEL: test_rcp_pd_256_rrk:
4683 ; X86:       # %bb.0:
4684 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4685 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4686 ; X86-NEXT:    vrcp14pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x4c,0xc8]
4687 ; X86-NEXT:    vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
4688 ; X86-NEXT:    retl # encoding: [0xc3]
4689 ;
4690 ; X64-LABEL: test_rcp_pd_256_rrk:
4691 ; X64:       # %bb.0:
4692 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4693 ; X64-NEXT:    vrcp14pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x4c,0xc8]
4694 ; X64-NEXT:    vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
4695 ; X64-NEXT:    retq # encoding: [0xc3]
4696   %res = call <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %mask)
4697   ret <4 x double> %res
4698 }
4699
4700 define <2 x double> @test_rcp_pd_128_rr(<2 x double> %a0) {
4701 ; CHECK-LABEL: test_rcp_pd_128_rr:
4702 ; CHECK:       # %bb.0:
4703 ; CHECK-NEXT:    vrcp14pd %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x4c,0xc0]
4704 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4705   %res = call <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double> %a0, <2 x double> zeroinitializer, i8 -1)
4706   ret <2 x double> %res
4707 }
4708
4709 define <2 x double> @test_rcp_pd_128_rrkz(<2 x double> %a0, i8 %mask) {
4710 ; X86-LABEL: test_rcp_pd_128_rrkz:
4711 ; X86:       # %bb.0:
4712 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4713 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4714 ; X86-NEXT:    vrcp14pd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x4c,0xc0]
4715 ; X86-NEXT:    retl # encoding: [0xc3]
4716 ;
4717 ; X64-LABEL: test_rcp_pd_128_rrkz:
4718 ; X64:       # %bb.0:
4719 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4720 ; X64-NEXT:    vrcp14pd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x4c,0xc0]
4721 ; X64-NEXT:    retq # encoding: [0xc3]
4722   %res = call <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double> %a0, <2 x double> zeroinitializer, i8 %mask)
4723   ret <2 x double> %res
4724 }
4725
4726 define <2 x double> @test_rcp_pd_128_rrk(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
4727 ; X86-LABEL: test_rcp_pd_128_rrk:
4728 ; X86:       # %bb.0:
4729 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4730 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4731 ; X86-NEXT:    vrcp14pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x4c,0xc8]
4732 ; X86-NEXT:    vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
4733 ; X86-NEXT:    retl # encoding: [0xc3]
4734 ;
4735 ; X64-LABEL: test_rcp_pd_128_rrk:
4736 ; X64:       # %bb.0:
4737 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4738 ; X64-NEXT:    vrcp14pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x4c,0xc8]
4739 ; X64-NEXT:    vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
4740 ; X64-NEXT:    retq # encoding: [0xc3]
4741   %res = call <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double> %a0, <2 x double> %a1, i8 %mask)
4742   ret <2 x double> %res
4743 }
4744
4745 declare <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
4746 declare <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double>, <2 x double>, i8) nounwind readnone
4747
4748 declare <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double>, <4 x i64>)
4749
4750 define <4 x double>@test_int_x86_avx512_mask_permvar_df_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
4751 ; X86-LABEL: test_int_x86_avx512_mask_permvar_df_256:
4752 ; X86:       # %bb.0:
4753 ; X86-NEXT:    vpermpd %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x16,0xd8]
4754 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4755 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4756 ; X86-NEXT:    vpermpd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x16,0xd0]
4757 ; X86-NEXT:    vpermpd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x16,0xc0]
4758 ; X86-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
4759 ; X86-NEXT:    vaddpd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc3]
4760 ; X86-NEXT:    retl # encoding: [0xc3]
4761 ;
4762 ; X64-LABEL: test_int_x86_avx512_mask_permvar_df_256:
4763 ; X64:       # %bb.0:
4764 ; X64-NEXT:    vpermpd %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x16,0xd8]
4765 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4766 ; X64-NEXT:    vpermpd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x16,0xd0]
4767 ; X64-NEXT:    vpermpd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x16,0xc0]
4768 ; X64-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
4769 ; X64-NEXT:    vaddpd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc3]
4770 ; X64-NEXT:    retq # encoding: [0xc3]
4771   %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %x0, <4 x i64> %x1)
4772   %2 = bitcast i8 %x3 to <8 x i1>
4773   %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4774   %3 = select <4 x i1> %extract1, <4 x double> %1, <4 x double> %x2
4775   %4 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %x0, <4 x i64> %x1)
4776   %5 = bitcast i8 %x3 to <8 x i1>
4777   %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4778   %6 = select <4 x i1> %extract, <4 x double> %4, <4 x double> zeroinitializer
4779   %7 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %x0, <4 x i64> %x1)
4780   %res3 = fadd <4 x double> %3, %6
4781   %res4 = fadd <4 x double> %res3, %7
4782   ret <4 x double> %res4
4783 }
4784
4785 declare <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64>, <4 x i64>)
4786
4787 define <4 x i64>@test_int_x86_avx512_mask_permvar_di_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
4788 ; X86-LABEL: test_int_x86_avx512_mask_permvar_di_256:
4789 ; X86:       # %bb.0:
4790 ; X86-NEXT:    vpermq %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x36,0xd8]
4791 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4792 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4793 ; X86-NEXT:    vpermq %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x36,0xd0]
4794 ; X86-NEXT:    vpermq %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x36,0xc0]
4795 ; X86-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
4796 ; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
4797 ; X86-NEXT:    retl # encoding: [0xc3]
4798 ;
4799 ; X64-LABEL: test_int_x86_avx512_mask_permvar_di_256:
4800 ; X64:       # %bb.0:
4801 ; X64-NEXT:    vpermq %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x36,0xd8]
4802 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4803 ; X64-NEXT:    vpermq %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x36,0xd0]
4804 ; X64-NEXT:    vpermq %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x36,0xc0]
4805 ; X64-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
4806 ; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
4807 ; X64-NEXT:    retq # encoding: [0xc3]
4808   %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1)
4809   %2 = bitcast i8 %x3 to <8 x i1>
4810   %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4811   %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x2
4812   %4 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1)
4813   %5 = bitcast i8 %x3 to <8 x i1>
4814   %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4815   %6 = select <4 x i1> %extract, <4 x i64> %4, <4 x i64> zeroinitializer
4816   %7 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1)
4817   %res3 = add <4 x i64> %3, %6
4818   %res4 = add <4 x i64> %res3, %7
4819   ret <4 x i64> %res4
4820 }
4821
4822 declare <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double>, <2 x double>, <2 x i64>, i32, i8)
4823
4824 define <2 x double>@test_int_x86_avx512_mask_fixupimm_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) {
4825 ; X86-LABEL: test_int_x86_avx512_mask_fixupimm_pd_128:
4826 ; X86:       # %bb.0:
4827 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4828 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4829 ; X86-NEXT:    vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
4830 ; X86-NEXT:    vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf3,0xf5,0x09,0x54,0xda,0x05]
4831 ; X86-NEXT:    vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4]
4832 ; X86-NEXT:    vfixupimmpd $4, %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xe2,0x04]
4833 ; X86-NEXT:    vaddpd %xmm4, %xmm3, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xdc]
4834 ; X86-NEXT:    vfixupimmpd $3, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0xf5,0x08,0x54,0xc2,0x03]
4835 ; X86-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc0]
4836 ; X86-NEXT:    retl # encoding: [0xc3]
4837 ;
4838 ; X64-LABEL: test_int_x86_avx512_mask_fixupimm_pd_128:
4839 ; X64:       # %bb.0:
4840 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4841 ; X64-NEXT:    vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
4842 ; X64-NEXT:    vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf3,0xf5,0x09,0x54,0xda,0x05]
4843 ; X64-NEXT:    vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4]
4844 ; X64-NEXT:    vfixupimmpd $4, %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xe2,0x04]
4845 ; X64-NEXT:    vaddpd %xmm4, %xmm3, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xdc]
4846 ; X64-NEXT:    vfixupimmpd $3, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0xf5,0x08,0x54,0xc2,0x03]
4847 ; X64-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc0]
4848 ; X64-NEXT:    retq # encoding: [0xc3]
4849   %res = call <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1,<2 x i64> %x2, i32 5, i8 %x4)
4850   %res1 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double> zeroinitializer, <2 x double> %x1, <2 x i64> %x2, i32 4, i8 %x4)
4851   %res2 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 3, i8 -1)
4852   %res3 = fadd <2 x double> %res, %res1
4853   %res4 = fadd <2 x double> %res3, %res2
4854   ret <2 x double> %res4
4855 }
4856
4857 declare <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double>, <2 x double>, <2 x i64>, i32, i8)
4858
4859 define <2 x double>@test_int_x86_avx512_maskz_fixupimm_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) {
4860 ; X86-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_128:
4861 ; X86:       # %bb.0:
4862 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4863 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4864 ; X86-NEXT:    vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
4865 ; X86-NEXT:    vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xda,0x05]
4866 ; X86-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x57,0xd2]
4867 ; X86-NEXT:    vfixupimmpd $3, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xc2,0x03]
4868 ; X86-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc0]
4869 ; X86-NEXT:    retl # encoding: [0xc3]
4870 ;
4871 ; X64-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_128:
4872 ; X64:       # %bb.0:
4873 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4874 ; X64-NEXT:    vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
4875 ; X64-NEXT:    vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xda,0x05]
4876 ; X64-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x57,0xd2]
4877 ; X64-NEXT:    vfixupimmpd $3, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xc2,0x03]
4878 ; X64-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc0]
4879 ; X64-NEXT:    retq # encoding: [0xc3]
4880   %res = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4)
4881   %res1 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 3, i8 %x4)
4882   ;%res2 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 4, i8 -1)
4883   %res3 = fadd <2 x double> %res, %res1
4884   ;%res4 = fadd <2 x double> %res3, %res2
4885   ret <2 x double> %res3
4886 }
4887
4888 declare <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double>, <4 x double>, <4 x i64>, i32, i8)
4889
4890 define <4 x double>@test_int_x86_avx512_mask_fixupimm_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i8 %x4) {
4891 ; X86-LABEL: test_int_x86_avx512_mask_fixupimm_pd_256:
4892 ; X86:       # %bb.0:
4893 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4894 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4895 ; X86-NEXT:    vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
4896 ; X86-NEXT:    vfixupimmpd $4, %ymm2, %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf3,0xf5,0x29,0x54,0xda,0x04]
4897 ; X86-NEXT:    vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4]
4898 ; X86-NEXT:    vfixupimmpd $5, %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xe2,0x05]
4899 ; X86-NEXT:    vaddpd %ymm4, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xdc]
4900 ; X86-NEXT:    vfixupimmpd $3, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x54,0xc2,0x03]
4901 ; X86-NEXT:    vaddpd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc0]
4902 ; X86-NEXT:    retl # encoding: [0xc3]
4903 ;
4904 ; X64-LABEL: test_int_x86_avx512_mask_fixupimm_pd_256:
4905 ; X64:       # %bb.0:
4906 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4907 ; X64-NEXT:    vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
4908 ; X64-NEXT:    vfixupimmpd $4, %ymm2, %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf3,0xf5,0x29,0x54,0xda,0x04]
4909 ; X64-NEXT:    vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4]
4910 ; X64-NEXT:    vfixupimmpd $5, %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xe2,0x05]
4911 ; X64-NEXT:    vaddpd %ymm4, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xdc]
4912 ; X64-NEXT:    vfixupimmpd $3, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x54,0xc2,0x03]
4913 ; X64-NEXT:    vaddpd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc0]
4914 ; X64-NEXT:    retq # encoding: [0xc3]
4915   %res = call <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 4, i8 %x4)
4916   %res1 = call <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double> zeroinitializer, <4 x double> %x1, <4 x i64> %x2 , i32 5, i8 %x4)
4917   %res2 = call <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 3, i8 -1)
4918   %res3 = fadd <4 x double> %res, %res1
4919   %res4 = fadd <4 x double> %res3, %res2
4920   ret <4 x double> %res4
4921 }
4922
4923 declare <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double>, <4 x double>, <4 x i64>, i32, i8)
4924
4925 define <4 x double>@test_int_x86_avx512_maskz_fixupimm_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i8 %x4) {
4926 ; X86-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_256:
4927 ; X86:       # %bb.0:
4928 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4929 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4930 ; X86-NEXT:    vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
4931 ; X86-NEXT:    vfixupimmpd $5, %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xda,0x05]
4932 ; X86-NEXT:    vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4]
4933 ; X86-NEXT:    vmovapd %ymm0, %ymm5 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xe8]
4934 ; X86-NEXT:    vfixupimmpd $4, %ymm4, %ymm1, %ymm5 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xec,0x04]
4935 ; X86-NEXT:    vaddpd %ymm5, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xdd]
4936 ; X86-NEXT:    vfixupimmpd $3, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x54,0xc2,0x03]
4937 ; X86-NEXT:    vaddpd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc0]
4938 ; X86-NEXT:    retl # encoding: [0xc3]
4939 ;
4940 ; X64-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_256:
4941 ; X64:       # %bb.0:
4942 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4943 ; X64-NEXT:    vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
4944 ; X64-NEXT:    vfixupimmpd $5, %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xda,0x05]
4945 ; X64-NEXT:    vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4]
4946 ; X64-NEXT:    vmovapd %ymm0, %ymm5 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xe8]
4947 ; X64-NEXT:    vfixupimmpd $4, %ymm4, %ymm1, %ymm5 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xec,0x04]
4948 ; X64-NEXT:    vaddpd %ymm5, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xdd]
4949 ; X64-NEXT:    vfixupimmpd $3, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x54,0xc2,0x03]
4950 ; X64-NEXT:    vaddpd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc0]
4951 ; X64-NEXT:    retq # encoding: [0xc3]
4952   %res = call <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 5, i8 %x4)
4953   %res1 = call <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> zeroinitializer, i32 4, i8 %x4)
4954   %res2 = call <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 3, i8 -1)
4955   %res3 = fadd <4 x double> %res, %res1
4956   %res4 = fadd <4 x double> %res3, %res2
4957   ret <4 x double> %res4
4958 }
4959
4960 declare <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float>, <4 x float>, <4 x i32>, i32, i8)
4961
4962 define <4 x float>@test_int_x86_avx512_mask_fixupimm_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) {
4963 ; X86-LABEL: test_int_x86_avx512_mask_fixupimm_ps_128:
4964 ; X86:       # %bb.0:
4965 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4966 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4967 ; X86-NEXT:    vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
4968 ; X86-NEXT:    vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x54,0xda,0x05]
4969 ; X86-NEXT:    vmovaps %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe0]
4970 ; X86-NEXT:    vfixupimmps $5, %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf3,0x75,0x08,0x54,0xe2,0x05]
4971 ; X86-NEXT:    vxorps %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe8,0x57,0xd2]
4972 ; X86-NEXT:    vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x54,0xc2,0x05]
4973 ; X86-NEXT:    vaddps %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc0]
4974 ; X86-NEXT:    vaddps %xmm4, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc4]
4975 ; X86-NEXT:    retl # encoding: [0xc3]
4976 ;
4977 ; X64-LABEL: test_int_x86_avx512_mask_fixupimm_ps_128:
4978 ; X64:       # %bb.0:
4979 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4980 ; X64-NEXT:    vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
4981 ; X64-NEXT:    vfixupimmps $5, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0x75,0x08,0x54,0xda,0x05]
4982 ; X64-NEXT:    vmovaps %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe0]
4983 ; X64-NEXT:    vfixupimmps $5, %xmm2, %xmm1, %xmm4 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x54,0xe2,0x05]
4984 ; X64-NEXT:    vxorps %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe8,0x57,0xd2]
4985 ; X64-NEXT:    vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x54,0xc2,0x05]
4986 ; X64-NEXT:    vaddps %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd8,0x58,0xc0]
4987 ; X64-NEXT:    vaddps %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc3]
4988 ; X64-NEXT:    retq # encoding: [0xc3]
4989   %res = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4)
4990   %res1 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4)
4991   %res2 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 -1)
4992   %res3 = fadd <4 x float> %res, %res1
4993   %res4 = fadd <4 x float> %res3, %res2
4994   ret <4 x float> %res4
4995 }
4996
4997 declare <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float>, <4 x float>, <4 x i32>, i32, i8)
4998
4999 define <4 x float>@test_int_x86_avx512_maskz_fixupimm_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) {
5000 ; X86-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_128:
5001 ; X86:       # %bb.0:
5002 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5003 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5004 ; X86-NEXT:    vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
5005 ; X86-NEXT:    vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x54,0xda,0x05]
5006 ; X86-NEXT:    vmovaps %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe0]
5007 ; X86-NEXT:    vfixupimmps $5, %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf3,0x75,0x08,0x54,0xe2,0x05]
5008 ; X86-NEXT:    vxorps %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe8,0x57,0xd2]
5009 ; X86-NEXT:    vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x54,0xc2,0x05]
5010 ; X86-NEXT:    vaddps %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc0]
5011 ; X86-NEXT:    vaddps %xmm4, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc4]
5012 ; X86-NEXT:    retl # encoding: [0xc3]
5013 ;
5014 ; X64-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_128:
5015 ; X64:       # %bb.0:
5016 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5017 ; X64-NEXT:    vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
5018 ; X64-NEXT:    vfixupimmps $5, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0x75,0x08,0x54,0xda,0x05]
5019 ; X64-NEXT:    vmovaps %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe0]
5020 ; X64-NEXT:    vfixupimmps $5, %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x54,0xe2,0x05]
5021 ; X64-NEXT:    vxorps %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe8,0x57,0xd2]
5022 ; X64-NEXT:    vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x54,0xc2,0x05]
5023 ; X64-NEXT:    vaddps %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd8,0x58,0xc0]
5024 ; X64-NEXT:    vaddps %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc3]
5025 ; X64-NEXT:    retq # encoding: [0xc3]
5026   %res = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4)
5027   %res1 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4)
5028   %res2 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 -1)
5029   %res3 = fadd <4 x float> %res, %res1
5030   %res4 = fadd <4 x float> %res3, %res2
5031   ret <4 x float> %res4
5032 }
5033
5034 declare <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float>, <8 x float>, <8 x i32>, i32, i8)
5035
5036 define <8 x float>@test_int_x86_avx512_mask_fixupimm_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i8 %x4) {
5037 ; X86-LABEL: test_int_x86_avx512_mask_fixupimm_ps_256:
5038 ; X86:       # %bb.0:
5039 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5040 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5041 ; X86-NEXT:    vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
5042 ; X86-NEXT:    vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x54,0xda,0x05]
5043 ; X86-NEXT:    vmovaps %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe0]
5044 ; X86-NEXT:    vfixupimmps $5, %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf3,0x75,0x28,0x54,0xe2,0x05]
5045 ; X86-NEXT:    vxorps %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe8,0x57,0xd2]
5046 ; X86-NEXT:    vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x54,0xc2,0x05]
5047 ; X86-NEXT:    vaddps %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc0]
5048 ; X86-NEXT:    vaddps %ymm4, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc4]
5049 ; X86-NEXT:    retl # encoding: [0xc3]
5050 ;
5051 ; X64-LABEL: test_int_x86_avx512_mask_fixupimm_ps_256:
5052 ; X64:       # %bb.0:
5053 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5054 ; X64-NEXT:    vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
5055 ; X64-NEXT:    vfixupimmps $5, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0x75,0x28,0x54,0xda,0x05]
5056 ; X64-NEXT:    vmovaps %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe0]
5057 ; X64-NEXT:    vfixupimmps $5, %ymm2, %ymm1, %ymm4 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x54,0xe2,0x05]
5058 ; X64-NEXT:    vxorps %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe8,0x57,0xd2]
5059 ; X64-NEXT:    vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x54,0xc2,0x05]
5060 ; X64-NEXT:    vaddps %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdc,0x58,0xc0]
5061 ; X64-NEXT:    vaddps %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
5062 ; X64-NEXT:    retq # encoding: [0xc3]
5063   %res = call <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 5, i8 %x4)
5064   %res1 = call <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> zeroinitializer, i32 5, i8 %x4)
5065   %res2 = call <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 5, i8 -1)
5066   %res3 = fadd <8 x float> %res, %res1
5067   %res4 = fadd <8 x float> %res3, %res2
5068   ret <8 x float> %res4
5069 }
5070
5071 declare <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float>, <8 x float>, <8 x i32>, i32, i8)
5072
5073 define <8 x float>@test_int_x86_avx512_maskz_fixupimm_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i8 %x4) {
5074 ; X86-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_256:
5075 ; X86:       # %bb.0:
5076 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5077 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5078 ; X86-NEXT:    vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
5079 ; X86-NEXT:    vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x54,0xda,0x05]
5080 ; X86-NEXT:    vmovaps %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe0]
5081 ; X86-NEXT:    vfixupimmps $5, %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf3,0x75,0x28,0x54,0xe2,0x05]
5082 ; X86-NEXT:    vxorps %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe8,0x57,0xd2]
5083 ; X86-NEXT:    vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x54,0xc2,0x05]
5084 ; X86-NEXT:    vaddps %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc0]
5085 ; X86-NEXT:    vaddps %ymm4, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc4]
5086 ; X86-NEXT:    retl # encoding: [0xc3]
5087 ;
5088 ; X64-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_256:
5089 ; X64:       # %bb.0:
5090 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5091 ; X64-NEXT:    vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
5092 ; X64-NEXT:    vfixupimmps $5, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0x75,0x28,0x54,0xda,0x05]
5093 ; X64-NEXT:    vmovaps %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe0]
5094 ; X64-NEXT:    vfixupimmps $5, %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x54,0xe2,0x05]
5095 ; X64-NEXT:    vxorps %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe8,0x57,0xd2]
5096 ; X64-NEXT:    vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x54,0xc2,0x05]
5097 ; X64-NEXT:    vaddps %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdc,0x58,0xc0]
5098 ; X64-NEXT:    vaddps %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
5099 ; X64-NEXT:    retq # encoding: [0xc3]
5100   %res = call <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 5, i8 %x4)
5101   %res1 = call <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> zeroinitializer, i32 5, i8 %x4)
5102   %res2 = call <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 5, i8 -1)
5103   %res3 = fadd <8 x float> %res, %res1
5104   %res4 = fadd <8 x float> %res3, %res2
5105   ret <8 x float> %res4
5106 }
5107
5108 define <2 x i64> @test_x86_avx512_psra_q_128(<2 x i64> %a0, <2 x i64> %a1) {
5109 ; CHECK-LABEL: test_x86_avx512_psra_q_128:
5110 ; CHECK:       # %bb.0:
5111 ; CHECK-NEXT:    vpsraq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0xe2,0xc1]
5112 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5113   %res = call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
5114   ret <2 x i64> %res
5115 }
5116 define <2 x i64> @test_x86_avx512_mask_psra_q_128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %passthru, i8 %mask) {
5117 ; X86-LABEL: test_x86_avx512_mask_psra_q_128:
5118 ; X86:       # %bb.0:
5119 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5120 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5121 ; X86-NEXT:    vpsraq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe2,0xd1]
5122 ; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
5123 ; X86-NEXT:    retl # encoding: [0xc3]
5124 ;
5125 ; X64-LABEL: test_x86_avx512_mask_psra_q_128:
5126 ; X64:       # %bb.0:
5127 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5128 ; X64-NEXT:    vpsraq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe2,0xd1]
5129 ; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
5130 ; X64-NEXT:    retq # encoding: [0xc3]
5131   %res = call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
5132   %mask.cast = bitcast i8 %mask to <8 x i1>
5133   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
5134   %res2 = select <2 x i1> %mask.extract, <2 x i64> %res, <2 x i64> %passthru
5135   ret <2 x i64> %res2
5136 }
5137 define <2 x i64> @test_x86_avx512_maskz_psra_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
5138 ; X86-LABEL: test_x86_avx512_maskz_psra_q_128:
5139 ; X86:       # %bb.0:
5140 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5141 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5142 ; X86-NEXT:    vpsraq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xe2,0xc1]
5143 ; X86-NEXT:    retl # encoding: [0xc3]
5144 ;
5145 ; X64-LABEL: test_x86_avx512_maskz_psra_q_128:
5146 ; X64:       # %bb.0:
5147 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5148 ; X64-NEXT:    vpsraq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xe2,0xc1]
5149 ; X64-NEXT:    retq # encoding: [0xc3]
5150   %res = call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
5151   %mask.cast = bitcast i8 %mask to <8 x i1>
5152   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
5153   %res2 = select <2 x i1> %mask.extract, <2 x i64> %res, <2 x i64> zeroinitializer
5154   ret <2 x i64> %res2
5155 }
5156 declare <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64>, <2 x i64>) nounwind readnone
5157
5158
5159 define <4 x i64> @test_x86_avx512_psra_q_256(<4 x i64> %a0, <2 x i64> %a1) {
5160 ; CHECK-LABEL: test_x86_avx512_psra_q_256:
5161 ; CHECK:       # %bb.0:
5162 ; CHECK-NEXT:    vpsraq %xmm1, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0xe2,0xc1]
5163 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5164   %res = call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
5165   ret <4 x i64> %res
5166 }
5167 define <4 x i64> @test_x86_avx512_mask_psra_q_256(<4 x i64> %a0, <2 x i64> %a1, <4 x i64> %passthru, i8 %mask) {
5168 ; X86-LABEL: test_x86_avx512_mask_psra_q_256:
5169 ; X86:       # %bb.0:
5170 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5171 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5172 ; X86-NEXT:    vpsraq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe2,0xd1]
5173 ; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
5174 ; X86-NEXT:    retl # encoding: [0xc3]
5175 ;
5176 ; X64-LABEL: test_x86_avx512_mask_psra_q_256:
5177 ; X64:       # %bb.0:
5178 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5179 ; X64-NEXT:    vpsraq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe2,0xd1]
5180 ; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
5181 ; X64-NEXT:    retq # encoding: [0xc3]
5182   %res = call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
5183   %mask.cast = bitcast i8 %mask to <8 x i1>
5184   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5185   %res2 = select <4 x i1> %mask.extract, <4 x i64> %res, <4 x i64> %passthru
5186   ret <4 x i64> %res2
5187 }
5188 define <4 x i64> @test_x86_avx512_maskz_psra_q_256(<4 x i64> %a0, <2 x i64> %a1, <4 x i64> %passthru, i8 %mask) {
5189 ; X86-LABEL: test_x86_avx512_maskz_psra_q_256:
5190 ; X86:       # %bb.0:
5191 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5192 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5193 ; X86-NEXT:    vpsraq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xe2,0xc1]
5194 ; X86-NEXT:    retl # encoding: [0xc3]
5195 ;
5196 ; X64-LABEL: test_x86_avx512_maskz_psra_q_256:
5197 ; X64:       # %bb.0:
5198 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5199 ; X64-NEXT:    vpsraq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xe2,0xc1]
5200 ; X64-NEXT:    retq # encoding: [0xc3]
5201   %res = call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
5202   %mask.cast = bitcast i8 %mask to <8 x i1>
5203   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5204   %res2 = select <4 x i1> %mask.extract, <4 x i64> %res, <4 x i64> zeroinitializer
5205   ret <4 x i64> %res2
5206 }
5207 declare <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64>, <2 x i64>) nounwind readnone
5208
5209
5210 define <2 x i64> @test_x86_avx512_psrai_q_128(<2 x i64> %a0) {
5211 ; CHECK-LABEL: test_x86_avx512_psrai_q_128:
5212 ; CHECK:       # %bb.0:
5213 ; CHECK-NEXT:    vpsraq $7, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xe0,0x07]
5214 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5215   %res = call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
5216   ret <2 x i64> %res
5217 }
5218 define <2 x i64> @test_x86_avx512_mask_psrai_q_128(<2 x i64> %a0, <2 x i64> %passthru, i8 %mask) {
5219 ; X86-LABEL: test_x86_avx512_mask_psrai_q_128:
5220 ; X86:       # %bb.0:
5221 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5222 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5223 ; X86-NEXT:    vpsraq $7, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xe0,0x07]
5224 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
5225 ; X86-NEXT:    retl # encoding: [0xc3]
5226 ;
5227 ; X64-LABEL: test_x86_avx512_mask_psrai_q_128:
5228 ; X64:       # %bb.0:
5229 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5230 ; X64-NEXT:    vpsraq $7, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xe0,0x07]
5231 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
5232 ; X64-NEXT:    retq # encoding: [0xc3]
5233   %res = call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
5234   %mask.cast = bitcast i8 %mask to <8 x i1>
5235   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
5236   %res2 = select <2 x i1> %mask.extract, <2 x i64> %res, <2 x i64> %passthru
5237   ret <2 x i64> %res2
5238 }
5239 define <2 x i64> @test_x86_avx512_maskz_psrai_q_128(<2 x i64> %a0, i8 %mask) {
5240 ; X86-LABEL: test_x86_avx512_maskz_psrai_q_128:
5241 ; X86:       # %bb.0:
5242 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5243 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5244 ; X86-NEXT:    vpsraq $7, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xe0,0x07]
5245 ; X86-NEXT:    retl # encoding: [0xc3]
5246 ;
5247 ; X64-LABEL: test_x86_avx512_maskz_psrai_q_128:
5248 ; X64:       # %bb.0:
5249 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5250 ; X64-NEXT:    vpsraq $7, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xe0,0x07]
5251 ; X64-NEXT:    retq # encoding: [0xc3]
5252   %res = call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
5253   %mask.cast = bitcast i8 %mask to <8 x i1>
5254   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
5255   %res2 = select <2 x i1> %mask.extract, <2 x i64> %res, <2 x i64> zeroinitializer
5256   ret <2 x i64> %res2
5257 }
5258 declare <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64>, i32) nounwind readnone
5259
5260
5261 define <4 x i64> @test_x86_avx512_psrai_q_256(<4 x i64> %a0) {
5262 ; CHECK-LABEL: test_x86_avx512_psrai_q_256:
5263 ; CHECK:       # %bb.0:
5264 ; CHECK-NEXT:    vpsraq $7, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xe0,0x07]
5265 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5266   %res = call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
5267   ret <4 x i64> %res
5268 }
5269 define <4 x i64> @test_x86_avx512_mask_psrai_q_256(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
5270 ; X86-LABEL: test_x86_avx512_mask_psrai_q_256:
5271 ; X86:       # %bb.0:
5272 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5273 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5274 ; X86-NEXT:    vpsraq $7, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xe0,0x07]
5275 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
5276 ; X86-NEXT:    retl # encoding: [0xc3]
5277 ;
5278 ; X64-LABEL: test_x86_avx512_mask_psrai_q_256:
5279 ; X64:       # %bb.0:
5280 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5281 ; X64-NEXT:    vpsraq $7, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xe0,0x07]
5282 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
5283 ; X64-NEXT:    retq # encoding: [0xc3]
5284   %res = call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
5285   %mask.cast = bitcast i8 %mask to <8 x i1>
5286   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5287   %res2 = select <4 x i1> %mask.extract, <4 x i64> %res, <4 x i64> %passthru
5288   ret <4 x i64> %res2
5289 }
5290 define <4 x i64> @test_x86_avx512_maskz_psrai_q_256(<4 x i64> %a0, i8 %mask) {
5291 ; X86-LABEL: test_x86_avx512_maskz_psrai_q_256:
5292 ; X86:       # %bb.0:
5293 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5294 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5295 ; X86-NEXT:    vpsraq $7, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xe0,0x07]
5296 ; X86-NEXT:    retl # encoding: [0xc3]
5297 ;
5298 ; X64-LABEL: test_x86_avx512_maskz_psrai_q_256:
5299 ; X64:       # %bb.0:
5300 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5301 ; X64-NEXT:    vpsraq $7, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xe0,0x07]
5302 ; X64-NEXT:    retq # encoding: [0xc3]
5303   %res = call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
5304   %mask.cast = bitcast i8 %mask to <8 x i1>
5305   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5306   %res2 = select <4 x i1> %mask.extract, <4 x i64> %res, <4 x i64> zeroinitializer
5307   ret <4 x i64> %res2
5308 }
5309 declare <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64>, i32) nounwind readnone
5310
5311 define <2 x i64> @test_x86_avx512_psrav_q_128(<2 x i64> %a0, <2 x i64> %a1) {
5312 ; CHECK-LABEL: test_x86_avx512_psrav_q_128:
5313 ; CHECK:       # %bb.0:
5314 ; CHECK-NEXT:    vpsravq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x46,0xc1]
5315 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5316   %res = call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %a0, <2 x i64> %a1)
5317   ret <2 x i64> %res
5318 }
5319
5320 define <2 x i64> @test_x86_avx512_mask_psrav_q_128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, i8 %mask) {
5321 ; X86-LABEL: test_x86_avx512_mask_psrav_q_128:
5322 ; X86:       # %bb.0:
5323 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5324 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5325 ; X86-NEXT:    vpsravq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x46,0xd1]
5326 ; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
5327 ; X86-NEXT:    retl # encoding: [0xc3]
5328 ;
5329 ; X64-LABEL: test_x86_avx512_mask_psrav_q_128:
5330 ; X64:       # %bb.0:
5331 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5332 ; X64-NEXT:    vpsravq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x46,0xd1]
5333 ; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
5334 ; X64-NEXT:    retq # encoding: [0xc3]
5335   %res = call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %a0, <2 x i64> %a1)
5336   %mask.cast = bitcast i8 %mask to <8 x i1>
5337   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
5338   %res2 = select <2 x i1> %mask.extract, <2 x i64> %res, <2 x i64> %a2
5339   ret <2 x i64> %res2
5340 }
5341
5342 define <2 x i64> @test_x86_avx512_maskz_psrav_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
5343 ; X86-LABEL: test_x86_avx512_maskz_psrav_q_128:
5344 ; X86:       # %bb.0:
5345 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5346 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5347 ; X86-NEXT:    vpsravq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x46,0xc1]
5348 ; X86-NEXT:    retl # encoding: [0xc3]
5349 ;
5350 ; X64-LABEL: test_x86_avx512_maskz_psrav_q_128:
5351 ; X64:       # %bb.0:
5352 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5353 ; X64-NEXT:    vpsravq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x46,0xc1]
5354 ; X64-NEXT:    retq # encoding: [0xc3]
5355   %res = call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %a0, <2 x i64> %a1)
5356   %mask.cast = bitcast i8 %mask to <8 x i1>
5357   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
5358   %res2 = select <2 x i1> %mask.extract, <2 x i64> %res, <2 x i64> zeroinitializer
5359   ret <2 x i64> %res2
5360 }
5361
5362 declare <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64>, <2 x i64>) nounwind readnone
5363
5364 define <4 x i64> @test_x86_avx512_psrav_q_256(<4 x i64> %a0, <4 x i64> %a1) {
5365 ; CHECK-LABEL: test_x86_avx512_psrav_q_256:
5366 ; CHECK:       # %bb.0:
5367 ; CHECK-NEXT:    vpsravq %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x46,0xc1]
5368 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5369   %res = call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %a0, <4 x i64> %a1)
5370   ret <4 x i64> %res
5371 }
5372
5373 define <4 x i64> @test_x86_avx512_mask_psrav_q_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2, i8 %mask) {
5374 ; X86-LABEL: test_x86_avx512_mask_psrav_q_256:
5375 ; X86:       # %bb.0:
5376 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5377 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5378 ; X86-NEXT:    vpsravq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x46,0xd1]
5379 ; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
5380 ; X86-NEXT:    retl # encoding: [0xc3]
5381 ;
5382 ; X64-LABEL: test_x86_avx512_mask_psrav_q_256:
5383 ; X64:       # %bb.0:
5384 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5385 ; X64-NEXT:    vpsravq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x46,0xd1]
5386 ; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
5387 ; X64-NEXT:    retq # encoding: [0xc3]
5388   %res = call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %a0, <4 x i64> %a1)
5389   %mask.cast = bitcast i8 %mask to <8 x i1>
5390   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5391   %res2 = select <4 x i1> %mask.extract, <4 x i64> %res, <4 x i64> %a2
5392   ret <4 x i64> %res2
5393 }
5394
5395 define <4 x i64> @test_x86_avx512_maskz_psrav_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
5396 ; X86-LABEL: test_x86_avx512_maskz_psrav_q_256:
5397 ; X86:       # %bb.0:
5398 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5399 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5400 ; X86-NEXT:    vpsravq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x46,0xc1]
5401 ; X86-NEXT:    retl # encoding: [0xc3]
5402 ;
5403 ; X64-LABEL: test_x86_avx512_maskz_psrav_q_256:
5404 ; X64:       # %bb.0:
5405 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5406 ; X64-NEXT:    vpsravq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x46,0xc1]
5407 ; X64-NEXT:    retq # encoding: [0xc3]
5408   %res = call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %a0, <4 x i64> %a1)
5409   %mask.cast = bitcast i8 %mask to <8 x i1>
5410   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5411   %res2 = select <4 x i1> %mask.extract, <4 x i64> %res, <4 x i64> zeroinitializer
5412   ret <4 x i64> %res2
5413 }
5414
5415 declare <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64>, <4 x i64>) nounwind readnone
5416
5417 define <8 x float> @test_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
5418 ; CHECK-LABEL: test_vfmadd256_ps:
5419 ; CHECK:       # %bb.0:
5420 ; CHECK-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
5421 ; CHECK-NEXT:    # ymm0 = (ymm1 * ymm0) + ymm2
5422 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5423   %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
5424   ret <8 x float> %1
5425 }
5426
5427 define <8 x float> @test_mask_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
5428 ; X86-LABEL: test_mask_vfmadd256_ps:
5429 ; X86:       # %bb.0:
5430 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5431 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5432 ; X86-NEXT:    vfmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x98,0xc1]
5433 ; X86-NEXT:    # ymm0 = (ymm0 * ymm1) + ymm2
5434 ; X86-NEXT:    retl # encoding: [0xc3]
5435 ;
5436 ; X64-LABEL: test_mask_vfmadd256_ps:
5437 ; X64:       # %bb.0:
5438 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5439 ; X64-NEXT:    vfmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x98,0xc1]
5440 ; X64-NEXT:    # ymm0 = (ymm0 * ymm1) + ymm2
5441 ; X64-NEXT:    retq # encoding: [0xc3]
5442   %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
5443   %2 = bitcast i8 %mask to <8 x i1>
5444   %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %a0
5445   ret <8 x float> %3
5446 }
5447
5448 define <4 x float> @test_vfmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
5449 ; CHECK-LABEL: test_vfmadd128_ps:
5450 ; CHECK:       # %bb.0:
5451 ; CHECK-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
5452 ; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
5453 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5454   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
5455   ret <4 x float> %1
5456 }
5457
5458 define <4 x float> @test_mask_vfmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
5459 ; X86-LABEL: test_mask_vfmadd128_ps:
5460 ; X86:       # %bb.0:
5461 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5462 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5463 ; X86-NEXT:    vfmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x98,0xc1]
5464 ; X86-NEXT:    # xmm0 = (xmm0 * xmm1) + xmm2
5465 ; X86-NEXT:    retl # encoding: [0xc3]
5466 ;
5467 ; X64-LABEL: test_mask_vfmadd128_ps:
5468 ; X64:       # %bb.0:
5469 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5470 ; X64-NEXT:    vfmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x98,0xc1]
5471 ; X64-NEXT:    # xmm0 = (xmm0 * xmm1) + xmm2
5472 ; X64-NEXT:    retq # encoding: [0xc3]
5473   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
5474   %2 = bitcast i8 %mask to <8 x i1>
5475   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5476   %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %a0
5477   ret <4 x float> %3
5478 }
5479
5480 define <4 x double> @test_fmadd256_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
5481 ; CHECK-LABEL: test_fmadd256_pd:
5482 ; CHECK:       # %bb.0:
5483 ; CHECK-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
5484 ; CHECK-NEXT:    # ymm0 = (ymm1 * ymm0) + ymm2
5485 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5486   %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c)
5487   ret <4 x double> %1
5488 }
5489
5490 define <4 x double> @test_mask_fmadd256_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask) {
5491 ; X86-LABEL: test_mask_fmadd256_pd:
5492 ; X86:       # %bb.0:
5493 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5494 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5495 ; X86-NEXT:    vfmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x98,0xc1]
5496 ; X86-NEXT:    # ymm0 = (ymm0 * ymm1) + ymm2
5497 ; X86-NEXT:    retl # encoding: [0xc3]
5498 ;
5499 ; X64-LABEL: test_mask_fmadd256_pd:
5500 ; X64:       # %bb.0:
5501 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5502 ; X64-NEXT:    vfmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x98,0xc1]
5503 ; X64-NEXT:    # ymm0 = (ymm0 * ymm1) + ymm2
5504 ; X64-NEXT:    retq # encoding: [0xc3]
5505   %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c)
5506   %2 = bitcast i8 %mask to <8 x i1>
5507   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5508   %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %a
5509   ret <4 x double> %3
5510 }
5511
5512 define <2 x double> @test_fmadd128_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
5513 ; CHECK-LABEL: test_fmadd128_pd:
5514 ; CHECK:       # %bb.0:
5515 ; CHECK-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
5516 ; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
5517 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5518   %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
5519   ret <2 x double> %1
5520 }
5521
5522 define <2 x double> @test_mask_fmadd128_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
5523 ; X86-LABEL: test_mask_fmadd128_pd:
5524 ; X86:       # %bb.0:
5525 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5526 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5527 ; X86-NEXT:    vfmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x98,0xc1]
5528 ; X86-NEXT:    # xmm0 = (xmm0 * xmm1) + xmm2
5529 ; X86-NEXT:    retl # encoding: [0xc3]
5530 ;
5531 ; X64-LABEL: test_mask_fmadd128_pd:
5532 ; X64:       # %bb.0:
5533 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5534 ; X64-NEXT:    vfmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x98,0xc1]
5535 ; X64-NEXT:    # xmm0 = (xmm0 * xmm1) + xmm2
5536 ; X64-NEXT:    retq # encoding: [0xc3]
5537   %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
5538   %2 = bitcast i8 %mask to <8 x i1>
5539   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
5540   %3 = select <2 x i1> %extract, <2 x double> %1, <2 x double> %a
5541   ret <2 x double> %3
5542 }
5543
5544 define <2 x double>@test_int_x86_avx512_mask3_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
5545 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_128:
5546 ; X86:       # %bb.0:
5547 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5548 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5549 ; X86-NEXT:    vfmadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb8,0xd1]
5550 ; X86-NEXT:    # xmm2 = (xmm0 * xmm1) + xmm2
5551 ; X86-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
5552 ; X86-NEXT:    retl # encoding: [0xc3]
5553 ;
5554 ; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_128:
5555 ; X64:       # %bb.0:
5556 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5557 ; X64-NEXT:    vfmadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb8,0xd1]
5558 ; X64-NEXT:    # xmm2 = (xmm0 * xmm1) + xmm2
5559 ; X64-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
5560 ; X64-NEXT:    retq # encoding: [0xc3]
5561   %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2)
5562   %2 = bitcast i8 %x3 to <8 x i1>
5563   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
5564   %3 = select <2 x i1> %extract, <2 x double> %1, <2 x double> %x2
5565   ret <2 x double> %3
5566 }
5567
5568 define <2 x double>@test_int_x86_avx512_maskz_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
5569 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_128:
5570 ; X86:       # %bb.0:
5571 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5572 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5573 ; X86-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa8,0xc2]
5574 ; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
5575 ; X86-NEXT:    retl # encoding: [0xc3]
5576 ;
5577 ; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_128:
5578 ; X64:       # %bb.0:
5579 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5580 ; X64-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa8,0xc2]
5581 ; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
5582 ; X64-NEXT:    retq # encoding: [0xc3]
5583   %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2)
5584   %2 = bitcast i8 %x3 to <8 x i1>
5585   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
5586   %3 = select <2 x i1> %extract, <2 x double> %1, <2 x double> zeroinitializer
5587   ret <2 x double> %3
5588 }
5589
5590 define <4 x double>@test_int_x86_avx512_mask3_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
5591 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_256:
5592 ; X86:       # %bb.0:
5593 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5594 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5595 ; X86-NEXT:    vfmadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb8,0xd1]
5596 ; X86-NEXT:    # ymm2 = (ymm0 * ymm1) + ymm2
5597 ; X86-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
5598 ; X86-NEXT:    retl # encoding: [0xc3]
5599 ;
5600 ; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_256:
5601 ; X64:       # %bb.0:
5602 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5603 ; X64-NEXT:    vfmadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb8,0xd1]
5604 ; X64-NEXT:    # ymm2 = (ymm0 * ymm1) + ymm2
5605 ; X64-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
5606 ; X64-NEXT:    retq # encoding: [0xc3]
5607   %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2)
5608   %2 = bitcast i8 %x3 to <8 x i1>
5609   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5610   %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %x2
5611   ret <4 x double> %3
5612 }
5613
5614 define <4 x double>@test_int_x86_avx512_maskz_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
5615 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_256:
5616 ; X86:       # %bb.0:
5617 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5618 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5619 ; X86-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa8,0xc2]
5620 ; X86-NEXT:    # ymm0 = (ymm1 * ymm0) + ymm2
5621 ; X86-NEXT:    retl # encoding: [0xc3]
5622 ;
5623 ; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_256:
5624 ; X64:       # %bb.0:
5625 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5626 ; X64-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa8,0xc2]
5627 ; X64-NEXT:    # ymm0 = (ymm1 * ymm0) + ymm2
5628 ; X64-NEXT:    retq # encoding: [0xc3]
5629   %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2)
5630   %2 = bitcast i8 %x3 to <8 x i1>
5631   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5632   %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> zeroinitializer
5633   ret <4 x double> %3
5634 }
5635
5636 define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
5637 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_128:
5638 ; X86:       # %bb.0:
5639 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5640 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5641 ; X86-NEXT:    vfmadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb8,0xd1]
5642 ; X86-NEXT:    # xmm2 = (xmm0 * xmm1) + xmm2
5643 ; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
5644 ; X86-NEXT:    retl # encoding: [0xc3]
5645 ;
5646 ; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_128:
5647 ; X64:       # %bb.0:
5648 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5649 ; X64-NEXT:    vfmadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb8,0xd1]
5650 ; X64-NEXT:    # xmm2 = (xmm0 * xmm1) + xmm2
5651 ; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
5652 ; X64-NEXT:    retq # encoding: [0xc3]
5653   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2)
5654   %2 = bitcast i8 %x3 to <8 x i1>
5655   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5656   %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %x2
5657   ret <4 x float> %3
5658 }
5659
5660 define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
5661 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_128:
5662 ; X86:       # %bb.0:
5663 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5664 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5665 ; X86-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa8,0xc2]
5666 ; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
5667 ; X86-NEXT:    retl # encoding: [0xc3]
5668 ;
5669 ; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_128:
5670 ; X64:       # %bb.0:
5671 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5672 ; X64-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa8,0xc2]
5673 ; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
5674 ; X64-NEXT:    retq # encoding: [0xc3]
5675   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2)
5676   %2 = bitcast i8 %x3 to <8 x i1>
5677   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5678   %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> zeroinitializer
5679   ret <4 x float> %3
5680 }
5681
5682 define <8 x float>@test_int_x86_avx512_mask3_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
5683 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_256:
5684 ; X86:       # %bb.0:
5685 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5686 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5687 ; X86-NEXT:    vfmadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb8,0xd1]
5688 ; X86-NEXT:    # ymm2 = (ymm0 * ymm1) + ymm2
5689 ; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
5690 ; X86-NEXT:    retl # encoding: [0xc3]
5691 ;
5692 ; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_256:
5693 ; X64:       # %bb.0:
5694 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5695 ; X64-NEXT:    vfmadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb8,0xd1]
5696 ; X64-NEXT:    # ymm2 = (ymm0 * ymm1) + ymm2
5697 ; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
5698 ; X64-NEXT:    retq # encoding: [0xc3]
5699   %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2)
5700   %2 = bitcast i8 %x3 to <8 x i1>
5701   %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %x2
5702   ret <8 x float> %3
5703 }
5704
5705 define <8 x float>@test_int_x86_avx512_maskz_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
5706 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_256:
5707 ; X86:       # %bb.0:
5708 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5709 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5710 ; X86-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa8,0xc2]
5711 ; X86-NEXT:    # ymm0 = (ymm1 * ymm0) + ymm2
5712 ; X86-NEXT:    retl # encoding: [0xc3]
5713 ;
5714 ; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_256:
5715 ; X64:       # %bb.0:
5716 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5717 ; X64-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa8,0xc2]
5718 ; X64-NEXT:    # ymm0 = (ymm1 * ymm0) + ymm2
5719 ; X64-NEXT:    retq # encoding: [0xc3]
5720   %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2)
5721   %2 = bitcast i8 %x3 to <8 x i1>
5722   %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> zeroinitializer
5723   ret <8 x float> %3
5724 }
5725
5726 define <2 x double>@test_int_x86_avx512_mask3_vfmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
5727 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_128:
5728 ; X86:       # %bb.0:
5729 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5730 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5731 ; X86-NEXT:    vfmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xba,0xd1]
5732 ; X86-NEXT:    # xmm2 = (xmm0 * xmm1) - xmm2
5733 ; X86-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
5734 ; X86-NEXT:    retl # encoding: [0xc3]
5735 ;
5736 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_128:
5737 ; X64:       # %bb.0:
5738 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5739 ; X64-NEXT:    vfmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xba,0xd1]
5740 ; X64-NEXT:    # xmm2 = (xmm0 * xmm1) - xmm2
5741 ; X64-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
5742 ; X64-NEXT:    retq # encoding: [0xc3]
5743   %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x2
5744   %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %1)
5745   %3 = bitcast i8 %x3 to <8 x i1>
5746   %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <2 x i32> <i32 0, i32 1>
5747   %4 = select <2 x i1> %extract, <2 x double> %2, <2 x double> %x2
5748   ret <2 x double> %4
5749 }
5750
5751 define <4 x double>@test_int_x86_avx512_mask3_vfmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
5752 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_256:
5753 ; X86:       # %bb.0:
5754 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5755 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5756 ; X86-NEXT:    vfmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xba,0xd1]
5757 ; X86-NEXT:    # ymm2 = (ymm0 * ymm1) - ymm2
5758 ; X86-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
5759 ; X86-NEXT:    retl # encoding: [0xc3]
5760 ;
5761 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_256:
5762 ; X64:       # %bb.0:
5763 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5764 ; X64-NEXT:    vfmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xba,0xd1]
5765 ; X64-NEXT:    # ymm2 = (ymm0 * ymm1) - ymm2
5766 ; X64-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
5767 ; X64-NEXT:    retq # encoding: [0xc3]
5768   %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
5769   %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %1)
5770   %3 = bitcast i8 %x3 to <8 x i1>
5771   %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5772   %4 = select <4 x i1> %extract, <4 x double> %2, <4 x double> %x2
5773   ret <4 x double> %4
5774 }
5775
5776 define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
5777 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_128:
5778 ; X86:       # %bb.0:
5779 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5780 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5781 ; X86-NEXT:    vfmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xba,0xd1]
5782 ; X86-NEXT:    # xmm2 = (xmm0 * xmm1) - xmm2
5783 ; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
5784 ; X86-NEXT:    retl # encoding: [0xc3]
5785 ;
5786 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_128:
5787 ; X64:       # %bb.0:
5788 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5789 ; X64-NEXT:    vfmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xba,0xd1]
5790 ; X64-NEXT:    # xmm2 = (xmm0 * xmm1) - xmm2
5791 ; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
5792 ; X64-NEXT:    retq # encoding: [0xc3]
5793   %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
5794   %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %1)
5795   %3 = bitcast i8 %x3 to <8 x i1>
5796   %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5797   %4 = select <4 x i1> %extract, <4 x float> %2, <4 x float> %x2
5798   ret <4 x float> %4
5799 }
5800
5801 define <8 x float>@test_int_x86_avx512_mask3_vfmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
5802 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_256:
5803 ; X86:       # %bb.0:
5804 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5805 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5806 ; X86-NEXT:    vfmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xba,0xd1]
5807 ; X86-NEXT:    # ymm2 = (ymm0 * ymm1) - ymm2
5808 ; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
5809 ; X86-NEXT:    retl # encoding: [0xc3]
5810 ;
5811 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_256:
5812 ; X64:       # %bb.0:
5813 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5814 ; X64-NEXT:    vfmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xba,0xd1]
5815 ; X64-NEXT:    # ymm2 = (ymm0 * ymm1) - ymm2
5816 ; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
5817 ; X64-NEXT:    retq # encoding: [0xc3]
5818   %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
5819   %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %1)
5820   %3 = bitcast i8 %x3 to <8 x i1>
5821   %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> %x2
5822   ret <8 x float> %4
5823 }
5824
5825 define <8 x float> @test_vfnmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
5826 ; CHECK-LABEL: test_vfnmadd256_ps:
5827 ; CHECK:       # %bb.0:
5828 ; CHECK-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xac,0xc2]
5829 ; CHECK-NEXT:    # ymm0 = -(ymm1 * ymm0) + ymm2
5830 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5831   %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
5832   %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %1, <8 x float> %a2)
5833   ret <8 x float> %2
5834 }
5835
5836 define <8 x float> @test_mask_vfnmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
5837 ; X86-LABEL: test_mask_vfnmadd256_ps:
5838 ; X86:       # %bb.0:
5839 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5840 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5841 ; X86-NEXT:    vfnmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9c,0xc1]
5842 ; X86-NEXT:    # ymm0 = -(ymm0 * ymm1) + ymm2
5843 ; X86-NEXT:    retl # encoding: [0xc3]
5844 ;
5845 ; X64-LABEL: test_mask_vfnmadd256_ps:
5846 ; X64:       # %bb.0:
5847 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5848 ; X64-NEXT:    vfnmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9c,0xc1]
5849 ; X64-NEXT:    # ymm0 = -(ymm0 * ymm1) + ymm2
5850 ; X64-NEXT:    retq # encoding: [0xc3]
5851   %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
5852   %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %1, <8 x float> %a2)
5853   %3 = bitcast i8 %mask to <8 x i1>
5854   %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> %a0
5855   ret <8 x float> %4
5856 }
5857
5858 define <4 x float> @test_vfnmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
5859 ; CHECK-LABEL: test_vfnmadd128_ps:
5860 ; CHECK:       # %bb.0:
5861 ; CHECK-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xac,0xc2]
5862 ; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
5863 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5864   %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
5865   %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %1, <4 x float> %a2)
5866   ret <4 x float> %2
5867 }
5868
5869 define <4 x float> @test_mask_vfnmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
5870 ; X86-LABEL: test_mask_vfnmadd128_ps:
5871 ; X86:       # %bb.0:
5872 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5873 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5874 ; X86-NEXT:    vfnmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9c,0xc1]
5875 ; X86-NEXT:    # xmm0 = -(xmm0 * xmm1) + xmm2
5876 ; X86-NEXT:    retl # encoding: [0xc3]
5877 ;
5878 ; X64-LABEL: test_mask_vfnmadd128_ps:
5879 ; X64:       # %bb.0:
5880 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5881 ; X64-NEXT:    vfnmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9c,0xc1]
5882 ; X64-NEXT:    # xmm0 = -(xmm0 * xmm1) + xmm2
5883 ; X64-NEXT:    retq # encoding: [0xc3]
5884   %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
5885   %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %1, <4 x float> %a2)
5886   %3 = bitcast i8 %mask to <8 x i1>
5887   %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5888   %4 = select <4 x i1> %extract, <4 x float> %2, <4 x float> %a0
5889   ret <4 x float> %4
5890 }
5891
5892 define <4 x double> @test_vfnmadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
5893 ; CHECK-LABEL: test_vfnmadd256_pd:
5894 ; CHECK:       # %bb.0:
5895 ; CHECK-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xac,0xc2]
5896 ; CHECK-NEXT:    # ymm0 = -(ymm1 * ymm0) + ymm2
5897 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5898   %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
5899   %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %1, <4 x double> %a2)
5900   ret <4 x double> %2
5901 }
5902
5903 define <4 x double> @test_mask_vfnmadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
5904 ; X86-LABEL: test_mask_vfnmadd256_pd:
5905 ; X86:       # %bb.0:
5906 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5907 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5908 ; X86-NEXT:    vfnmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9c,0xc1]
5909 ; X86-NEXT:    # ymm0 = -(ymm0 * ymm1) + ymm2
5910 ; X86-NEXT:    retl # encoding: [0xc3]
5911 ;
5912 ; X64-LABEL: test_mask_vfnmadd256_pd:
5913 ; X64:       # %bb.0:
5914 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5915 ; X64-NEXT:    vfnmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9c,0xc1]
5916 ; X64-NEXT:    # ymm0 = -(ymm0 * ymm1) + ymm2
5917 ; X64-NEXT:    retq # encoding: [0xc3]
5918   %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
5919   %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %1, <4 x double> %a2)
5920   %3 = bitcast i8 %mask to <8 x i1>
5921   %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5922   %4 = select <4 x i1> %extract, <4 x double> %2, <4 x double> %a0
5923   ret <4 x double> %4
5924 }
5925
5926 define <2 x double> @test_vfnmadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
5927 ; CHECK-LABEL: test_vfnmadd128_pd:
5928 ; CHECK:       # %bb.0:
5929 ; CHECK-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xac,0xc2]
5930 ; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
5931 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5932   %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a1
5933   %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %1, <2 x double> %a2)
5934   ret <2 x double> %2
5935 }
5936
5937 define <2 x double> @test_mask_vfnmadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
5938 ; X86-LABEL: test_mask_vfnmadd128_pd:
5939 ; X86:       # %bb.0:
5940 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5941 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5942 ; X86-NEXT:    vfnmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9c,0xc1]
5943 ; X86-NEXT:    # xmm0 = -(xmm0 * xmm1) + xmm2
5944 ; X86-NEXT:    retl # encoding: [0xc3]
5945 ;
5946 ; X64-LABEL: test_mask_vfnmadd128_pd:
5947 ; X64:       # %bb.0:
5948 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5949 ; X64-NEXT:    vfnmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9c,0xc1]
5950 ; X64-NEXT:    # xmm0 = -(xmm0 * xmm1) + xmm2
5951 ; X64-NEXT:    retq # encoding: [0xc3]
5952   %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a1
5953   %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %1, <2 x double> %a2)
5954   %3 = bitcast i8 %mask to <8 x i1>
5955   %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <2 x i32> <i32 0, i32 1>
5956   %4 = select <2 x i1> %extract, <2 x double> %2, <2 x double> %a0
5957   ret <2 x double> %4
5958 }
5959
5960 define <8 x float> @test_vfnmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
5961 ; CHECK-LABEL: test_vfnmsub256_ps:
5962 ; CHECK:       # %bb.0:
5963 ; CHECK-NEXT:    vfnmsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xae,0xc2]
5964 ; CHECK-NEXT:    # ymm0 = -(ymm1 * ymm0) - ymm2
5965 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5966   %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
5967   %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
5968   %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %1, <8 x float> %2)
5969   ret <8 x float> %3
5970 }
5971
5972 define <8 x float> @test_mask_vfnmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
5973 ; X86-LABEL: test_mask_vfnmsub256_ps:
5974 ; X86:       # %bb.0:
5975 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5976 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5977 ; X86-NEXT:    vfnmsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9e,0xc1]
5978 ; X86-NEXT:    # ymm0 = -(ymm0 * ymm1) - ymm2
5979 ; X86-NEXT:    retl # encoding: [0xc3]
5980 ;
5981 ; X64-LABEL: test_mask_vfnmsub256_ps:
5982 ; X64:       # %bb.0:
5983 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5984 ; X64-NEXT:    vfnmsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9e,0xc1]
5985 ; X64-NEXT:    # ymm0 = -(ymm0 * ymm1) - ymm2
5986 ; X64-NEXT:    retq # encoding: [0xc3]
5987   %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
5988   %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
5989   %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %1, <8 x float> %2)
5990   %4 = bitcast i8 %mask to <8 x i1>
5991   %5 = select <8 x i1> %4, <8 x float> %3, <8 x float> %a0
5992   ret <8 x float> %5
5993 }
5994
5995 define <4 x float> @test_vfnmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
5996 ; CHECK-LABEL: test_vfnmsub128_ps:
5997 ; CHECK:       # %bb.0:
5998 ; CHECK-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xae,0xc2]
5999 ; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) - xmm2
6000 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6001   %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
6002   %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
6003   %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %1, <4 x float> %2)
6004   ret <4 x float> %3
6005 }
6006
6007 define <4 x float> @test_mask_vfnmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
6008 ; X86-LABEL: test_mask_vfnmsub128_ps:
6009 ; X86:       # %bb.0:
6010 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6011 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6012 ; X86-NEXT:    vfnmsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9e,0xc1]
6013 ; X86-NEXT:    # xmm0 = -(xmm0 * xmm1) - xmm2
6014 ; X86-NEXT:    retl # encoding: [0xc3]
6015 ;
6016 ; X64-LABEL: test_mask_vfnmsub128_ps:
6017 ; X64:       # %bb.0:
6018 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6019 ; X64-NEXT:    vfnmsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9e,0xc1]
6020 ; X64-NEXT:    # xmm0 = -(xmm0 * xmm1) - xmm2
6021 ; X64-NEXT:    retq # encoding: [0xc3]
6022   %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
6023   %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
6024   %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %1, <4 x float> %2)
6025   %4 = bitcast i8 %mask to <8 x i1>
6026   %extract = shufflevector <8 x i1> %4, <8 x i1> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6027   %5 = select <4 x i1> %extract, <4 x float> %3, <4 x float> %a0
6028   ret <4 x float> %5
6029 }
6030
6031 define <4 x double> @test_vfnmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
6032 ; CHECK-LABEL: test_vfnmsub256_pd:
6033 ; CHECK:       # %bb.0:
6034 ; CHECK-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xae,0xc2]
6035 ; CHECK-NEXT:    # ymm0 = -(ymm1 * ymm0) - ymm2
6036 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6037   %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
6038   %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
6039   %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %1, <4 x double> %2)
6040   ret <4 x double> %3
6041 }
6042
6043 define <4 x double> @test_mask_vfnmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
6044 ; X86-LABEL: test_mask_vfnmsub256_pd:
6045 ; X86:       # %bb.0:
6046 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6047 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6048 ; X86-NEXT:    vfnmsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9e,0xc1]
6049 ; X86-NEXT:    # ymm0 = -(ymm0 * ymm1) - ymm2
6050 ; X86-NEXT:    retl # encoding: [0xc3]
6051 ;
6052 ; X64-LABEL: test_mask_vfnmsub256_pd:
6053 ; X64:       # %bb.0:
6054 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6055 ; X64-NEXT:    vfnmsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9e,0xc1]
6056 ; X64-NEXT:    # ymm0 = -(ymm0 * ymm1) - ymm2
6057 ; X64-NEXT:    retq # encoding: [0xc3]
6058   %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
6059   %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
6060   %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %1, <4 x double> %2)
6061   %4 = bitcast i8 %mask to <8 x i1>
6062   %extract = shufflevector <8 x i1> %4, <8 x i1> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6063   %5 = select <4 x i1> %extract, <4 x double> %3, <4 x double> %a0
6064   ret <4 x double> %5
6065 }
6066
6067 define <2 x double> @test_vfnmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
6068 ; CHECK-LABEL: test_vfnmsub128_pd:
6069 ; CHECK:       # %bb.0:
6070 ; CHECK-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xae,0xc2]
6071 ; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) - xmm2
6072 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6073   %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a1
6074   %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
6075   %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %1, <2 x double> %2)
6076   ret <2 x double> %3
6077 }
6078
6079 define <2 x double> @test_mask_vfnmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
6080 ; X86-LABEL: test_mask_vfnmsub128_pd:
6081 ; X86:       # %bb.0:
6082 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6083 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6084 ; X86-NEXT:    vfnmsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9e,0xc1]
6085 ; X86-NEXT:    # xmm0 = -(xmm0 * xmm1) - xmm2
6086 ; X86-NEXT:    retl # encoding: [0xc3]
6087 ;
6088 ; X64-LABEL: test_mask_vfnmsub128_pd:
6089 ; X64:       # %bb.0:
6090 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6091 ; X64-NEXT:    vfnmsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9e,0xc1]
6092 ; X64-NEXT:    # xmm0 = -(xmm0 * xmm1) - xmm2
6093 ; X64-NEXT:    retq # encoding: [0xc3]
6094   %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a1
6095   %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
6096   %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %1, <2 x double> %2)
6097   %4 = bitcast i8 %mask to <8 x i1>
6098   %extract = shufflevector <8 x i1> %4, <8 x i1> %4, <2 x i32> <i32 0, i32 1>
6099   %5 = select <2 x i1> %extract, <2 x double> %3, <2 x double> %a0
6100   ret <2 x double> %5
6101 }
6102
6103 define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
6104 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_128:
6105 ; X86:       # %bb.0:
6106 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6107 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6108 ; X86-NEXT:    vfnmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xbe,0xd1]
6109 ; X86-NEXT:    # xmm2 = -(xmm0 * xmm1) - xmm2
6110 ; X86-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
6111 ; X86-NEXT:    retl # encoding: [0xc3]
6112 ;
6113 ; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_128:
6114 ; X64:       # %bb.0:
6115 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6116 ; X64-NEXT:    vfnmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xbe,0xd1]
6117 ; X64-NEXT:    # xmm2 = -(xmm0 * xmm1) - xmm2
6118 ; X64-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
6119 ; X64-NEXT:    retq # encoding: [0xc3]
6120   %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x0
6121   %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x2
6122   %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %x1, <2 x double> %2)
6123   %4 = bitcast i8 %x3 to <8 x i1>
6124   %extract = shufflevector <8 x i1> %4, <8 x i1> %4, <2 x i32> <i32 0, i32 1>
6125   %5 = select <2 x i1> %extract, <2 x double> %3, <2 x double> %x2
6126   ret <2 x double> %5
6127 }
6128
6129 define <4 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
6130 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_256:
6131 ; X86:       # %bb.0:
6132 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6133 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6134 ; X86-NEXT:    vfnmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xbe,0xd1]
6135 ; X86-NEXT:    # ymm2 = -(ymm0 * ymm1) - ymm2
6136 ; X86-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
6137 ; X86-NEXT:    retl # encoding: [0xc3]
6138 ;
6139 ; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_256:
6140 ; X64:       # %bb.0:
6141 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6142 ; X64-NEXT:    vfnmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xbe,0xd1]
6143 ; X64-NEXT:    # ymm2 = -(ymm0 * ymm1) - ymm2
6144 ; X64-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
6145 ; X64-NEXT:    retq # encoding: [0xc3]
6146   %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x0
6147   %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
6148   %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %x1, <4 x double> %2)
6149   %4 = bitcast i8 %x3 to <8 x i1>
6150   %extract = shufflevector <8 x i1> %4, <8 x i1> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6151   %5 = select <4 x i1> %extract, <4 x double> %3, <4 x double> %x2
6152   ret <4 x double> %5
6153 }
6154
6155 define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
6156 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_128:
6157 ; X86:       # %bb.0:
6158 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6159 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6160 ; X86-NEXT:    vfnmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xbe,0xd1]
6161 ; X86-NEXT:    # xmm2 = -(xmm0 * xmm1) - xmm2
6162 ; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
6163 ; X86-NEXT:    retl # encoding: [0xc3]
6164 ;
6165 ; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_128:
6166 ; X64:       # %bb.0:
6167 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6168 ; X64-NEXT:    vfnmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xbe,0xd1]
6169 ; X64-NEXT:    # xmm2 = -(xmm0 * xmm1) - xmm2
6170 ; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
6171 ; X64-NEXT:    retq # encoding: [0xc3]
6172   %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x0
6173   %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
6174   %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %x1, <4 x float> %2)
6175   %4 = bitcast i8 %x3 to <8 x i1>
6176   %extract = shufflevector <8 x i1> %4, <8 x i1> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6177   %5 = select <4 x i1> %extract, <4 x float> %3, <4 x float> %x2
6178   ret <4 x float> %5
6179 }
6180
6181 define <8 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
6182 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_256:
6183 ; X86:       # %bb.0:
6184 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6185 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6186 ; X86-NEXT:    vfnmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xbe,0xd1]
6187 ; X86-NEXT:    # ymm2 = -(ymm0 * ymm1) - ymm2
6188 ; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
6189 ; X86-NEXT:    retl # encoding: [0xc3]
6190 ;
6191 ; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_256:
6192 ; X64:       # %bb.0:
6193 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6194 ; X64-NEXT:    vfnmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xbe,0xd1]
6195 ; X64-NEXT:    # ymm2 = -(ymm0 * ymm1) - ymm2
6196 ; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
6197 ; X64-NEXT:    retq # encoding: [0xc3]
6198   %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x0
6199   %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
6200   %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %x1, <8 x float> %2)
6201   %4 = bitcast i8 %x3 to <8 x i1>
6202   %5 = select <8 x i1> %4, <8 x float> %3, <8 x float> %x2
6203   ret <8 x float> %5
6204 }
6205
6206 define <8 x float> @test_fmaddsub256_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
6207 ; CHECK-LABEL: test_fmaddsub256_ps:
6208 ; CHECK:       # %bb.0:
6209 ; CHECK-NEXT:    vfmaddsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa6,0xc2]
6210 ; CHECK-NEXT:    # ymm0 = (ymm1 * ymm0) +/- ymm2
6211 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6212   %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c)
6213   %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
6214   %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %2)
6215   %4 = shufflevector <8 x float> %3, <8 x float> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
6216   ret <8 x float> %4
6217 }
6218
6219 define <8 x float> @test_mask_fmaddsub256_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask) {
6220 ; X86-LABEL: test_mask_fmaddsub256_ps:
6221 ; X86:       # %bb.0:
6222 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6223 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6224 ; X86-NEXT:    vfmaddsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x96,0xc1]
6225 ; X86-NEXT:    # ymm0 = (ymm0 * ymm1) +/- ymm2
6226 ; X86-NEXT:    retl # encoding: [0xc3]
6227 ;
6228 ; X64-LABEL: test_mask_fmaddsub256_ps:
6229 ; X64:       # %bb.0:
6230 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6231 ; X64-NEXT:    vfmaddsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x96,0xc1]
6232 ; X64-NEXT:    # ymm0 = (ymm0 * ymm1) +/- ymm2
6233 ; X64-NEXT:    retq # encoding: [0xc3]
6234   %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c)
6235   %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
6236   %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %2)
6237   %4 = shufflevector <8 x float> %3, <8 x float> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
6238   %5 = bitcast i8 %mask to <8 x i1>
6239   %6 = select <8 x i1> %5, <8 x float> %4, <8 x float> %a
6240   ret <8 x float> %6
6241 }
6242
6243 define <4 x float> @test_fmaddsub128_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
6244 ; CHECK-LABEL: test_fmaddsub128_ps:
6245 ; CHECK:       # %bb.0:
6246 ; CHECK-NEXT:    vfmaddsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa6,0xc2]
6247 ; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) +/- xmm2
6248 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6249   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
6250   %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
6251   %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %2)
6252   %4 = shufflevector <4 x float> %3, <4 x float> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
6253   ret <4 x float> %4
6254 }
6255
6256 define <4 x float> @test_mask_fmaddsub128_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
6257 ; X86-LABEL: test_mask_fmaddsub128_ps:
6258 ; X86:       # %bb.0:
6259 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6260 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6261 ; X86-NEXT:    vfmaddsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x96,0xc1]
6262 ; X86-NEXT:    # xmm0 = (xmm0 * xmm1) +/- xmm2
6263 ; X86-NEXT:    retl # encoding: [0xc3]
6264 ;
6265 ; X64-LABEL: test_mask_fmaddsub128_ps:
6266 ; X64:       # %bb.0:
6267 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6268 ; X64-NEXT:    vfmaddsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x96,0xc1]
6269 ; X64-NEXT:    # xmm0 = (xmm0 * xmm1) +/- xmm2
6270 ; X64-NEXT:    retq # encoding: [0xc3]
6271   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
6272   %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
6273   %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %2)
6274   %4 = shufflevector <4 x float> %3, <4 x float> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
6275   %5 = bitcast i8 %mask to <8 x i1>
6276   %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6277   %6 = select <4 x i1> %extract, <4 x float> %4, <4 x float> %a
6278   ret <4 x float> %6
6279 }
6280
6281 define <4 x double> @test_vfmaddsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
6282 ; CHECK-LABEL: test_vfmaddsub256_pd:
6283 ; CHECK:       # %bb.0:
6284 ; CHECK-NEXT:    vfmaddsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa6,0xc2]
6285 ; CHECK-NEXT:    # ymm0 = (ymm1 * ymm0) +/- ymm2
6286 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6287   %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
6288   %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
6289   %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2)
6290   %4 = shufflevector <4 x double> %3, <4 x double> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
6291   ret <4 x double> %4
6292 }
6293
6294 define <4 x double> @test_mask_vfmaddsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
6295 ; X86-LABEL: test_mask_vfmaddsub256_pd:
6296 ; X86:       # %bb.0:
6297 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6298 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6299 ; X86-NEXT:    vfmaddsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x96,0xc1]
6300 ; X86-NEXT:    # ymm0 = (ymm0 * ymm1) +/- ymm2
6301 ; X86-NEXT:    retl # encoding: [0xc3]
6302 ;
6303 ; X64-LABEL: test_mask_vfmaddsub256_pd:
6304 ; X64:       # %bb.0:
6305 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6306 ; X64-NEXT:    vfmaddsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x96,0xc1]
6307 ; X64-NEXT:    # ymm0 = (ymm0 * ymm1) +/- ymm2
6308 ; X64-NEXT:    retq # encoding: [0xc3]
6309   %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
6310   %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
6311   %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2)
6312   %4 = shufflevector <4 x double> %3, <4 x double> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
6313   %5 = bitcast i8 %mask to <8 x i1>
6314   %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6315   %6 = select <4 x i1> %extract, <4 x double> %4, <4 x double> %a0
6316   ret <4 x double> %6
6317 }
6318
6319 define <2 x double> @test_vfmaddsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
6320 ; CHECK-LABEL: test_vfmaddsub128_pd:
6321 ; CHECK:       # %bb.0:
6322 ; CHECK-NEXT:    vfmaddsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa6,0xc2]
6323 ; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) +/- xmm2
6324 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6325   %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
6326   %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
6327   %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2)
6328   %4 = shufflevector <2 x double> %3, <2 x double> %1, <2 x i32> <i32 0, i32 3>
6329   ret <2 x double> %4
6330 }
6331
6332 define <2 x double> @test_mask_vfmaddsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
6333 ; X86-LABEL: test_mask_vfmaddsub128_pd:
6334 ; X86:       # %bb.0:
6335 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6336 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6337 ; X86-NEXT:    vfmaddsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x96,0xc1]
6338 ; X86-NEXT:    # xmm0 = (xmm0 * xmm1) +/- xmm2
6339 ; X86-NEXT:    retl # encoding: [0xc3]
6340 ;
6341 ; X64-LABEL: test_mask_vfmaddsub128_pd:
6342 ; X64:       # %bb.0:
6343 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6344 ; X64-NEXT:    vfmaddsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x96,0xc1]
6345 ; X64-NEXT:    # xmm0 = (xmm0 * xmm1) +/- xmm2
6346 ; X64-NEXT:    retq # encoding: [0xc3]
6347   %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
6348   %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
6349   %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2)
6350   %4 = shufflevector <2 x double> %3, <2 x double> %1, <2 x i32> <i32 0, i32 3>
6351   %5 = bitcast i8 %mask to <8 x i1>
6352   %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
6353   %6 = select <2 x i1> %extract, <2 x double> %4, <2 x double> %a0
6354   ret <2 x double> %6
6355 }
6356
6357 define <2 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
6358 ; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_128:
6359 ; X86:       # %bb.0:
6360 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6361 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6362 ; X86-NEXT:    vfmaddsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb6,0xd1]
6363 ; X86-NEXT:    # xmm2 = (xmm0 * xmm1) +/- xmm2
6364 ; X86-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
6365 ; X86-NEXT:    retl # encoding: [0xc3]
6366 ;
6367 ; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_128:
6368 ; X64:       # %bb.0:
6369 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6370 ; X64-NEXT:    vfmaddsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb6,0xd1]
6371 ; X64-NEXT:    # xmm2 = (xmm0 * xmm1) +/- xmm2
6372 ; X64-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
6373 ; X64-NEXT:    retq # encoding: [0xc3]
6374   %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2)
6375   %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x2
6376   %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %2)
6377   %4 = shufflevector <2 x double> %3, <2 x double> %1, <2 x i32> <i32 0, i32 3>
6378   %5 = bitcast i8 %x3 to <8 x i1>
6379   %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
6380   %6 = select <2 x i1> %extract, <2 x double> %4, <2 x double> %x2
6381   ret <2 x double> %6
6382 }
6383
6384 define <2 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
6385 ; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_128:
6386 ; X86:       # %bb.0:
6387 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6388 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6389 ; X86-NEXT:    vfmaddsub213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa6,0xc2]
6390 ; X86-NEXT:    # xmm0 = (xmm1 * xmm0) +/- xmm2
6391 ; X86-NEXT:    retl # encoding: [0xc3]
6392 ;
6393 ; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_128:
6394 ; X64:       # %bb.0:
6395 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6396 ; X64-NEXT:    vfmaddsub213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa6,0xc2]
6397 ; X64-NEXT:    # xmm0 = (xmm1 * xmm0) +/- xmm2
6398 ; X64-NEXT:    retq # encoding: [0xc3]
6399   %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2)
6400   %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x2
6401   %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %2)
6402   %4 = shufflevector <2 x double> %3, <2 x double> %1, <2 x i32> <i32 0, i32 3>
6403   %5 = bitcast i8 %x3 to <8 x i1>
6404   %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
6405   %6 = select <2 x i1> %extract, <2 x double> %4, <2 x double> zeroinitializer
6406   ret <2 x double> %6
6407 }
6408
6409 define <4 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
6410 ; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_256:
6411 ; X86:       # %bb.0:
6412 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6413 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6414 ; X86-NEXT:    vfmaddsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb6,0xd1]
6415 ; X86-NEXT:    # ymm2 = (ymm0 * ymm1) +/- ymm2
6416 ; X86-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
6417 ; X86-NEXT:    retl # encoding: [0xc3]
6418 ;
6419 ; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_256:
6420 ; X64:       # %bb.0:
6421 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6422 ; X64-NEXT:    vfmaddsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb6,0xd1]
6423 ; X64-NEXT:    # ymm2 = (ymm0 * ymm1) +/- ymm2
6424 ; X64-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
6425 ; X64-NEXT:    retq # encoding: [0xc3]
6426   %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2)
6427   %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
6428   %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %2)
6429   %4 = shufflevector <4 x double> %3, <4 x double> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
6430   %5 = bitcast i8 %x3 to <8 x i1>
6431   %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6432   %6 = select <4 x i1> %extract, <4 x double> %4, <4 x double> %x2
6433   ret <4 x double> %6
6434 }
6435
6436 define <4 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
6437 ; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_256:
6438 ; X86:       # %bb.0:
6439 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6440 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6441 ; X86-NEXT:    vfmaddsub213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa6,0xc2]
6442 ; X86-NEXT:    # ymm0 = (ymm1 * ymm0) +/- ymm2
6443 ; X86-NEXT:    retl # encoding: [0xc3]
6444 ;
6445 ; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_256:
6446 ; X64:       # %bb.0:
6447 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6448 ; X64-NEXT:    vfmaddsub213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa6,0xc2]
6449 ; X64-NEXT:    # ymm0 = (ymm1 * ymm0) +/- ymm2
6450 ; X64-NEXT:    retq # encoding: [0xc3]
6451   %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2)
6452   %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
6453   %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %2)
6454   %4 = shufflevector <4 x double> %3, <4 x double> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
6455   %5 = bitcast i8 %x3 to <8 x i1>
6456   %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6457   %6 = select <4 x i1> %extract, <4 x double> %4, <4 x double> zeroinitializer
6458   ret <4 x double> %6
6459 }
6460
6461 define <4 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
6462 ; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_128:
6463 ; X86:       # %bb.0:
6464 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6465 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6466 ; X86-NEXT:    vfmaddsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb6,0xd1]
6467 ; X86-NEXT:    # xmm2 = (xmm0 * xmm1) +/- xmm2
6468 ; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
6469 ; X86-NEXT:    retl # encoding: [0xc3]
6470 ;
6471 ; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_128:
6472 ; X64:       # %bb.0:
6473 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6474 ; X64-NEXT:    vfmaddsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb6,0xd1]
6475 ; X64-NEXT:    # xmm2 = (xmm0 * xmm1) +/- xmm2
6476 ; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
6477 ; X64-NEXT:    retq # encoding: [0xc3]
6478   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2)
6479   %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
6480   %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %2)
6481   %4 = shufflevector <4 x float> %3, <4 x float> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
6482   %5 = bitcast i8 %x3 to <8 x i1>
6483   %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6484   %6 = select <4 x i1> %extract, <4 x float> %4, <4 x float> %x2
6485   ret <4 x float> %6
6486 }
6487
6488 define <4 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
6489 ; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_128:
6490 ; X86:       # %bb.0:
6491 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6492 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6493 ; X86-NEXT:    vfmaddsub213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa6,0xc2]
6494 ; X86-NEXT:    # xmm0 = (xmm1 * xmm0) +/- xmm2
6495 ; X86-NEXT:    retl # encoding: [0xc3]
6496 ;
6497 ; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_128:
6498 ; X64:       # %bb.0:
6499 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6500 ; X64-NEXT:    vfmaddsub213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa6,0xc2]
6501 ; X64-NEXT:    # xmm0 = (xmm1 * xmm0) +/- xmm2
6502 ; X64-NEXT:    retq # encoding: [0xc3]
6503   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2)
6504   %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
6505   %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %2)
6506   %4 = shufflevector <4 x float> %3, <4 x float> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
6507   %5 = bitcast i8 %x3 to <8 x i1>
6508   %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6509   %6 = select <4 x i1> %extract, <4 x float> %4, <4 x float> zeroinitializer
6510   ret <4 x float> %6
6511 }
6512
6513 define <8 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
6514 ; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_256:
6515 ; X86:       # %bb.0:
6516 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6517 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6518 ; X86-NEXT:    vfmaddsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb6,0xd1]
6519 ; X86-NEXT:    # ymm2 = (ymm0 * ymm1) +/- ymm2
6520 ; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
6521 ; X86-NEXT:    retl # encoding: [0xc3]
6522 ;
6523 ; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_256:
6524 ; X64:       # %bb.0:
6525 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6526 ; X64-NEXT:    vfmaddsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb6,0xd1]
6527 ; X64-NEXT:    # ymm2 = (ymm0 * ymm1) +/- ymm2
6528 ; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
6529 ; X64-NEXT:    retq # encoding: [0xc3]
6530   %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2)
6531   %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
6532   %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %2)
6533   %4 = shufflevector <8 x float> %3, <8 x float> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
6534   %5 = bitcast i8 %x3 to <8 x i1>
6535   %6 = select <8 x i1> %5, <8 x float> %4, <8 x float> %x2
6536   ret <8 x float> %6
6537 }
6538
6539 define <8 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
6540 ; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_256:
6541 ; X86:       # %bb.0:
6542 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6543 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6544 ; X86-NEXT:    vfmaddsub213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa6,0xc2]
6545 ; X86-NEXT:    # ymm0 = (ymm1 * ymm0) +/- ymm2
6546 ; X86-NEXT:    retl # encoding: [0xc3]
6547 ;
6548 ; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_256:
6549 ; X64:       # %bb.0:
6550 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6551 ; X64-NEXT:    vfmaddsub213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa6,0xc2]
6552 ; X64-NEXT:    # ymm0 = (ymm1 * ymm0) +/- ymm2
6553 ; X64-NEXT:    retq # encoding: [0xc3]
6554   %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2)
6555   %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
6556   %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %2)
6557   %4 = shufflevector <8 x float> %3, <8 x float> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
6558   %5 = bitcast i8 %x3 to <8 x i1>
6559   %6 = select <8 x i1> %5, <8 x float> %4, <8 x float> zeroinitializer
6560   ret <8 x float> %6
6561 }
6562
6563 define <2 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
6564 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_128:
6565 ; X86:       # %bb.0:
6566 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6567 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6568 ; X86-NEXT:    vfmsubadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb7,0xd1]
6569 ; X86-NEXT:    # xmm2 = (xmm0 * xmm1) -/+ xmm2
6570 ; X86-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
6571 ; X86-NEXT:    retl # encoding: [0xc3]
6572 ;
6573 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_128:
6574 ; X64:       # %bb.0:
6575 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6576 ; X64-NEXT:    vfmsubadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb7,0xd1]
6577 ; X64-NEXT:    # xmm2 = (xmm0 * xmm1) -/+ xmm2
6578 ; X64-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
6579 ; X64-NEXT:    retq # encoding: [0xc3]
6580   %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2)
6581   %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x2
6582   %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %2)
6583   %4 = shufflevector <2 x double> %1, <2 x double> %3, <2 x i32> <i32 0, i32 3>
6584   %5 = bitcast i8 %x3 to <8 x i1>
6585   %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
6586   %6 = select <2 x i1> %extract, <2 x double> %4, <2 x double> %x2
6587   ret <2 x double> %6
6588 }
6589
6590 define <4 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
6591 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_256:
6592 ; X86:       # %bb.0:
6593 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6594 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6595 ; X86-NEXT:    vfmsubadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb7,0xd1]
6596 ; X86-NEXT:    # ymm2 = (ymm0 * ymm1) -/+ ymm2
6597 ; X86-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
6598 ; X86-NEXT:    retl # encoding: [0xc3]
6599 ;
6600 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_256:
6601 ; X64:       # %bb.0:
6602 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6603 ; X64-NEXT:    vfmsubadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb7,0xd1]
6604 ; X64-NEXT:    # ymm2 = (ymm0 * ymm1) -/+ ymm2
6605 ; X64-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
6606 ; X64-NEXT:    retq # encoding: [0xc3]
6607   %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2)
6608   %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
6609   %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %2)
6610   %4 = shufflevector <4 x double> %1, <4 x double> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
6611   %5 = bitcast i8 %x3 to <8 x i1>
6612   %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6613   %6 = select <4 x i1> %extract, <4 x double> %4, <4 x double> %x2
6614   ret <4 x double> %6
6615 }
6616
6617 define <4 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
6618 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_128:
6619 ; X86:       # %bb.0:
6620 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6621 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6622 ; X86-NEXT:    vfmsubadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb7,0xd1]
6623 ; X86-NEXT:    # xmm2 = (xmm0 * xmm1) -/+ xmm2
6624 ; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
6625 ; X86-NEXT:    retl # encoding: [0xc3]
6626 ;
6627 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_128:
6628 ; X64:       # %bb.0:
6629 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6630 ; X64-NEXT:    vfmsubadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb7,0xd1]
6631 ; X64-NEXT:    # xmm2 = (xmm0 * xmm1) -/+ xmm2
6632 ; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
6633 ; X64-NEXT:    retq # encoding: [0xc3]
6634   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2)
6635   %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
6636   %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %2)
6637   %4 = shufflevector <4 x float> %1, <4 x float> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
6638   %5 = bitcast i8 %x3 to <8 x i1>
6639   %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6640   %6 = select <4 x i1> %extract, <4 x float> %4, <4 x float> %x2
6641   ret <4 x float> %6
6642 }
6643
6644 define <8 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
6645 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_256:
6646 ; X86:       # %bb.0:
6647 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6648 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6649 ; X86-NEXT:    vfmsubadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb7,0xd1]
6650 ; X86-NEXT:    # ymm2 = (ymm0 * ymm1) -/+ ymm2
6651 ; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
6652 ; X86-NEXT:    retl # encoding: [0xc3]
6653 ;
6654 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_256:
6655 ; X64:       # %bb.0:
6656 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6657 ; X64-NEXT:    vfmsubadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb7,0xd1]
6658 ; X64-NEXT:    # ymm2 = (ymm0 * ymm1) -/+ ymm2
6659 ; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
6660 ; X64-NEXT:    retq # encoding: [0xc3]
6661   %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2)
6662   %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
6663   %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %2)
6664   %4 = shufflevector <8 x float> %1, <8 x float> %3, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
6665   %5 = bitcast i8 %x3 to <8 x i1>
6666   %6 = select <8 x i1> %5, <8 x float> %4, <8 x float> %x2
6667   ret <8 x float> %6
6668 }
6669
6670 define <4 x float> @test_mask_vfmadd128_ps_rmk(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) {
6671 ; X86-LABEL: test_mask_vfmadd128_ps_rmk:
6672 ; X86:       # %bb.0:
6673 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6674 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
6675 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
6676 ; X86-NEXT:    vfmadd213ps (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x00]
6677 ; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6678 ; X86-NEXT:    retl # encoding: [0xc3]
6679 ;
6680 ; X64-LABEL: test_mask_vfmadd128_ps_rmk:
6681 ; X64:       # %bb.0:
6682 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6683 ; X64-NEXT:    vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
6684 ; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6685 ; X64-NEXT:    retq # encoding: [0xc3]
6686   %a2 = load <4 x float>, <4 x float>* %ptr_a2
6687   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
6688   %2 = bitcast i8 %mask to <8 x i1>
6689   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6690   %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %a0
6691   ret <4 x float> %3
6692 }
6693
6694 define <4 x float> @test_mask_vfmadd128_ps_rmka(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) {
6695 ; X86-LABEL: test_mask_vfmadd128_ps_rmka:
6696 ; X86:       # %bb.0:
6697 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6698 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
6699 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
6700 ; X86-NEXT:    vfmadd213ps (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x00]
6701 ; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6702 ; X86-NEXT:    retl # encoding: [0xc3]
6703 ;
6704 ; X64-LABEL: test_mask_vfmadd128_ps_rmka:
6705 ; X64:       # %bb.0:
6706 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6707 ; X64-NEXT:    vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
6708 ; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6709 ; X64-NEXT:    retq # encoding: [0xc3]
6710   %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 8
6711   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
6712   %2 = bitcast i8 %mask to <8 x i1>
6713   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6714   %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %a0
6715   ret <4 x float> %3
6716 }
6717
6718 define <4 x float> @test_mask_vfmadd128_ps_rmkz(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
6719 ; X86-LABEL: test_mask_vfmadd128_ps_rmkz:
6720 ; X86:       # %bb.0:
6721 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6722 ; X86-NEXT:    vfmadd213ps (%eax), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x00]
6723 ; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6724 ; X86-NEXT:    retl # encoding: [0xc3]
6725 ;
6726 ; X64-LABEL: test_mask_vfmadd128_ps_rmkz:
6727 ; X64:       # %bb.0:
6728 ; X64-NEXT:    vfmadd213ps (%rdi), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x07]
6729 ; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6730 ; X64-NEXT:    retq # encoding: [0xc3]
6731   %a2 = load <4 x float>, <4 x float>* %ptr_a2
6732   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
6733   ret <4 x float> %1
6734 }
6735
6736 define <4 x float> @test_mask_vfmadd128_ps_rmkza(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
6737 ; X86-LABEL: test_mask_vfmadd128_ps_rmkza:
6738 ; X86:       # %bb.0:
6739 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6740 ; X86-NEXT:    vfmadd213ps (%eax), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x00]
6741 ; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6742 ; X86-NEXT:    retl # encoding: [0xc3]
6743 ;
6744 ; X64-LABEL: test_mask_vfmadd128_ps_rmkza:
6745 ; X64:       # %bb.0:
6746 ; X64-NEXT:    vfmadd213ps (%rdi), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x07]
6747 ; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6748 ; X64-NEXT:    retq # encoding: [0xc3]
6749   %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 4
6750   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
6751   ret <4 x float> %1
6752 }
6753
6754 define <4 x float> @test_mask_vfmadd128_ps_rmb(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) {
6755 ; X86-LABEL: test_mask_vfmadd128_ps_rmb:
6756 ; X86:       # %bb.0:
6757 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6758 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
6759 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
6760 ; X86-NEXT:    vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x00]
6761 ; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6762 ; X86-NEXT:    retl # encoding: [0xc3]
6763 ;
6764 ; X64-LABEL: test_mask_vfmadd128_ps_rmb:
6765 ; X64:       # %bb.0:
6766 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6767 ; X64-NEXT:    vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
6768 ; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6769 ; X64-NEXT:    retq # encoding: [0xc3]
6770   %q = load float, float* %ptr_a2
6771   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
6772   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
6773   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
6774   %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
6775   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i)
6776   %2 = bitcast i8 %mask to <8 x i1>
6777   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6778   %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %a0
6779   ret <4 x float> %3
6780 }
6781
6782 define <4 x float> @test_mask_vfmadd128_ps_rmba(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) {
6783 ; X86-LABEL: test_mask_vfmadd128_ps_rmba:
6784 ; X86:       # %bb.0:
6785 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6786 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
6787 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
6788 ; X86-NEXT:    vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x00]
6789 ; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6790 ; X86-NEXT:    retl # encoding: [0xc3]
6791 ;
6792 ; X64-LABEL: test_mask_vfmadd128_ps_rmba:
6793 ; X64:       # %bb.0:
6794 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6795 ; X64-NEXT:    vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
6796 ; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6797 ; X64-NEXT:    retq # encoding: [0xc3]
6798   %q = load float, float* %ptr_a2, align 4
6799   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
6800   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
6801   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
6802   %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
6803   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i)
6804   %2 = bitcast i8 %mask to <8 x i1>
6805   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6806   %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %a0
6807   ret <4 x float> %3
6808 }
6809
6810 define <4 x float> @test_mask_vfmadd128_ps_rmbz(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) {
6811 ; X86-LABEL: test_mask_vfmadd128_ps_rmbz:
6812 ; X86:       # %bb.0:
6813 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6814 ; X86-NEXT:    vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x00]
6815 ; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6816 ; X86-NEXT:    retl # encoding: [0xc3]
6817 ;
6818 ; X64-LABEL: test_mask_vfmadd128_ps_rmbz:
6819 ; X64:       # %bb.0:
6820 ; X64-NEXT:    vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
6821 ; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6822 ; X64-NEXT:    retq # encoding: [0xc3]
6823   %q = load float, float* %ptr_a2
6824   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
6825   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
6826   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
6827   %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
6828   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i)
6829   ret <4 x float> %1
6830 }
6831
6832 define <4 x float> @test_mask_vfmadd128_ps_rmbza(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) {
6833 ; X86-LABEL: test_mask_vfmadd128_ps_rmbza:
6834 ; X86:       # %bb.0:
6835 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6836 ; X86-NEXT:    vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x00]
6837 ; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6838 ; X86-NEXT:    retl # encoding: [0xc3]
6839 ;
6840 ; X64-LABEL: test_mask_vfmadd128_ps_rmbza:
6841 ; X64:       # %bb.0:
6842 ; X64-NEXT:    vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
6843 ; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6844 ; X64-NEXT:    retq # encoding: [0xc3]
6845   %q = load float, float* %ptr_a2, align 4
6846   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
6847   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
6848   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
6849   %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
6850   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i)
6851   ret <4 x float> %1
6852 }
6853
6854 define <2 x double> @test_mask_vfmadd128_pd_rmk(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2, i8 %mask) {
6855 ; X86-LABEL: test_mask_vfmadd128_pd_rmk:
6856 ; X86:       # %bb.0:
6857 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6858 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
6859 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
6860 ; X86-NEXT:    vfmadd213pd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x00]
6861 ; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6862 ; X86-NEXT:    retl # encoding: [0xc3]
6863 ;
6864 ; X64-LABEL: test_mask_vfmadd128_pd_rmk:
6865 ; X64:       # %bb.0:
6866 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6867 ; X64-NEXT:    vfmadd213pd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x07]
6868 ; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6869 ; X64-NEXT:    retq # encoding: [0xc3]
6870   %a2 = load <2 x double>, <2 x double>* %ptr_a2
6871   %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
6872   %2 = bitcast i8 %mask to <8 x i1>
6873   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
6874   %3 = select <2 x i1> %extract, <2 x double> %1, <2 x double> %a0
6875   ret <2 x double> %3
6876 }
6877
6878 define <2 x double> @test_mask_vfmadd128_pd_rmkz(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2) {
6879 ; X86-LABEL: test_mask_vfmadd128_pd_rmkz:
6880 ; X86:       # %bb.0:
6881 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6882 ; X86-NEXT:    vfmadd213pd (%eax), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0x00]
6883 ; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6884 ; X86-NEXT:    retl # encoding: [0xc3]
6885 ;
6886 ; X64-LABEL: test_mask_vfmadd128_pd_rmkz:
6887 ; X64:       # %bb.0:
6888 ; X64-NEXT:    vfmadd213pd (%rdi), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0x07]
6889 ; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
6890 ; X64-NEXT:    retq # encoding: [0xc3]
6891   %a2 = load <2 x double>, <2 x double>* %ptr_a2
6892   %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
6893   ret <2 x double> %1
6894 }
6895
6896 define <4 x double> @test_mask_vfmadd256_pd_rmk(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2, i8 %mask) {
6897 ; X86-LABEL: test_mask_vfmadd256_pd_rmk:
6898 ; X86:       # %bb.0:
6899 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6900 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
6901 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
6902 ; X86-NEXT:    vfmadd213pd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x00]
6903 ; X86-NEXT:    # ymm0 = (ymm1 * ymm0) + mem
6904 ; X86-NEXT:    retl # encoding: [0xc3]
6905 ;
6906 ; X64-LABEL: test_mask_vfmadd256_pd_rmk:
6907 ; X64:       # %bb.0:
6908 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6909 ; X64-NEXT:    vfmadd213pd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x07]
6910 ; X64-NEXT:    # ymm0 = (ymm1 * ymm0) + mem
6911 ; X64-NEXT:    retq # encoding: [0xc3]
6912   %a2 = load <4 x double>, <4 x double>* %ptr_a2
6913   %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
6914   %2 = bitcast i8 %mask to <8 x i1>
6915   %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6916   %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %a0
6917   ret <4 x double> %3
6918 }
6919
6920 define <4 x double> @test_mask_vfmadd256_pd_rmkz(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2) {
6921 ; X86-LABEL: test_mask_vfmadd256_pd_rmkz:
6922 ; X86:       # %bb.0:
6923 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6924 ; X86-NEXT:    vfmadd213pd (%eax), %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0x00]
6925 ; X86-NEXT:    # ymm0 = (ymm1 * ymm0) + mem
6926 ; X86-NEXT:    retl # encoding: [0xc3]
6927 ;
6928 ; X64-LABEL: test_mask_vfmadd256_pd_rmkz:
6929 ; X64:       # %bb.0:
6930 ; X64-NEXT:    vfmadd213pd (%rdi), %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0x07]
6931 ; X64-NEXT:    # ymm0 = (ymm1 * ymm0) + mem
6932 ; X64-NEXT:    retq # encoding: [0xc3]
6933   %a2 = load <4 x double>, <4 x double>* %ptr_a2
6934   %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
6935   ret <4 x double> %1
6936 }
6937
6938 declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
6939 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
6940 declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
6941 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
6942 declare <2 x double> @llvm.x86.avx512.mask.compress.v2f64(<2 x double>, <2 x double>, <2 x i1>)
6943 declare <4 x float> @llvm.x86.avx512.mask.compress.v4f32(<4 x float>, <4 x float>, <4 x i1>)
6944 declare <2 x i64> @llvm.x86.avx512.mask.compress.v2i64(<2 x i64>, <2 x i64>, <2 x i1>)
6945 declare <4 x i32> @llvm.x86.avx512.mask.compress.v4i32(<4 x i32>, <4 x i32>, <4 x i1>)
6946 declare <2 x double> @llvm.x86.avx512.mask.expand.v2f64(<2 x double>, <2 x double>, <2 x i1>)
6947 declare <4 x float> @llvm.x86.avx512.mask.expand.v4f32(<4 x float>, <4 x float>, <4 x i1>)
6948 declare <2 x i64> @llvm.x86.avx512.mask.expand.v2i64(<2 x i64>, <2 x i64>, <2 x i1>)
6949 declare <4 x i32> @llvm.x86.avx512.mask.expand.v4i32(<4 x i32>, <4 x i32>, <4 x i1>)
6950 declare <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double>, <4 x double>, <4 x i1>)
6951 declare <8 x float> @llvm.x86.avx512.mask.compress.v8f32(<8 x float>, <8 x float>, <8 x i1>)
6952 declare <4 x i64> @llvm.x86.avx512.mask.compress.v4i64(<4 x i64>, <4 x i64>, <4 x i1>)
6953 declare <8 x i32> @llvm.x86.avx512.mask.compress.v8i32(<8 x i32>, <8 x i32>, <8 x i1>)
6954 declare <4 x double> @llvm.x86.avx512.mask.expand.v4f64(<4 x double>, <4 x double>, <4 x i1>)
6955 declare <8 x float> @llvm.x86.avx512.mask.expand.v8f32(<8 x float>, <8 x float>, <8 x i1>)
6956 declare <4 x i64> @llvm.x86.avx512.mask.expand.v4i64(<4 x i64>, <4 x i64>, <4 x i1>)
6957 declare <8 x i32> @llvm.x86.avx512.mask.expand.v8i32(<8 x i32>, <8 x i32>, <8 x i1>)