; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bw,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
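
; Exercises the masked AVX-512BW/VL intrinsics for byte/word broadcasts (from
; GPRs and from vectors), masked unaligned loads and stores, palignr,
; pshufhw/pshuflw, and integer compares, checking both the selected
; instructions and their MC encodings on 32-bit and 64-bit targets.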

declare <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8, <16 x i8>, i16)

define { <16 x i8>, <16 x i8>, <16 x i8> } @test_int_x86_avx512_mask_pbroadcast_b_gpr_128(i8 %x0, <16 x i8> %x1, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128:
; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x78,0x5c,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpblendmb %xmm3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x66,0xcb]
; X86-NEXT: vmovdqu8 %xmm3, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0xd3]
; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128:
; X64-NEXT: vmovdqa %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc8]
; X64-NEXT: vpbroadcastb %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc7]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpbroadcastb %edi, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xcf]
; X64-NEXT: vpbroadcastb %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xd7]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 %mask)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> zeroinitializer, i16 %mask)
  %res3 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } poison, <16 x i8> %res0, 0
  %res4 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res3, <16 x i8> %res1, 1
  %res5 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res4, <16 x i8> %res2, 2
  ret { <16 x i8>, <16 x i8>, <16 x i8> } %res5
}

declare <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16, <8 x i16>, i8)

define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_pbroadcast_w_gpr_128(i16 %x0, <8 x i16> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128:
; X86-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x79,0x5c,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpblendmw %xmm3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x66,0xcb]
; X86-NEXT: vmovdqu16 %xmm3, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0xd3]
; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128:
; X64-NEXT: vmovdqa %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc8]
; X64-NEXT: vpbroadcastw %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc7]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpbroadcastw %edi, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xcf]
; X64-NEXT: vpbroadcastw %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xd7]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 -1)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 %mask)
  %res2 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> zeroinitializer, i8 %mask)
  %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0
  %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %res1, 1
  %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %res2, 2
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5
}

declare <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8, <32 x i8>, i32)

define { <32 x i8>, <32 x i8>, <32 x i8> } @test_int_x86_avx512_mask_pbroadcast_b_gpr_256(i8 %x0, <32 x i8> %x1, i32 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256:
; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x78,0x5c,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpblendmb %ymm3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x66,0xcb]
; X86-NEXT: vmovdqu8 %ymm3, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0xd3]
; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256:
; X64-NEXT: vmovdqa %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc8]
; X64-NEXT: vpbroadcastb %edi, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xc7]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpbroadcastb %edi, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xcf]
; X64-NEXT: vpbroadcastb %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xd7]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 -1)
  %res1 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 %mask)
  %res2 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> zeroinitializer, i32 %mask)
  %res3 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } poison, <32 x i8> %res0, 0
  %res4 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } %res3, <32 x i8> %res1, 1
  %res5 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } %res4, <32 x i8> %res2, 2
  ret { <32 x i8>, <32 x i8>, <32 x i8> } %res5
}

declare <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16, <16 x i16>, i16)

define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_pbroadcast_w_gpr_256(i16 %x0, <16 x i16> %x1, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256:
; X86-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x79,0x5c,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpblendmw %ymm3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x66,0xcb]
; X86-NEXT: vmovdqu16 %ymm3, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0xd3]
; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256:
; X64-NEXT: vmovdqa %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc8]
; X64-NEXT: vpbroadcastw %edi, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xc7]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpbroadcastw %edi, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xcf]
; X64-NEXT: vpbroadcastw %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xd7]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 -1)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 %mask)
  %res2 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> zeroinitializer, i16 %mask)
  %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res, 0
  %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %res1, 1
  %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %res2, 2
  ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5
}

declare <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8>, <32 x i8>, i32)

define <32 x i8>@test_int_x86_avx512_pbroadcastb_256(<16 x i8> %x0, <32 x i8> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_256:
; CHECK-NEXT: vpbroadcastb %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x78,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8>@test_int_x86_avx512_mask_pbroadcastb_256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastb_256:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpbroadcastb %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x78,0xc8]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastb_256:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpbroadcastb %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x78,0xc8]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8>@test_int_x86_avx512_maskz_pbroadcastb_256(<16 x i8> %x0, i32 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastb_256:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpbroadcastb %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x78,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastb_256:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpbroadcastb %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x78,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

declare <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8>, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_pbroadcastb_128(<16 x i8> %x0, <16 x i8> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_128:
; CHECK-NEXT: vpbroadcastb %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x78,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8>@test_int_x86_avx512_mask_pbroadcastb_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastb_128:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpbroadcastb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x78,0xc8]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastb_128:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpbroadcastb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x78,0xc8]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8>@test_int_x86_avx512_maskz_pbroadcastb_128(<16 x i8> %x0, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastb_128:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpbroadcastb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x78,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastb_128:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpbroadcastb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x78,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

declare <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16>, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_pbroadcastw_256(<8 x i16> %x0, <16 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_256:
; CHECK-NEXT: vpbroadcastw %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x79,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_mask_pbroadcastw_256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastw_256:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpbroadcastw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x79,0xc8]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastw_256:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpbroadcastw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x79,0xc8]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_maskz_pbroadcastw_256(<8 x i16> %x0, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastw_256:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpbroadcastw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x79,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastw_256:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpbroadcastw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x79,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_pbroadcastw_128(<8 x i16> %x0, <8 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_128:
; CHECK-NEXT: vpbroadcastw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x79,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_mask_pbroadcastw_128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastw_128:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpbroadcastw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x79,0xc8]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastw_128:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpbroadcastw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x79,0xc8]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_maskz_pbroadcastw_128(<8 x i16> %x0, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastw_128:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpbroadcastw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x79,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastw_128:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpbroadcastw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x79,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

declare <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8>, <64 x i8>, i64)

define <64 x i8>@test_int_x86_avx512_pbroadcastb_512(<16 x i8> %x0, <64 x i8> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_512:
; CHECK-NEXT: vpbroadcastb %xmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x78,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 -1)
  ret <64 x i8> %res
}

define <64 x i8>@test_int_x86_avx512_mask_pbroadcastb_512(<16 x i8> %x0, <64 x i8> %x1, i64 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastb_512:
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpbroadcastb %xmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x78,0xc8]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastb_512:
; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT: vpbroadcastb %xmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x78,0xc8]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 %mask)
  ret <64 x i8> %res
}

define <64 x i8>@test_int_x86_avx512_maskz_pbroadcastb_512(<16 x i8> %x0, i64 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastb_512:
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpbroadcastb %xmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x78,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastb_512:
; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT: vpbroadcastb %xmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x78,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> zeroinitializer, i64 %mask)
  ret <64 x i8> %res
}

declare <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16>, <32 x i16>, i32)

define <32 x i16>@test_int_x86_avx512_pbroadcastw_512(<8 x i16> %x0, <32 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_512:
; CHECK-NEXT: vpbroadcastw %xmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x79,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 -1)
  ret <32 x i16> %res
}

define <32 x i16>@test_int_x86_avx512_mask_pbroadcastw_512(<8 x i16> %x0, <32 x i16> %x1, i32 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastw_512:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpbroadcastw %xmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x79,0xc8]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastw_512:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpbroadcastw %xmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x79,0xc8]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16>@test_int_x86_avx512_maskz_pbroadcastw_512(<8 x i16> %x0, i32 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastw_512:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpbroadcastw %xmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x79,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastw_512:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpbroadcastw %xmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x79,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}
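
; The store tests below write the same vector twice: a masked vmovdqu8/vmovdqu16
; store through the first pointer and an unmasked store through the second.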

declare void @llvm.x86.avx512.mask.storeu.b.128(ptr, <16 x i8>, i16)

define void@test_int_x86_avx512_mask_storeu_b_128(ptr %ptr1, ptr %ptr2, <16 x i8> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_b_128:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT: vmovdqu8 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x7f,0x01]
; X86-NEXT: vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_storeu_b_128:
; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X64-NEXT: vmovdqu8 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x7f,0x07]
; X64-NEXT: vmovdqu %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.b.128(ptr %ptr1, <16 x i8> %x1, i16 %x2)
  call void @llvm.x86.avx512.mask.storeu.b.128(ptr %ptr2, <16 x i8> %x1, i16 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.b.256(ptr, <32 x i8>, i32)

define void@test_int_x86_avx512_mask_storeu_b_256(ptr %ptr1, ptr %ptr2, <32 x i8> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_b_256:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c]
; X86-NEXT: vmovdqu8 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x7f,0x01]
; X86-NEXT: vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_storeu_b_256:
; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X64-NEXT: vmovdqu8 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x7f,0x07]
; X64-NEXT: vmovdqu %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.b.256(ptr %ptr1, <32 x i8> %x1, i32 %x2)
  call void @llvm.x86.avx512.mask.storeu.b.256(ptr %ptr2, <32 x i8> %x1, i32 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.w.128(ptr, <8 x i16>, i8)

define void@test_int_x86_avx512_mask_storeu_w_128(ptr %ptr1, ptr %ptr2, <8 x i16> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_w_128:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X86-NEXT: vmovdqu16 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7f,0x01]
; X86-NEXT: vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_storeu_w_128:
; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X64-NEXT: vmovdqu16 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7f,0x07]
; X64-NEXT: vmovdqu %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.w.128(ptr %ptr1, <8 x i16> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.w.128(ptr %ptr2, <8 x i16> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.w.256(ptr, <16 x i16>, i16)

define void@test_int_x86_avx512_mask_storeu_w_256(ptr %ptr1, ptr %ptr2, <16 x i16> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_w_256:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT: vmovdqu16 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7f,0x01]
; X86-NEXT: vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_storeu_w_256:
; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X64-NEXT: vmovdqu16 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7f,0x07]
; X64-NEXT: vmovdqu %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.w.256(ptr %ptr1, <16 x i16> %x1, i16 %x2)
  call void @llvm.x86.avx512.mask.storeu.w.256(ptr %ptr2, <16 x i16> %x1, i16 -1)
  ret void
}

declare <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(ptr, <8 x i16>, i8)

define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_loadu_w_128(ptr %ptr, ptr %ptr2, <8 x i16> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_loadu_w_128:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X86-NEXT: vpblendmw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x66,0x08]
; X86-NEXT: vmovdqu16 (%ecx), %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0x11]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_loadu_w_128:
; X64-NEXT: vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07]
; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X64-NEXT: vpblendmw (%rsi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x66,0x0e]
; X64-NEXT: vmovdqu16 (%rdi), %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0x17]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(ptr %ptr, <8 x i16> %x1, i8 -1)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(ptr %ptr2, <8 x i16> %res0, i8 %mask)
  %res2 = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(ptr %ptr, <8 x i16> zeroinitializer, i8 %mask)
  %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0
  %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %res1, 1
  %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %res2, 2
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5
}

declare <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(ptr, <16 x i16>, i16)

define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_loadu_w_256(ptr %ptr, ptr %ptr2, <16 x i16> %x1, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_loadu_w_256:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT: vpblendmw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x66,0x08]
; X86-NEXT: vmovdqu16 (%ecx), %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x11]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_loadu_w_256:
; X64-NEXT: vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07]
; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X64-NEXT: vpblendmw (%rsi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x66,0x0e]
; X64-NEXT: vmovdqu16 (%rdi), %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x17]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(ptr %ptr, <16 x i16> %x1, i16 -1)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(ptr %ptr2, <16 x i16> %res0, i16 %mask)
  %res2 = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(ptr %ptr, <16 x i16> zeroinitializer, i16 %mask)
  %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res0, 0
  %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %res1, 1
  %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %res2, 2
  ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5
}

declare <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(ptr, <16 x i8>, i16)

define { <16 x i8>, <16 x i8>, <16 x i8> } @test_int_x86_avx512_mask_loadu_b_128(ptr %ptr, ptr %ptr2, <16 x i8> %x1, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_loadu_b_128:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT: vpblendmb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x66,0x08]
; X86-NEXT: vmovdqu8 (%ecx), %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x11]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_loadu_b_128:
; X64-NEXT: vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07]
; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X64-NEXT: vpblendmb (%rsi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x66,0x0e]
; X64-NEXT: vmovdqu8 (%rdi), %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x17]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(ptr %ptr, <16 x i8> %x1, i16 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(ptr %ptr2, <16 x i8> %res0, i16 %mask)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(ptr %ptr, <16 x i8> zeroinitializer, i16 %mask)
  %res3 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } poison, <16 x i8> %res0, 0
  %res4 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res3, <16 x i8> %res1, 1
  %res5 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res4, <16 x i8> %res2, 2
  ret { <16 x i8>, <16 x i8>, <16 x i8> } %res5
}

declare <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(ptr, <32 x i8>, i32)

define { <32 x i8>, <32 x i8>, <32 x i8> } @test_int_x86_avx512_mask_loadu_b_256(ptr %ptr, ptr %ptr2, <32 x i8> %x1, i32 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_loadu_b_256:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c]
; X86-NEXT: vpblendmb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x66,0x08]
; X86-NEXT: vmovdqu8 (%ecx), %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0x11]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_loadu_b_256:
; X64-NEXT: vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07]
; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X64-NEXT: vpblendmb (%rsi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x66,0x0e]
; X64-NEXT: vmovdqu8 (%rdi), %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0x17]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(ptr %ptr, <32 x i8> %x1, i32 -1)
  %res1 = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(ptr %ptr2, <32 x i8> %res0, i32 %mask)
  %res2 = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(ptr %ptr, <32 x i8> zeroinitializer, i32 %mask)
  %res3 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } poison, <32 x i8> %res0, 0
  %res4 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } %res3, <32 x i8> %res1, 1
  %res5 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } %res4, <32 x i8> %res2, 2
  ret { <32 x i8>, <32 x i8>, <32 x i8> } %res5
}

declare <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8>, <16 x i8>, i32, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_palignr_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x3) {
; CHECK-LABEL: test_int_x86_avx512_palignr_128:
; CHECK-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xc1,0x02]
; CHECK-NEXT: # xmm0 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> %x3, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8>@test_int_x86_avx512_mask_palignr_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_palignr_128:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x0f,0xd1,0x02]
; X86-NEXT: # xmm2 {%k1} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_palignr_128:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x0f,0xd1,0x02]
; X64-NEXT: # xmm2 {%k1} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> %x3, i16 %x4)
  ret <16 x i8> %res
}

define <16 x i8>@test_int_x86_avx512_maskz_palignr_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_palignr_128:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x0f,0xc1,0x02]
; X86-NEXT: # xmm0 {%k1} {z} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_maskz_palignr_128:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x0f,0xc1,0x02]
; X64-NEXT: # xmm0 {%k1} {z} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> zeroinitializer, i16 %x4)
  ret <16 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8>, <32 x i8>, i32, <32 x i8>, i32)

define <32 x i8>@test_int_x86_avx512_palignr_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x3) {
; CHECK-LABEL: test_int_x86_avx512_palignr_256:
; CHECK-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x0f,0xc1,0x02]
; CHECK-NEXT: # ymm0 = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> %x3, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8>@test_int_x86_avx512_mask_palignr_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_palignr_256:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x0f,0xd1,0x02]
; X86-NEXT: # ymm2 {%k1} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_palignr_256:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x0f,0xd1,0x02]
; X64-NEXT: # ymm2 {%k1} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> %x3, i32 %x4)
  ret <32 x i8> %res
}

define <32 x i8>@test_int_x86_avx512_maskz_palignr_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_palignr_256:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x0f,0xc1,0x02]
; X86-NEXT: # ymm0 {%k1} {z} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_maskz_palignr_256:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x0f,0xc1,0x02]
; X64-NEXT: # ymm0 {%k1} {z} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> zeroinitializer, i32 %x4)
  ret <32 x i8> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16>, i32, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_pshufh_w_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pshufh_w_128:
; CHECK-NEXT: vpshufhw $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x70,0xc0,0x03]
; CHECK-NEXT: # xmm0 = xmm0[0,1,2,3,7,4,4,4]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_mask_pshufh_w_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pshufh_w_128:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshufhw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x70,0xc8,0x03]
; X86-NEXT: # xmm1 {%k1} = xmm0[0,1,2,3,7,4,4,4]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_pshufh_w_128:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshufhw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x70,0xc8,0x03]
; X64-NEXT: # xmm1 {%k1} = xmm0[0,1,2,3,7,4,4,4]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_maskz_pshufh_w_128(<8 x i16> %x0, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_pshufh_w_128:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshufhw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x70,0xc0,0x03]
; X86-NEXT: # xmm0 {%k1} {z} = xmm0[0,1,2,3,7,4,4,4]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_maskz_pshufh_w_128:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshufhw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x70,0xc0,0x03]
; X64-NEXT: # xmm0 {%k1} {z} = xmm0[0,1,2,3,7,4,4,4]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3)
  ret <8 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16>, i32, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_pshufh_w_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pshufh_w_256:
; CHECK-NEXT: vpshufhw $3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x70,0xc0,0x03]
; CHECK-NEXT: # ymm0 = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_mask_pshufh_w_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pshufh_w_256:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpshufhw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x70,0xc8,0x03]
; X86-NEXT: # ymm1 {%k1} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_pshufh_w_256:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshufhw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x70,0xc8,0x03]
; X64-NEXT: # ymm1 {%k1} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_maskz_pshufh_w_256(<16 x i16> %x0, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_pshufh_w_256:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpshufhw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x70,0xc0,0x03]
; X86-NEXT: # ymm0 {%k1} {z} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_maskz_pshufh_w_256:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshufhw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x70,0xc0,0x03]
; X64-NEXT: # ymm0 {%k1} {z} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3)
  ret <16 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16>, i32, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_pshufl_w_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pshufl_w_128:
; CHECK-NEXT: vpshuflw $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x70,0xc0,0x03]
; CHECK-NEXT: # xmm0 = xmm0[3,0,0,0,4,5,6,7]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_mask_pshufl_w_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pshufl_w_128:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshuflw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x70,0xc8,0x03]
; X86-NEXT: # xmm1 {%k1} = xmm0[3,0,0,0,4,5,6,7]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_pshufl_w_128:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshuflw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x70,0xc8,0x03]
; X64-NEXT: # xmm1 {%k1} = xmm0[3,0,0,0,4,5,6,7]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_maskz_pshufl_w_128(<8 x i16> %x0, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_pshufl_w_128:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshuflw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x70,0xc0,0x03]
; X86-NEXT: # xmm0 {%k1} {z} = xmm0[3,0,0,0,4,5,6,7]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_maskz_pshufl_w_128:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshuflw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x70,0xc0,0x03]
; X64-NEXT: # xmm0 {%k1} {z} = xmm0[3,0,0,0,4,5,6,7]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3)
  ret <8 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16>, i32, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_pshufl_w_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_pshufl_w_256:
; CHECK-NEXT: vpshuflw $3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xff,0x70,0xc0,0x03]
; CHECK-NEXT: # ymm0 = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_mask_pshufl_w_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pshufl_w_256:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpshuflw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x70,0xc8,0x03]
; X86-NEXT: # ymm1 {%k1} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_pshufl_w_256:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshuflw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x70,0xc8,0x03]
; X64-NEXT: # ymm1 {%k1} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_maskz_pshufl_w_256(<16 x i16> %x0, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_pshufl_w_256:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpshuflw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x70,0xc0,0x03]
; X86-NEXT: # ymm0 {%k1} {z} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_maskz_pshufl_w_256:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshuflw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x70,0xc0,0x03]
; X64-NEXT: # ymm0 {%k1} {z} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3)
  ret <16 x i16> %res
}
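
; The remaining tests cover the mask.pcmpeq/pcmpgt intrinsics, which return a
; scalar bitmask; in the masked variants the compare result is ANDed with the
; caller-supplied mask.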

define i32 @test_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_pcmpeq_b_256:
; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1)
  ret i32 %res
}

define i32 @test_mask_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; X86-LABEL: test_mask_pcmpeq_b_256:
; X86-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_mask_pcmpeq_b_256:
; X64-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT: andl %edi, %eax # encoding: [0x21,0xf8]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
  ret i32 %res
}

declare i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8>, <32 x i8>, i32)

define i16 @test_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_pcmpeq_w_256:
; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
  ret i16 %res
}

define i16 @test_mask_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; X86-LABEL: test_mask_pcmpeq_w_256:
; X86-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT: andw {{[0-9]+}}(%esp), %ax # encoding: [0x66,0x23,0x44,0x24,0x04]
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_mask_pcmpeq_w_256:
; X64-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT: andl %edi, %eax # encoding: [0x21,0xf8]
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
  ret i16 %res
}

declare i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16>, <16 x i16>, i16)

define i32 @test_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_pcmpgt_b_256:
; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1)
  ret i32 %res
}

define i32 @test_mask_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; X86-LABEL: test_mask_pcmpgt_b_256:
; X86-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_mask_pcmpgt_b_256:
; X64-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT: andl %edi, %eax # encoding: [0x21,0xf8]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
  ret i32 %res
}

declare i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8>, <32 x i8>, i32)

define i16 @test_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_pcmpgt_w_256:
; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1]
; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
  ret i16 %res
}

define i16 @test_mask_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; X86-LABEL: test_mask_pcmpgt_w_256:
; X86-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1]
; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT: andw {{[0-9]+}}(%esp), %ax # encoding: [0x66,0x23,0x44,0x24,0x04]
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
; X64-LABEL: test_mask_pcmpgt_w_256:
; X64-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1]
; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT: andl %edi, %eax # encoding: [0x21,0xf8]
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
  ret i16 %res
}

declare i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16>, <16 x i16>, i16)

define i16 @test_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_pcmpeq_b_128:
; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1)
  ret i16 %res
}
1065 define i16 @test_mask_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
1066 ; X86-LABEL: test_mask_pcmpeq_b_128:
1068 ; X86-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
1069 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1070 ; X86-NEXT: andw {{[0-9]+}}(%esp), %ax # encoding: [0x66,0x23,0x44,0x24,0x04]
1071 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
1072 ; X86-NEXT: retl # encoding: [0xc3]
1074 ; X64-LABEL: test_mask_pcmpeq_b_128:
1076 ; X64-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
1077 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1078 ; X64-NEXT: andl %edi, %eax # encoding: [0x21,0xf8]
1079 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
1080 ; X64-NEXT: retq # encoding: [0xc3]
1081 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
1085 declare i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8>, <16 x i8>, i16)
1087 define i8 @test_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b) {
1088 ; CHECK-LABEL: test_pcmpeq_w_128:
1090 ; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
1091 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1092 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
1093 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1094 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1)
1098 define i8 @test_mask_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
1099 ; X86-LABEL: test_mask_pcmpeq_w_128:
1101 ; X86-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
1102 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1103 ; X86-NEXT: andb {{[0-9]+}}(%esp), %al # encoding: [0x22,0x44,0x24,0x04]
1104 ; X86-NEXT: # kill: def $al killed $al killed $eax
1105 ; X86-NEXT: retl # encoding: [0xc3]
1107 ; X64-LABEL: test_mask_pcmpeq_w_128:
1109 ; X64-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
1110 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1111 ; X64-NEXT: andb %dil, %al # encoding: [0x40,0x20,0xf8]
1112 ; X64-NEXT: # kill: def $al killed $al killed $eax
1113 ; X64-NEXT: retq # encoding: [0xc3]
1114 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask)
1118 declare i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16>, <8 x i16>, i8)
1120 define i16 @test_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b) {
1121 ; CHECK-LABEL: test_pcmpgt_b_128:
1123 ; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1]
1124 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1125 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
1126 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1127 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1)
1131 define i16 @test_mask_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
1132 ; X86-LABEL: test_mask_pcmpgt_b_128:
1134 ; X86-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1]
1135 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1136 ; X86-NEXT: andw {{[0-9]+}}(%esp), %ax # encoding: [0x66,0x23,0x44,0x24,0x04]
1137 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
1138 ; X86-NEXT: retl # encoding: [0xc3]
1140 ; X64-LABEL: test_mask_pcmpgt_b_128:
1142 ; X64-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1]
1143 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1144 ; X64-NEXT: andl %edi, %eax # encoding: [0x21,0xf8]
1145 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
1146 ; X64-NEXT: retq # encoding: [0xc3]
1147 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
1151 declare i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8>, <16 x i8>, i16)
1153 define i8 @test_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b) {
1154 ; CHECK-LABEL: test_pcmpgt_w_128:
1156 ; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
1157 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1158 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
1159 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1160 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1)
1164 define i8 @test_mask_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
1165 ; X86-LABEL: test_mask_pcmpgt_w_128:
1167 ; X86-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
1168 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1169 ; X86-NEXT: andb {{[0-9]+}}(%esp), %al # encoding: [0x22,0x44,0x24,0x04]
1170 ; X86-NEXT: # kill: def $al killed $al killed $eax
1171 ; X86-NEXT: retl # encoding: [0xc3]
1173 ; X64-LABEL: test_mask_pcmpgt_w_128:
1175 ; X64-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
1176 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1177 ; X64-NEXT: andb %dil, %al # encoding: [0x40,0x20,0xf8]
1178 ; X64-NEXT: # kill: def $al killed $al killed $eax
1179 ; X64-NEXT: retq # encoding: [0xc3]
1180 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask)
1184 declare i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16>, <8 x i16>, i8)
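; Byte/word unpack tests for llvm.x86.avx512.mask.punpck{h,l}{b.w,w.d} at 128- and
; 256-bit widths. The lane lists after each vpunpck* instruction are the shuffle
; decoding comments emitted by llc.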
1186 declare <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
1188 define <16 x i8>@test_int_x86_avx512_punpckhb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
1189 ; CHECK-LABEL: test_int_x86_avx512_punpckhb_w_128:
1191 ; CHECK-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x68,0xc1]
1192 ; CHECK-NEXT: # xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
1193 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1194 %res = call <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
1198 define <16 x i8>@test_int_x86_avx512_mask_punpckhb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
1199 ; X86-LABEL: test_int_x86_avx512_mask_punpckhb_w_128:
1201 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1202 ; X86-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x68,0xd1]
1203 ; X86-NEXT: # xmm2 {%k1} = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
1204 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1205 ; X86-NEXT: retl # encoding: [0xc3]
1207 ; X64-LABEL: test_int_x86_avx512_mask_punpckhb_w_128:
1209 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1210 ; X64-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x68,0xd1]
1211 ; X64-NEXT: # xmm2 {%k1} = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
1212 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1213 ; X64-NEXT: retq # encoding: [0xc3]
1214 %res = call <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
1218 declare <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
1220 define <16 x i8>@test_int_x86_avx512_punpcklb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
1221 ; CHECK-LABEL: test_int_x86_avx512_punpcklb_w_128:
1223 ; CHECK-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x60,0xc1]
1224 ; CHECK-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1225 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1226 %res = call <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
1230 define <16 x i8>@test_int_x86_avx512_mask_punpcklb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
1231 ; X86-LABEL: test_int_x86_avx512_mask_punpcklb_w_128:
1233 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1234 ; X86-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x60,0xd1]
1235 ; X86-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1236 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1237 ; X86-NEXT: retl # encoding: [0xc3]
1239 ; X64-LABEL: test_int_x86_avx512_mask_punpcklb_w_128:
1241 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1242 ; X64-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x60,0xd1]
1243 ; X64-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1244 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1245 ; X64-NEXT: retq # encoding: [0xc3]
1246 %res = call <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
1250 declare <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
1252 define <32 x i8>@test_int_x86_avx512_punpckhb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
1253 ; CHECK-LABEL: test_int_x86_avx512_punpckhb_w_256:
1255 ; CHECK-NEXT: vpunpckhbw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x68,0xc1]
1256 ; CHECK-NEXT: # ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
1257 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1258 %res = call <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
1262 define <32 x i8>@test_int_x86_avx512_mask_punpckhb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
1263 ; X86-LABEL: test_int_x86_avx512_mask_punpckhb_w_256:
1265 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1266 ; X86-NEXT: vpunpckhbw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x68,0xd1]
1267 ; X86-NEXT: # ymm2 {%k1} = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
1268 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1269 ; X86-NEXT: retl # encoding: [0xc3]
1271 ; X64-LABEL: test_int_x86_avx512_mask_punpckhb_w_256:
1273 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1274 ; X64-NEXT: vpunpckhbw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x68,0xd1]
1275 ; X64-NEXT: # ymm2 {%k1} = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
1276 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1277 ; X64-NEXT: retq # encoding: [0xc3]
1278 %res = call <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
1282 declare <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
1284 define <32 x i8>@test_int_x86_avx512_punpcklb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
1285 ; CHECK-LABEL: test_int_x86_avx512_punpcklb_w_256:
1287 ; CHECK-NEXT: vpunpcklbw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x60,0xc1]
1288 ; CHECK-NEXT: # ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
1289 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1290 %res = call <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
1294 define <32 x i8>@test_int_x86_avx512_mask_punpcklb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
1295 ; X86-LABEL: test_int_x86_avx512_mask_punpcklb_w_256:
1297 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1298 ; X86-NEXT: vpunpcklbw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x60,0xd1]
1299 ; X86-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
1300 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1301 ; X86-NEXT: retl # encoding: [0xc3]
1303 ; X64-LABEL: test_int_x86_avx512_mask_punpcklb_w_256:
1305 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1306 ; X64-NEXT: vpunpcklbw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x60,0xd1]
1307 ; X64-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
1308 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1309 ; X64-NEXT: retq # encoding: [0xc3]
1310 %res = call <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
1314 declare <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1316 define <8 x i16>@test_int_x86_avx512_punpcklw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
1317 ; CHECK-LABEL: test_int_x86_avx512_punpcklw_d_128:
1319 ; CHECK-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x61,0xc1]
1320 ; CHECK-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1321 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1322 %res = call <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
1326 define <8 x i16>@test_int_x86_avx512_mask_punpcklw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
1327 ; X86-LABEL: test_int_x86_avx512_mask_punpcklw_d_128:
1329 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1330 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1331 ; X86-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x61,0xd1]
1332 ; X86-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1333 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1334 ; X86-NEXT: retl # encoding: [0xc3]
1336 ; X64-LABEL: test_int_x86_avx512_mask_punpcklw_d_128:
1338 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1339 ; X64-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x61,0xd1]
1340 ; X64-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1341 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1342 ; X64-NEXT: retq # encoding: [0xc3]
1343 %res = call <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
1347 declare <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1349 define <8 x i16>@test_int_x86_avx512_punpckhw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
1350 ; CHECK-LABEL: test_int_x86_avx512_punpckhw_d_128:
1352 ; CHECK-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x69,0xc1]
1353 ; CHECK-NEXT: # xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1354 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1355 %res = call <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
1359 define <8 x i16>@test_int_x86_avx512_mask_punpckhw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
1360 ; X86-LABEL: test_int_x86_avx512_mask_punpckhw_d_128:
1362 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1363 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1364 ; X86-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x69,0xd1]
1365 ; X86-NEXT: # xmm2 {%k1} = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1366 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1367 ; X86-NEXT: retl # encoding: [0xc3]
1369 ; X64-LABEL: test_int_x86_avx512_mask_punpckhw_d_128:
1371 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1372 ; X64-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x69,0xd1]
1373 ; X64-NEXT: # xmm2 {%k1} = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1374 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1375 ; X64-NEXT: retq # encoding: [0xc3]
1376 %res = call <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
1380 declare <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1382 define <16 x i16>@test_int_x86_avx512_punpcklw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
1383 ; CHECK-LABEL: test_int_x86_avx512_punpcklw_d_256:
1385 ; CHECK-NEXT: vpunpcklwd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x61,0xc1]
1386 ; CHECK-NEXT: # ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
1387 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1388 %res = call <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
1392 define <16 x i16>@test_int_x86_avx512_mask_punpcklw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
1393 ; X86-LABEL: test_int_x86_avx512_mask_punpcklw_d_256:
1395 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1396 ; X86-NEXT: vpunpcklwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x61,0xd1]
1397 ; X86-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
1398 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1399 ; X86-NEXT: retl # encoding: [0xc3]
1401 ; X64-LABEL: test_int_x86_avx512_mask_punpcklw_d_256:
1403 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1404 ; X64-NEXT: vpunpcklwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x61,0xd1]
1405 ; X64-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
1406 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1407 ; X64-NEXT: retq # encoding: [0xc3]
1408 %res = call <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
1412 declare <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1414 define <16 x i16>@test_int_x86_avx512_punpckhw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
1415 ; CHECK-LABEL: test_int_x86_avx512_punpckhw_d_256:
1417 ; CHECK-NEXT: vpunpckhwd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x69,0xc1]
1418 ; CHECK-NEXT: # ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
1419 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1420 %res = call <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
1424 define <16 x i16>@test_int_x86_avx512_mask_punpckhw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
1425 ; X86-LABEL: test_int_x86_avx512_mask_punpckhw_d_256:
1427 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1428 ; X86-NEXT: vpunpckhwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x69,0xd1]
1429 ; X86-NEXT: # ymm2 {%k1} = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
1430 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1431 ; X86-NEXT: retl # encoding: [0xc3]
1433 ; X64-LABEL: test_int_x86_avx512_mask_punpckhw_d_256:
1435 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1436 ; X64-NEXT: vpunpckhwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x69,0xd1]
1437 ; X64-NEXT: # ymm2 {%k1} = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
1438 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1439 ; X64-NEXT: retq # encoding: [0xc3]
1440 %res = call <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
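; Masked 16-bit add tests. Each width covers six forms: rr (unmasked), rrk (merge
; into the passthru operand), rrkz (zero-masking), and the rm/rmk/rmkz memory-operand
; counterparts.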
1444 define <8 x i16> @test_mask_add_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
1445 ; CHECK-LABEL: test_mask_add_epi16_rr_128:
1447 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1]
1448 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1449 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
1453 define <8 x i16> @test_mask_add_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
1454 ; X86-LABEL: test_mask_add_epi16_rrk_128:
1456 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1457 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1458 ; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xd1]
1459 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1460 ; X86-NEXT: retl # encoding: [0xc3]
1462 ; X64-LABEL: test_mask_add_epi16_rrk_128:
1464 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1465 ; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xd1]
1466 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1467 ; X64-NEXT: retq # encoding: [0xc3]
1468 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
1472 define <8 x i16> @test_mask_add_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
1473 ; X86-LABEL: test_mask_add_epi16_rrkz_128:
1475 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1476 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1477 ; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfd,0xc1]
1478 ; X86-NEXT: retl # encoding: [0xc3]
1480 ; X64-LABEL: test_mask_add_epi16_rrkz_128:
1482 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1483 ; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfd,0xc1]
1484 ; X64-NEXT: retq # encoding: [0xc3]
1485 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
1489 define <8 x i16> @test_mask_add_epi16_rm_128(<8 x i16> %a, ptr %ptr_b) {
1490 ; X86-LABEL: test_mask_add_epi16_rm_128:
1492 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1493 ; X86-NEXT: vpaddw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0x00]
1494 ; X86-NEXT: retl # encoding: [0xc3]
1496 ; X64-LABEL: test_mask_add_epi16_rm_128:
1498 ; X64-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0x07]
1499 ; X64-NEXT: retq # encoding: [0xc3]
1500 %b = load <8 x i16>, ptr %ptr_b
1501 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
1505 define <8 x i16> @test_mask_add_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
1506 ; X86-LABEL: test_mask_add_epi16_rmk_128:
1508 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1509 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1510 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1511 ; X86-NEXT: vpaddw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0x08]
1512 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1513 ; X86-NEXT: retl # encoding: [0xc3]
1515 ; X64-LABEL: test_mask_add_epi16_rmk_128:
1517 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1518 ; X64-NEXT: vpaddw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0x0f]
1519 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1520 ; X64-NEXT: retq # encoding: [0xc3]
1521 %b = load <8 x i16>, ptr %ptr_b
1522 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
1526 define <8 x i16> @test_mask_add_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
1527 ; X86-LABEL: test_mask_add_epi16_rmkz_128:
1529 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1530 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1531 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1532 ; X86-NEXT: vpaddw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfd,0x00]
1533 ; X86-NEXT: retl # encoding: [0xc3]
1535 ; X64-LABEL: test_mask_add_epi16_rmkz_128:
1537 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1538 ; X64-NEXT: vpaddw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfd,0x07]
1539 ; X64-NEXT: retq # encoding: [0xc3]
1540 %b = load <8 x i16>, ptr %ptr_b
1541 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
1545 declare <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1547 define <16 x i16> @test_mask_add_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
1548 ; CHECK-LABEL: test_mask_add_epi16_rr_256:
1550 ; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1]
1551 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1552 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
1556 define <16 x i16> @test_mask_add_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
1557 ; X86-LABEL: test_mask_add_epi16_rrk_256:
1559 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1560 ; X86-NEXT: vpaddw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xd1]
1561 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1562 ; X86-NEXT: retl # encoding: [0xc3]
1564 ; X64-LABEL: test_mask_add_epi16_rrk_256:
1566 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1567 ; X64-NEXT: vpaddw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xd1]
1568 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1569 ; X64-NEXT: retq # encoding: [0xc3]
1570 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
1574 define <16 x i16> @test_mask_add_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
1575 ; X86-LABEL: test_mask_add_epi16_rrkz_256:
1577 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1578 ; X86-NEXT: vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0xc1]
1579 ; X86-NEXT: retl # encoding: [0xc3]
1581 ; X64-LABEL: test_mask_add_epi16_rrkz_256:
1583 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1584 ; X64-NEXT: vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0xc1]
1585 ; X64-NEXT: retq # encoding: [0xc3]
1586 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
1590 define <16 x i16> @test_mask_add_epi16_rm_256(<16 x i16> %a, ptr %ptr_b) {
1591 ; X86-LABEL: test_mask_add_epi16_rm_256:
1593 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1594 ; X86-NEXT: vpaddw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0x00]
1595 ; X86-NEXT: retl # encoding: [0xc3]
1597 ; X64-LABEL: test_mask_add_epi16_rm_256:
1599 ; X64-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0x07]
1600 ; X64-NEXT: retq # encoding: [0xc3]
1601 %b = load <16 x i16>, ptr %ptr_b
1602 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
1606 define <16 x i16> @test_mask_add_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
1607 ; X86-LABEL: test_mask_add_epi16_rmk_256:
1609 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1610 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1611 ; X86-NEXT: vpaddw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0x08]
1612 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1613 ; X86-NEXT: retl # encoding: [0xc3]
1615 ; X64-LABEL: test_mask_add_epi16_rmk_256:
1617 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1618 ; X64-NEXT: vpaddw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0x0f]
1619 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1620 ; X64-NEXT: retq # encoding: [0xc3]
1621 %b = load <16 x i16>, ptr %ptr_b
1622 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
1626 define <16 x i16> @test_mask_add_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) {
1627 ; X86-LABEL: test_mask_add_epi16_rmkz_256:
1629 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1630 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1631 ; X86-NEXT: vpaddw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0x00]
1632 ; X86-NEXT: retl # encoding: [0xc3]
1634 ; X64-LABEL: test_mask_add_epi16_rmkz_256:
1636 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1637 ; X64-NEXT: vpaddw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0x07]
1638 ; X64-NEXT: retq # encoding: [0xc3]
1639 %b = load <16 x i16>, ptr %ptr_b
1640 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
1644 declare <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
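; The vpsubw tests repeat the rr/rrk/rrkz/rm/rmk/rmkz pattern for
; llvm.x86.avx512.mask.psub.w.128 and .256.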
1646 define <8 x i16> @test_mask_sub_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
1647 ; CHECK-LABEL: test_mask_sub_epi16_rr_128:
1649 ; CHECK-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0xc1]
1650 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1651 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
1655 define <8 x i16> @test_mask_sub_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
1656 ; X86-LABEL: test_mask_sub_epi16_rrk_128:
1658 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1659 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1660 ; X86-NEXT: vpsubw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf9,0xd1]
1661 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1662 ; X86-NEXT: retl # encoding: [0xc3]
1664 ; X64-LABEL: test_mask_sub_epi16_rrk_128:
1666 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1667 ; X64-NEXT: vpsubw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf9,0xd1]
1668 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1669 ; X64-NEXT: retq # encoding: [0xc3]
1670 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
1674 define <8 x i16> @test_mask_sub_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
1675 ; X86-LABEL: test_mask_sub_epi16_rrkz_128:
1677 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1678 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1679 ; X86-NEXT: vpsubw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf9,0xc1]
1680 ; X86-NEXT: retl # encoding: [0xc3]
1682 ; X64-LABEL: test_mask_sub_epi16_rrkz_128:
1684 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1685 ; X64-NEXT: vpsubw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf9,0xc1]
1686 ; X64-NEXT: retq # encoding: [0xc3]
1687 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
1691 define <8 x i16> @test_mask_sub_epi16_rm_128(<8 x i16> %a, ptr %ptr_b) {
1692 ; X86-LABEL: test_mask_sub_epi16_rm_128:
1694 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1695 ; X86-NEXT: vpsubw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0x00]
1696 ; X86-NEXT: retl # encoding: [0xc3]
1698 ; X64-LABEL: test_mask_sub_epi16_rm_128:
1700 ; X64-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0x07]
1701 ; X64-NEXT: retq # encoding: [0xc3]
1702 %b = load <8 x i16>, ptr %ptr_b
1703 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
1707 define <8 x i16> @test_mask_sub_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
1708 ; X86-LABEL: test_mask_sub_epi16_rmk_128:
1710 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1711 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1712 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1713 ; X86-NEXT: vpsubw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf9,0x08]
1714 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1715 ; X86-NEXT: retl # encoding: [0xc3]
1717 ; X64-LABEL: test_mask_sub_epi16_rmk_128:
1719 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1720 ; X64-NEXT: vpsubw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf9,0x0f]
1721 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1722 ; X64-NEXT: retq # encoding: [0xc3]
1723 %b = load <8 x i16>, ptr %ptr_b
1724 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
1728 define <8 x i16> @test_mask_sub_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
1729 ; X86-LABEL: test_mask_sub_epi16_rmkz_128:
1731 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1732 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1733 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1734 ; X86-NEXT: vpsubw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf9,0x00]
1735 ; X86-NEXT: retl # encoding: [0xc3]
1737 ; X64-LABEL: test_mask_sub_epi16_rmkz_128:
1739 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1740 ; X64-NEXT: vpsubw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf9,0x07]
1741 ; X64-NEXT: retq # encoding: [0xc3]
1742 %b = load <8 x i16>, ptr %ptr_b
1743 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
1747 declare <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1749 define <16 x i16> @test_mask_sub_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
1750 ; CHECK-LABEL: test_mask_sub_epi16_rr_256:
1752 ; CHECK-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf9,0xc1]
1753 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1754 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
1758 define <16 x i16> @test_mask_sub_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
1759 ; X86-LABEL: test_mask_sub_epi16_rrk_256:
1761 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1762 ; X86-NEXT: vpsubw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf9,0xd1]
1763 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1764 ; X86-NEXT: retl # encoding: [0xc3]
1766 ; X64-LABEL: test_mask_sub_epi16_rrk_256:
1768 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1769 ; X64-NEXT: vpsubw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf9,0xd1]
1770 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1771 ; X64-NEXT: retq # encoding: [0xc3]
1772 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
1776 define <16 x i16> @test_mask_sub_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
1777 ; X86-LABEL: test_mask_sub_epi16_rrkz_256:
1779 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1780 ; X86-NEXT: vpsubw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0xc1]
1781 ; X86-NEXT: retl # encoding: [0xc3]
1783 ; X64-LABEL: test_mask_sub_epi16_rrkz_256:
1785 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1786 ; X64-NEXT: vpsubw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0xc1]
1787 ; X64-NEXT: retq # encoding: [0xc3]
1788 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
1792 define <16 x i16> @test_mask_sub_epi16_rm_256(<16 x i16> %a, ptr %ptr_b) {
1793 ; X86-LABEL: test_mask_sub_epi16_rm_256:
1795 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1796 ; X86-NEXT: vpsubw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf9,0x00]
1797 ; X86-NEXT: retl # encoding: [0xc3]
1799 ; X64-LABEL: test_mask_sub_epi16_rm_256:
1801 ; X64-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf9,0x07]
1802 ; X64-NEXT: retq # encoding: [0xc3]
1803 %b = load <16 x i16>, ptr %ptr_b
1804 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
1808 define <16 x i16> @test_mask_sub_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
1809 ; X86-LABEL: test_mask_sub_epi16_rmk_256:
1811 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1812 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1813 ; X86-NEXT: vpsubw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf9,0x08]
1814 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1815 ; X86-NEXT: retl # encoding: [0xc3]
1817 ; X64-LABEL: test_mask_sub_epi16_rmk_256:
1819 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1820 ; X64-NEXT: vpsubw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf9,0x0f]
1821 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1822 ; X64-NEXT: retq # encoding: [0xc3]
1823 %b = load <16 x i16>, ptr %ptr_b
1824 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
1828 define <16 x i16> @test_mask_sub_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) {
1829 ; X86-LABEL: test_mask_sub_epi16_rmkz_256:
1831 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1832 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1833 ; X86-NEXT: vpsubw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0x00]
1834 ; X86-NEXT: retl # encoding: [0xc3]
1836 ; X64-LABEL: test_mask_sub_epi16_rmkz_256:
1838 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1839 ; X64-NEXT: vpsubw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0x07]
1840 ; X64-NEXT: retq # encoding: [0xc3]
1841 %b = load <16 x i16>, ptr %ptr_b
1842 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
1846 declare <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
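; 512-bit add/sub tests: zmm operations have no VEX encoding, so these stay EVEX,
; and the i32 masks are loaded into %k1 with kmovd.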
1848 define <32 x i16> @test_mask_add_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1849 ; CHECK-LABEL: test_mask_add_epi16_rr_512:
1851 ; CHECK-NEXT: vpaddw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1]
1852 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1853 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1857 define <32 x i16> @test_mask_add_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
1858 ; X86-LABEL: test_mask_add_epi16_rrk_512:
1860 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1861 ; X86-NEXT: vpaddw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfd,0xd1]
1862 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1863 ; X86-NEXT: retl # encoding: [0xc3]
1865 ; X64-LABEL: test_mask_add_epi16_rrk_512:
1867 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1868 ; X64-NEXT: vpaddw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfd,0xd1]
1869 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1870 ; X64-NEXT: retq # encoding: [0xc3]
1871 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1875 define <32 x i16> @test_mask_add_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
1876 ; X86-LABEL: test_mask_add_epi16_rrkz_512:
1878 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1879 ; X86-NEXT: vpaddw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0xc1]
1880 ; X86-NEXT: retl # encoding: [0xc3]
1882 ; X64-LABEL: test_mask_add_epi16_rrkz_512:
1884 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1885 ; X64-NEXT: vpaddw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0xc1]
1886 ; X64-NEXT: retq # encoding: [0xc3]
1887 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1891 define <32 x i16> @test_mask_add_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) {
1892 ; X86-LABEL: test_mask_add_epi16_rm_512:
1894 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1895 ; X86-NEXT: vpaddw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0x00]
1896 ; X86-NEXT: retl # encoding: [0xc3]
1898 ; X64-LABEL: test_mask_add_epi16_rm_512:
1900 ; X64-NEXT: vpaddw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0x07]
1901 ; X64-NEXT: retq # encoding: [0xc3]
1902 %b = load <32 x i16>, ptr %ptr_b
1903 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1907 define <32 x i16> @test_mask_add_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) {
1908 ; X86-LABEL: test_mask_add_epi16_rmk_512:
1910 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1911 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1912 ; X86-NEXT: vpaddw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfd,0x08]
1913 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1914 ; X86-NEXT: retl # encoding: [0xc3]
1916 ; X64-LABEL: test_mask_add_epi16_rmk_512:
1918 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1919 ; X64-NEXT: vpaddw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfd,0x0f]
1920 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1921 ; X64-NEXT: retq # encoding: [0xc3]
1922 %b = load <32 x i16>, ptr %ptr_b
1923 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1927 define <32 x i16> @test_mask_add_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i32 %mask) {
1928 ; X86-LABEL: test_mask_add_epi16_rmkz_512:
1930 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1931 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1932 ; X86-NEXT: vpaddw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0x00]
1933 ; X86-NEXT: retl # encoding: [0xc3]
1935 ; X64-LABEL: test_mask_add_epi16_rmkz_512:
1937 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1938 ; X64-NEXT: vpaddw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0x07]
1939 ; X64-NEXT: retq # encoding: [0xc3]
1940 %b = load <32 x i16>, ptr %ptr_b
1941 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1945 declare <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1947 define <32 x i16> @test_mask_sub_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1948 ; CHECK-LABEL: test_mask_sub_epi16_rr_512:
1950 ; CHECK-NEXT: vpsubw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf9,0xc1]
1951 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1952 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1956 define <32 x i16> @test_mask_sub_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
1957 ; X86-LABEL: test_mask_sub_epi16_rrk_512:
1959 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1960 ; X86-NEXT: vpsubw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf9,0xd1]
1961 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1962 ; X86-NEXT: retl # encoding: [0xc3]
1964 ; X64-LABEL: test_mask_sub_epi16_rrk_512:
1966 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1967 ; X64-NEXT: vpsubw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf9,0xd1]
1968 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1969 ; X64-NEXT: retq # encoding: [0xc3]
1970 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1974 define <32 x i16> @test_mask_sub_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
1975 ; X86-LABEL: test_mask_sub_epi16_rrkz_512:
1977 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1978 ; X86-NEXT: vpsubw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0xc1]
1979 ; X86-NEXT: retl # encoding: [0xc3]
1981 ; X64-LABEL: test_mask_sub_epi16_rrkz_512:
1983 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1984 ; X64-NEXT: vpsubw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0xc1]
1985 ; X64-NEXT: retq # encoding: [0xc3]
1986 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1990 define <32 x i16> @test_mask_sub_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) {
1991 ; X86-LABEL: test_mask_sub_epi16_rm_512:
1993 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1994 ; X86-NEXT: vpsubw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf9,0x00]
1995 ; X86-NEXT: retl # encoding: [0xc3]
1997 ; X64-LABEL: test_mask_sub_epi16_rm_512:
1999 ; X64-NEXT: vpsubw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf9,0x07]
2000 ; X64-NEXT: retq # encoding: [0xc3]
2001 %b = load <32 x i16>, ptr %ptr_b
2002 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
2006 define <32 x i16> @test_mask_sub_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) {
2007 ; X86-LABEL: test_mask_sub_epi16_rmk_512:
2009 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2010 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
2011 ; X86-NEXT: vpsubw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf9,0x08]
2012 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2013 ; X86-NEXT: retl # encoding: [0xc3]
2015 ; X64-LABEL: test_mask_sub_epi16_rmk_512:
2017 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
2018 ; X64-NEXT: vpsubw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf9,0x0f]
2019 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2020 ; X64-NEXT: retq # encoding: [0xc3]
2021 %b = load <32 x i16>, ptr %ptr_b
2022 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
2026 define <32 x i16> @test_mask_sub_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i32 %mask) {
2027 ; X86-LABEL: test_mask_sub_epi16_rmkz_512:
2029 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2030 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
2031 ; X86-NEXT: vpsubw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0x00]
2032 ; X86-NEXT: retl # encoding: [0xc3]
2034 ; X64-LABEL: test_mask_sub_epi16_rmkz_512:
2036 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
2037 ; X64-NEXT: vpsubw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0x07]
2038 ; X64-NEXT: retq # encoding: [0xc3]
2039 %b = load <32 x i16>, ptr %ptr_b
2040 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
2044 declare <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
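; Low-half multiply tests: vpmullw keeps the low 16 bits of each 16x16 product.
; The 512-bit forms come first, followed by the 128-bit forms with the same masked
; variants.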
2046 define <32 x i16> @test_mask_mullo_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
2047 ; CHECK-LABEL: test_mask_mullo_epi16_rr_512:
2049 ; CHECK-NEXT: vpmullw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd5,0xc1]
2050 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2051 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
2055 define <32 x i16> @test_mask_mullo_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
2056 ; X86-LABEL: test_mask_mullo_epi16_rrk_512:
2058 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2059 ; X86-NEXT: vpmullw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd5,0xd1]
2060 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2061 ; X86-NEXT: retl # encoding: [0xc3]
2063 ; X64-LABEL: test_mask_mullo_epi16_rrk_512:
2065 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2066 ; X64-NEXT: vpmullw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd5,0xd1]
2067 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2068 ; X64-NEXT: retq # encoding: [0xc3]
2069 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
2073 define <32 x i16> @test_mask_mullo_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
2074 ; X86-LABEL: test_mask_mullo_epi16_rrkz_512:
2076 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2077 ; X86-NEXT: vpmullw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0xc1]
2078 ; X86-NEXT: retl # encoding: [0xc3]
2080 ; X64-LABEL: test_mask_mullo_epi16_rrkz_512:
2082 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2083 ; X64-NEXT: vpmullw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0xc1]
2084 ; X64-NEXT: retq # encoding: [0xc3]
2085 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
2089 define <32 x i16> @test_mask_mullo_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) {
2090 ; X86-LABEL: test_mask_mullo_epi16_rm_512:
2092 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2093 ; X86-NEXT: vpmullw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd5,0x00]
2094 ; X86-NEXT: retl # encoding: [0xc3]
2096 ; X64-LABEL: test_mask_mullo_epi16_rm_512:
2098 ; X64-NEXT: vpmullw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd5,0x07]
2099 ; X64-NEXT: retq # encoding: [0xc3]
2100 %b = load <32 x i16>, ptr %ptr_b
2101 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
2105 define <32 x i16> @test_mask_mullo_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) {
2106 ; X86-LABEL: test_mask_mullo_epi16_rmk_512:
2108 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2109 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
2110 ; X86-NEXT: vpmullw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd5,0x08]
2111 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2112 ; X86-NEXT: retl # encoding: [0xc3]
2114 ; X64-LABEL: test_mask_mullo_epi16_rmk_512:
2116 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
2117 ; X64-NEXT: vpmullw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd5,0x0f]
2118 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2119 ; X64-NEXT: retq # encoding: [0xc3]
2120 %b = load <32 x i16>, ptr %ptr_b
2121 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
2125 define <32 x i16> @test_mask_mullo_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i32 %mask) {
2126 ; X86-LABEL: test_mask_mullo_epi16_rmkz_512:
2128 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2129 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
2130 ; X86-NEXT: vpmullw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0x00]
2131 ; X86-NEXT: retl # encoding: [0xc3]
2133 ; X64-LABEL: test_mask_mullo_epi16_rmkz_512:
2135 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
2136 ; X64-NEXT: vpmullw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0x07]
2137 ; X64-NEXT: retq # encoding: [0xc3]
2138 %b = load <32 x i16>, ptr %ptr_b
2139 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
2143 declare <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2145 define <8 x i16> @test_mask_mullo_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
2146 ; CHECK-LABEL: test_mask_mullo_epi16_rr_128:
2148 ; CHECK-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0xc1]
2149 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2150 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
2154 define <8 x i16> @test_mask_mullo_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
2155 ; X86-LABEL: test_mask_mullo_epi16_rrk_128:
2157 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2158 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2159 ; X86-NEXT: vpmullw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd5,0xd1]
2160 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2161 ; X86-NEXT: retl # encoding: [0xc3]
2163 ; X64-LABEL: test_mask_mullo_epi16_rrk_128:
2165 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2166 ; X64-NEXT: vpmullw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd5,0xd1]
2167 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2168 ; X64-NEXT: retq # encoding: [0xc3]
2169 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
2173 define <8 x i16> @test_mask_mullo_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
2174 ; X86-LABEL: test_mask_mullo_epi16_rrkz_128:
2176 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2177 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2178 ; X86-NEXT: vpmullw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd5,0xc1]
2179 ; X86-NEXT: retl # encoding: [0xc3]
2181 ; X64-LABEL: test_mask_mullo_epi16_rrkz_128:
2183 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2184 ; X64-NEXT: vpmullw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd5,0xc1]
2185 ; X64-NEXT: retq # encoding: [0xc3]
2186 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
2190 define <8 x i16> @test_mask_mullo_epi16_rm_128(<8 x i16> %a, ptr %ptr_b) {
2191 ; X86-LABEL: test_mask_mullo_epi16_rm_128:
2193 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2194 ; X86-NEXT: vpmullw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0x00]
2195 ; X86-NEXT: retl # encoding: [0xc3]
2197 ; X64-LABEL: test_mask_mullo_epi16_rm_128:
2199 ; X64-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0x07]
2200 ; X64-NEXT: retq # encoding: [0xc3]
2201 %b = load <8 x i16>, ptr %ptr_b
2202 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
2206 define <8 x i16> @test_mask_mullo_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
2207 ; X86-LABEL: test_mask_mullo_epi16_rmk_128:
2209 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2210 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2211 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
2212 ; X86-NEXT: vpmullw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd5,0x08]
2213 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2214 ; X86-NEXT: retl # encoding: [0xc3]
2216 ; X64-LABEL: test_mask_mullo_epi16_rmk_128:
2218 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
2219 ; X64-NEXT: vpmullw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd5,0x0f]
2220 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2221 ; X64-NEXT: retq # encoding: [0xc3]
2222 %b = load <8 x i16>, ptr %ptr_b
2223 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
2227 define <8 x i16> @test_mask_mullo_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
2228 ; X86-LABEL: test_mask_mullo_epi16_rmkz_128:
2230 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2231 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2232 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
2233 ; X86-NEXT: vpmullw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd5,0x00]
2234 ; X86-NEXT: retl # encoding: [0xc3]
2236 ; X64-LABEL: test_mask_mullo_epi16_rmkz_128:
2238 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
2239 ; X64-NEXT: vpmullw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd5,0x07]
2240 ; X64-NEXT: retq # encoding: [0xc3]
2241 %b = load <8 x i16>, ptr %ptr_b
2242 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
2246 declare <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2248 define <16 x i16> @test_mask_mullo_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
2249 ; CHECK-LABEL: test_mask_mullo_epi16_rr_256:
2251 ; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd5,0xc1]
2252 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2253 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
2257 define <16 x i16> @test_mask_mullo_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
2258 ; X86-LABEL: test_mask_mullo_epi16_rrk_256:
2260 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2261 ; X86-NEXT: vpmullw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd5,0xd1]
2262 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2263 ; X86-NEXT: retl # encoding: [0xc3]
2265 ; X64-LABEL: test_mask_mullo_epi16_rrk_256:
2267 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2268 ; X64-NEXT: vpmullw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd5,0xd1]
2269 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2270 ; X64-NEXT: retq # encoding: [0xc3]
2271 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
2275 define <16 x i16> @test_mask_mullo_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
2276 ; X86-LABEL: test_mask_mullo_epi16_rrkz_256:
2278 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2279 ; X86-NEXT: vpmullw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0xc1]
2280 ; X86-NEXT: retl # encoding: [0xc3]
2282 ; X64-LABEL: test_mask_mullo_epi16_rrkz_256:
2284 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2285 ; X64-NEXT: vpmullw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0xc1]
2286 ; X64-NEXT: retq # encoding: [0xc3]
2287 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
2291 define <16 x i16> @test_mask_mullo_epi16_rm_256(<16 x i16> %a, ptr %ptr_b) {
2292 ; X86-LABEL: test_mask_mullo_epi16_rm_256:
2294 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2295 ; X86-NEXT: vpmullw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd5,0x00]
2296 ; X86-NEXT: retl # encoding: [0xc3]
2298 ; X64-LABEL: test_mask_mullo_epi16_rm_256:
2300 ; X64-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd5,0x07]
2301 ; X64-NEXT: retq # encoding: [0xc3]
2302 %b = load <16 x i16>, ptr %ptr_b
2303 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
2307 define <16 x i16> @test_mask_mullo_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
2308 ; X86-LABEL: test_mask_mullo_epi16_rmk_256:
2310 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2311 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
2312 ; X86-NEXT: vpmullw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd5,0x08]
2313 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
2314 ; X86-NEXT: retl # encoding: [0xc3]
2316 ; X64-LABEL: test_mask_mullo_epi16_rmk_256:
2318 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
2319 ; X64-NEXT: vpmullw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd5,0x0f]
2320 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
2321 ; X64-NEXT: retq # encoding: [0xc3]
2322 %b = load <16 x i16>, ptr %ptr_b
2323 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
2327 define <16 x i16> @test_mask_mullo_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) {
2328 ; X86-LABEL: test_mask_mullo_epi16_rmkz_256:
2330 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2331 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
2332 ; X86-NEXT: vpmullw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0x00]
2333 ; X86-NEXT: retl # encoding: [0xc3]
2335 ; X64-LABEL: test_mask_mullo_epi16_rmkz_256:
2337 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
2338 ; X64-NEXT: vpmullw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0x07]
2339 ; X64-NEXT: retq # encoding: [0xc3]
2340 %b = load <16 x i16>, ptr %ptr_b
2341 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
2345 declare <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
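; Masked signed maximum on bytes and words (vpmaxsb / vpmaxsw). The tests below
; cover 128-bit and 256-bit vectors with merge-masking into the third operand
; and zero-masking of the inactive elements.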
2347 declare <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
2349 define <16 x i8>@test_int_x86_avx512_mask_pmaxs_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
2350 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_b_128:
2352 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2353 ; X86-NEXT: vpmaxsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3c,0xd1]
2354 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2355 ; X86-NEXT: retl # encoding: [0xc3]
2357 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_b_128:
2359 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2360 ; X64-NEXT: vpmaxsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3c,0xd1]
2361 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2362 ; X64-NEXT: retq # encoding: [0xc3]
2363 %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask)
2367 define <16 x i8>@test_int_x86_avx512_maskz_pmaxs_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) {
2368 ; X86-LABEL: test_int_x86_avx512_maskz_pmaxs_b_128:
2370 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2371 ; X86-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3c,0xc1]
2372 ; X86-NEXT: retl # encoding: [0xc3]
2374 ; X64-LABEL: test_int_x86_avx512_maskz_pmaxs_b_128:
2376 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2377 ; X64-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3c,0xc1]
2378 ; X64-NEXT: retq # encoding: [0xc3]
2379 %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
2383 declare <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
2385 define <32 x i8>@test_int_x86_avx512_pmaxs_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
2386 ; CHECK-LABEL: test_int_x86_avx512_pmaxs_b_256:
2388 ; CHECK-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3c,0xc1]
2389 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2390 %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
2394 define <32 x i8>@test_int_x86_avx512_mask_pmaxs_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
2395 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_b_256:
2397 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2398 ; X86-NEXT: vpmaxsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3c,0xd1]
2399 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2400 ; X86-NEXT: retl # encoding: [0xc3]
2402 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_b_256:
2404 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2405 ; X64-NEXT: vpmaxsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3c,0xd1]
2406 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2407 ; X64-NEXT: retq # encoding: [0xc3]
2408 %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
2412 declare <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2414 define <8 x i16>@test_int_x86_avx512_pmaxs_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
2415 ; CHECK-LABEL: test_int_x86_avx512_pmaxs_w_128:
2417 ; CHECK-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xc1]
2418 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2419 %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
2423 define <8 x i16>@test_int_x86_avx512_mask_pmaxs_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
2424 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_w_128:
2426 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2427 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2428 ; X86-NEXT: vpmaxsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xee,0xd1]
2429 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2430 ; X86-NEXT: retl # encoding: [0xc3]
2432 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_w_128:
2434 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2435 ; X64-NEXT: vpmaxsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xee,0xd1]
2436 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2437 ; X64-NEXT: retq # encoding: [0xc3]
2438 %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
2442 declare <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2444 define <16 x i16>@test_int_x86_avx512_mask_pmaxs_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
2445 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_w_256:
2447 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2448 ; X86-NEXT: vpmaxsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xee,0xd1]
2449 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2450 ; X86-NEXT: retl # encoding: [0xc3]
2452 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_w_256:
2454 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2455 ; X64-NEXT: vpmaxsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xee,0xd1]
2456 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2457 ; X64-NEXT: retq # encoding: [0xc3]
2458 %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
2462 define <16 x i16>@test_int_x86_avx512_maskz_pmaxs_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %mask) {
2463 ; X86-LABEL: test_int_x86_avx512_maskz_pmaxs_w_256:
2465 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2466 ; X86-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xee,0xc1]
2467 ; X86-NEXT: retl # encoding: [0xc3]
2469 ; X64-LABEL: test_int_x86_avx512_maskz_pmaxs_w_256:
2471 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2472 ; X64-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xee,0xc1]
2473 ; X64-NEXT: retq # encoding: [0xc3]
2474 %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
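; Masked unsigned maximum on bytes and words (vpmaxub / vpmaxuw), mirroring the
; signed-maximum tests above.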
2478 declare <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
2480 define <16 x i8>@test_int_x86_avx512_mask_pmaxu_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
2481 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_b_128:
2483 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2484 ; X86-NEXT: vpmaxub %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xde,0xd1]
2485 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2486 ; X86-NEXT: retl # encoding: [0xc3]
2488 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_b_128:
2490 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2491 ; X64-NEXT: vpmaxub %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xde,0xd1]
2492 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2493 ; X64-NEXT: retq # encoding: [0xc3]
2494 %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask)
2498 define <16 x i8>@test_int_x86_avx512_maskz_pmaxu_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) {
2499 ; X86-LABEL: test_int_x86_avx512_maskz_pmaxu_b_128:
2501 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2502 ; X86-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xde,0xc1]
2503 ; X86-NEXT: retl # encoding: [0xc3]
2505 ; X64-LABEL: test_int_x86_avx512_maskz_pmaxu_b_128:
2507 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2508 ; X64-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xde,0xc1]
2509 ; X64-NEXT: retq # encoding: [0xc3]
2510 %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
2514 declare <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
2516 define <32 x i8>@test_int_x86_avx512_pmaxu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
2517 ; CHECK-LABEL: test_int_x86_avx512_pmaxu_b_256:
2519 ; CHECK-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xde,0xc1]
2520 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2521 %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
2525 define <32 x i8>@test_int_x86_avx512_mask_pmaxu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
2526 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_b_256:
2528 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2529 ; X86-NEXT: vpmaxub %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xde,0xd1]
2530 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2531 ; X86-NEXT: retl # encoding: [0xc3]
2533 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_b_256:
2535 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2536 ; X64-NEXT: vpmaxub %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xde,0xd1]
2537 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2538 ; X64-NEXT: retq # encoding: [0xc3]
2539 %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
2543 declare <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2545 define <8 x i16>@test_int_x86_avx512_pmaxu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
2546 ; CHECK-LABEL: test_int_x86_avx512_pmaxu_w_128:
2548 ; CHECK-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3e,0xc1]
2549 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2550 %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
2554 define <8 x i16>@test_int_x86_avx512_mask_pmaxu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
2555 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_w_128:
2557 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2558 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2559 ; X86-NEXT: vpmaxuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3e,0xd1]
2560 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2561 ; X86-NEXT: retl # encoding: [0xc3]
2563 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_w_128:
2565 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2566 ; X64-NEXT: vpmaxuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3e,0xd1]
2567 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2568 ; X64-NEXT: retq # encoding: [0xc3]
2569 %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
2573 declare <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2575 define <16 x i16>@test_int_x86_avx512_mask_pmaxu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
2576 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_w_256:
2578 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2579 ; X86-NEXT: vpmaxuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3e,0xd1]
2580 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2581 ; X86-NEXT: retl # encoding: [0xc3]
2583 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_w_256:
2585 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2586 ; X64-NEXT: vpmaxuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3e,0xd1]
2587 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2588 ; X64-NEXT: retq # encoding: [0xc3]
2589 %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
2593 define <16 x i16>@test_int_x86_avx512_maskz_pmaxu_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %mask) {
2594 ; X86-LABEL: test_int_x86_avx512_maskz_pmaxu_w_256:
2596 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2597 ; X86-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x3e,0xc1]
2598 ; X86-NEXT: retl # encoding: [0xc3]
2600 ; X64-LABEL: test_int_x86_avx512_maskz_pmaxu_w_256:
2602 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2603 ; X64-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x3e,0xc1]
2604 ; X64-NEXT: retq # encoding: [0xc3]
2605 %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
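; Masked signed minimum on bytes and words (vpminsb / vpminsw).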
2609 declare <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
2611 define <16 x i8>@test_int_x86_avx512_mask_pmins_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
2612 ; X86-LABEL: test_int_x86_avx512_mask_pmins_b_128:
2614 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2615 ; X86-NEXT: vpminsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x38,0xd1]
2616 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2617 ; X86-NEXT: retl # encoding: [0xc3]
2619 ; X64-LABEL: test_int_x86_avx512_mask_pmins_b_128:
2621 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2622 ; X64-NEXT: vpminsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x38,0xd1]
2623 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2624 ; X64-NEXT: retq # encoding: [0xc3]
2625 %res = call <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask)
2629 define <16 x i8>@test_int_x86_avx512_maskz_pmins_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) {
2630 ; X86-LABEL: test_int_x86_avx512_maskz_pmins_b_128:
2632 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2633 ; X86-NEXT: vpminsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x38,0xc1]
2634 ; X86-NEXT: retl # encoding: [0xc3]
2636 ; X64-LABEL: test_int_x86_avx512_maskz_pmins_b_128:
2638 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2639 ; X64-NEXT: vpminsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x38,0xc1]
2640 ; X64-NEXT: retq # encoding: [0xc3]
2641 %res = call <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
2645 declare <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
2647 define <32 x i8>@test_int_x86_avx512_pmins_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
2648 ; CHECK-LABEL: test_int_x86_avx512_pmins_b_256:
2650 ; CHECK-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x38,0xc1]
2651 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2652 %res = call <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
2656 define <32 x i8>@test_int_x86_avx512_mask_pmins_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
2657 ; X86-LABEL: test_int_x86_avx512_mask_pmins_b_256:
2659 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2660 ; X86-NEXT: vpminsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x38,0xd1]
2661 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2662 ; X86-NEXT: retl # encoding: [0xc3]
2664 ; X64-LABEL: test_int_x86_avx512_mask_pmins_b_256:
2666 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2667 ; X64-NEXT: vpminsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x38,0xd1]
2668 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2669 ; X64-NEXT: retq # encoding: [0xc3]
2670 %res = call <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
2674 declare <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2676 define <8 x i16>@test_int_x86_avx512_pmins_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
2677 ; CHECK-LABEL: test_int_x86_avx512_pmins_w_128:
2679 ; CHECK-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xc1]
2680 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2681 %res = call <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
2685 define <8 x i16>@test_int_x86_avx512_mask_pmins_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
2686 ; X86-LABEL: test_int_x86_avx512_mask_pmins_w_128:
2688 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2689 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2690 ; X86-NEXT: vpminsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xea,0xd1]
2691 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2692 ; X86-NEXT: retl # encoding: [0xc3]
2694 ; X64-LABEL: test_int_x86_avx512_mask_pmins_w_128:
2696 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2697 ; X64-NEXT: vpminsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xea,0xd1]
2698 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2699 ; X64-NEXT: retq # encoding: [0xc3]
2700 %res = call <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
2704 declare <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2706 define <16 x i16>@test_int_x86_avx512_mask_pmins_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
2707 ; X86-LABEL: test_int_x86_avx512_mask_pmins_w_256:
2709 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2710 ; X86-NEXT: vpminsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xea,0xd1]
2711 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2712 ; X86-NEXT: retl # encoding: [0xc3]
2714 ; X64-LABEL: test_int_x86_avx512_mask_pmins_w_256:
2716 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2717 ; X64-NEXT: vpminsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xea,0xd1]
2718 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2719 ; X64-NEXT: retq # encoding: [0xc3]
2720 %res = call <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
2724 define <16 x i16>@test_int_x86_avx512_maskz_pmins_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %mask) {
2725 ; X86-LABEL: test_int_x86_avx512_maskz_pmins_w_256:
2727 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2728 ; X86-NEXT: vpminsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xea,0xc1]
2729 ; X86-NEXT: retl # encoding: [0xc3]
2731 ; X64-LABEL: test_int_x86_avx512_maskz_pmins_w_256:
2733 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2734 ; X64-NEXT: vpminsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xea,0xc1]
2735 ; X64-NEXT: retq # encoding: [0xc3]
2736 %res = call <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
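; Masked unsigned minimum on bytes and words (vpminub / vpminuw).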
2740 declare <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
2742 define <16 x i8>@test_int_x86_avx512_mask_pminu_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
2743 ; X86-LABEL: test_int_x86_avx512_mask_pminu_b_128:
2745 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2746 ; X86-NEXT: vpminub %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xda,0xd1]
2747 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2748 ; X86-NEXT: retl # encoding: [0xc3]
2750 ; X64-LABEL: test_int_x86_avx512_mask_pminu_b_128:
2752 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2753 ; X64-NEXT: vpminub %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xda,0xd1]
2754 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2755 ; X64-NEXT: retq # encoding: [0xc3]
2756 %res = call <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask)
2760 define <16 x i8>@test_int_x86_avx512_maskz_pminu_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) {
2761 ; X86-LABEL: test_int_x86_avx512_maskz_pminu_b_128:
2763 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2764 ; X86-NEXT: vpminub %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xda,0xc1]
2765 ; X86-NEXT: retl # encoding: [0xc3]
2767 ; X64-LABEL: test_int_x86_avx512_maskz_pminu_b_128:
2769 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2770 ; X64-NEXT: vpminub %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xda,0xc1]
2771 ; X64-NEXT: retq # encoding: [0xc3]
2772 %res = call <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
2776 declare <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
2778 define <32 x i8>@test_int_x86_avx512_pminu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
2779 ; CHECK-LABEL: test_int_x86_avx512_pminu_b_256:
2781 ; CHECK-NEXT: vpminub %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xda,0xc1]
2782 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2783 %res = call <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
2787 define <32 x i8>@test_int_x86_avx512_mask_pminu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
2788 ; X86-LABEL: test_int_x86_avx512_mask_pminu_b_256:
2790 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2791 ; X86-NEXT: vpminub %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xda,0xd1]
2792 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2793 ; X86-NEXT: retl # encoding: [0xc3]
2795 ; X64-LABEL: test_int_x86_avx512_mask_pminu_b_256:
2797 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2798 ; X64-NEXT: vpminub %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xda,0xd1]
2799 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2800 ; X64-NEXT: retq # encoding: [0xc3]
2801 %res = call <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
2805 declare <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2807 define <8 x i16>@test_int_x86_avx512_pminu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
2808 ; CHECK-LABEL: test_int_x86_avx512_pminu_w_128:
2810 ; CHECK-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3a,0xc1]
2811 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2812 %res = call <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
2816 define <8 x i16>@test_int_x86_avx512_mask_pminu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
2817 ; X86-LABEL: test_int_x86_avx512_mask_pminu_w_128:
2819 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2820 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2821 ; X86-NEXT: vpminuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3a,0xd1]
2822 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2823 ; X86-NEXT: retl # encoding: [0xc3]
2825 ; X64-LABEL: test_int_x86_avx512_mask_pminu_w_128:
2827 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2828 ; X64-NEXT: vpminuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3a,0xd1]
2829 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2830 ; X64-NEXT: retq # encoding: [0xc3]
2831 %res = call <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
2835 declare <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2837 define <16 x i16>@test_int_x86_avx512_mask_pminu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
2838 ; X86-LABEL: test_int_x86_avx512_mask_pminu_w_256:
2840 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2841 ; X86-NEXT: vpminuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3a,0xd1]
2842 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2843 ; X86-NEXT: retl # encoding: [0xc3]
2845 ; X64-LABEL: test_int_x86_avx512_mask_pminu_w_256:
2847 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2848 ; X64-NEXT: vpminuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3a,0xd1]
2849 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2850 ; X64-NEXT: retq # encoding: [0xc3]
2851 %res = call <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
2855 define <16 x i16>@test_int_x86_avx512_maskz_pminu_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %mask) {
2856 ; X86-LABEL: test_int_x86_avx512_maskz_pminu_w_256:
2858 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2859 ; X86-NEXT: vpminuw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x3a,0xc1]
2860 ; X86-NEXT: retl # encoding: [0xc3]
2862 ; X64-LABEL: test_int_x86_avx512_maskz_pminu_w_256:
2864 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2865 ; X64-NEXT: vpminuw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x3a,0xc1]
2866 ; X64-NEXT: retq # encoding: [0xc3]
2867 %res = call <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
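; Logical right shift by a vector-supplied count (vpsrlw %xmm1, ...), checked
; unmasked, merge-masked and zero-masked for 128-bit and 256-bit vectors.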
2871 declare <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2873 define <8 x i16>@test_int_x86_avx512_psrl_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
2874 ; CHECK-LABEL: test_int_x86_avx512_psrl_w_128:
2876 ; CHECK-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0xc1]
2877 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2878 %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
2882 define <8 x i16>@test_int_x86_avx512_mask_psrl_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
2883 ; X86-LABEL: test_int_x86_avx512_mask_psrl_w_128:
2885 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2886 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2887 ; X86-NEXT: vpsrlw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd1,0xd1]
2888 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2889 ; X86-NEXT: retl # encoding: [0xc3]
2891 ; X64-LABEL: test_int_x86_avx512_mask_psrl_w_128:
2893 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2894 ; X64-NEXT: vpsrlw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd1,0xd1]
2895 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2896 ; X64-NEXT: retq # encoding: [0xc3]
2897 %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
2902 define <8 x i16>@test_int_x86_avx512_maskz_psrl_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
2903 ; X86-LABEL: test_int_x86_avx512_maskz_psrl_w_128:
2905 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2906 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2907 ; X86-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd1,0xc1]
2908 ; X86-NEXT: retl # encoding: [0xc3]
2910 ; X64-LABEL: test_int_x86_avx512_maskz_psrl_w_128:
2912 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2913 ; X64-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd1,0xc1]
2914 ; X64-NEXT: retq # encoding: [0xc3]
2915 %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
2919 declare <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16)
2921 define <16 x i16>@test_int_x86_avx512_psrl_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2) {
2922 ; CHECK-LABEL: test_int_x86_avx512_psrl_w_256:
2924 ; CHECK-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0xc1]
2925 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2926 %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1)
2930 define <16 x i16>@test_int_x86_avx512_mask_psrl_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) {
2931 ; X86-LABEL: test_int_x86_avx512_mask_psrl_w_256:
2933 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2934 ; X86-NEXT: vpsrlw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd1,0xd1]
2935 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2936 ; X86-NEXT: retl # encoding: [0xc3]
2938 ; X64-LABEL: test_int_x86_avx512_mask_psrl_w_256:
2940 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2941 ; X64-NEXT: vpsrlw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd1,0xd1]
2942 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2943 ; X64-NEXT: retq # encoding: [0xc3]
2944 %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3)
2948 define <16 x i16>@test_int_x86_avx512_maskz_psrl_w_256(<16 x i16> %x0, <8 x i16> %x1, i16 %x3) {
2949 ; X86-LABEL: test_int_x86_avx512_maskz_psrl_w_256:
2951 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2952 ; X86-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd1,0xc1]
2953 ; X86-NEXT: retl # encoding: [0xc3]
2955 ; X64-LABEL: test_int_x86_avx512_maskz_psrl_w_256:
2957 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2958 ; X64-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd1,0xc1]
2959 ; X64-NEXT: retq # encoding: [0xc3]
2960 %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
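; Arithmetic right shift by a vector-supplied count (vpsraw), with the same
; unmasked, merge-masked and zero-masked combinations as the logical shift above.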
2964 declare <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2966 define <8 x i16>@test_int_x86_avx512_psra_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
2967 ; CHECK-LABEL: test_int_x86_avx512_psra_w_128:
2969 ; CHECK-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe1,0xc1]
2970 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2971 %res = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
2975 define <8 x i16>@test_int_x86_avx512_mask_psra_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
2976 ; X86-LABEL: test_int_x86_avx512_mask_psra_w_128:
2978 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2979 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2980 ; X86-NEXT: vpsraw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe1,0xd1]
2981 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2982 ; X86-NEXT: retl # encoding: [0xc3]
2984 ; X64-LABEL: test_int_x86_avx512_mask_psra_w_128:
2986 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2987 ; X64-NEXT: vpsraw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe1,0xd1]
2988 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2989 ; X64-NEXT: retq # encoding: [0xc3]
2990 %res = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
2994 define <8 x i16>@test_int_x86_avx512_maskz_psra_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
2995 ; X86-LABEL: test_int_x86_avx512_maskz_psra_w_128:
2997 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2998 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2999 ; X86-NEXT: vpsraw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe1,0xc1]
3000 ; X86-NEXT: retl # encoding: [0xc3]
3002 ; X64-LABEL: test_int_x86_avx512_maskz_psra_w_128:
3004 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3005 ; X64-NEXT: vpsraw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe1,0xc1]
3006 ; X64-NEXT: retq # encoding: [0xc3]
3007 %res = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
3011 declare <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16)
3013 define <16 x i16>@test_int_x86_avx512_psra_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2) {
3014 ; CHECK-LABEL: test_int_x86_avx512_psra_w_256:
3016 ; CHECK-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe1,0xc1]
3017 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3018 %res = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1)
3022 define <16 x i16>@test_int_x86_avx512_mask_psra_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) {
3023 ; X86-LABEL: test_int_x86_avx512_mask_psra_w_256:
3025 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3026 ; X86-NEXT: vpsraw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe1,0xd1]
3027 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3028 ; X86-NEXT: retl # encoding: [0xc3]
3030 ; X64-LABEL: test_int_x86_avx512_mask_psra_w_256:
3032 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3033 ; X64-NEXT: vpsraw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe1,0xd1]
3034 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3035 ; X64-NEXT: retq # encoding: [0xc3]
3036 %res = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3)
3040 define <16 x i16>@test_int_x86_avx512_maskz_psra_w_256(<16 x i16> %x0, <8 x i16> %x1, i16 %x3) {
3041 ; X86-LABEL: test_int_x86_avx512_maskz_psra_w_256:
3043 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3044 ; X86-NEXT: vpsraw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe1,0xc1]
3045 ; X86-NEXT: retl # encoding: [0xc3]
3047 ; X64-LABEL: test_int_x86_avx512_maskz_psra_w_256:
3049 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3050 ; X64-NEXT: vpsraw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe1,0xc1]
3051 ; X64-NEXT: retq # encoding: [0xc3]
3052 %res = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
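; Left shift by a vector-supplied count (vpsllw).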
3056 declare <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
3058 define <8 x i16>@test_int_x86_avx512_psll_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
3059 ; CHECK-LABEL: test_int_x86_avx512_psll_w_128:
3061 ; CHECK-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf1,0xc1]
3062 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3063 %res = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
3067 define <8 x i16>@test_int_x86_avx512_mask_psll_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
3068 ; X86-LABEL: test_int_x86_avx512_mask_psll_w_128:
3070 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3071 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3072 ; X86-NEXT: vpsllw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf1,0xd1]
3073 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3074 ; X86-NEXT: retl # encoding: [0xc3]
3076 ; X64-LABEL: test_int_x86_avx512_mask_psll_w_128:
3078 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3079 ; X64-NEXT: vpsllw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf1,0xd1]
3080 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3081 ; X64-NEXT: retq # encoding: [0xc3]
3082 %res = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
3086 define <8 x i16>@test_int_x86_avx512_maskz_psll_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
3087 ; X86-LABEL: test_int_x86_avx512_maskz_psll_w_128:
3089 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3090 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3091 ; X86-NEXT: vpsllw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf1,0xc1]
3092 ; X86-NEXT: retl # encoding: [0xc3]
3094 ; X64-LABEL: test_int_x86_avx512_maskz_psll_w_128:
3096 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3097 ; X64-NEXT: vpsllw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf1,0xc1]
3098 ; X64-NEXT: retq # encoding: [0xc3]
3099 %res = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
3103 declare <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16)
3105 define <16 x i16>@test_int_x86_avx512_psll_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2) {
3106 ; CHECK-LABEL: test_int_x86_avx512_psll_w_256:
3108 ; CHECK-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf1,0xc1]
3109 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3110 %res = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1)
3114 define <16 x i16>@test_int_x86_avx512_mask_psll_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) {
3115 ; X86-LABEL: test_int_x86_avx512_mask_psll_w_256:
3117 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3118 ; X86-NEXT: vpsllw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf1,0xd1]
3119 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3120 ; X86-NEXT: retl # encoding: [0xc3]
3122 ; X64-LABEL: test_int_x86_avx512_mask_psll_w_256:
3124 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3125 ; X64-NEXT: vpsllw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf1,0xd1]
3126 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3127 ; X64-NEXT: retq # encoding: [0xc3]
3128 %res = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3)
3132 define <16 x i16>@test_int_x86_avx512_maskz_psll_w_256(<16 x i16> %x0, <8 x i16> %x1, i16 %x3) {
3133 ; X86-LABEL: test_int_x86_avx512_maskz_psll_w_256:
3135 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3136 ; X86-NEXT: vpsllw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf1,0xc1]
3137 ; X86-NEXT: retl # encoding: [0xc3]
3139 ; X64-LABEL: test_int_x86_avx512_maskz_psll_w_256:
3141 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3142 ; X64-NEXT: vpsllw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf1,0xc1]
3143 ; X64-NEXT: retq # encoding: [0xc3]
3144 %res = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
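; Logical right shift by an immediate (vpsrlw $imm). Each test combines a
; merge-masked, an unmasked and a zero-masked shift into a single struct return.
; As a rough, hand-written reference only (not part of the autogenerated checks),
; a masked immediate shift behaves like a plain shift followed by a per-element
; select on the mask bits; a minimal sketch, assuming an i8 mask and a shift
; count of 3:
;   define <8 x i16> @psrl_wi_sketch(<8 x i16> %x, <8 x i16> %passthru, i8 %m) {
;     %shift = lshr <8 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
;     %bits = bitcast i8 %m to <8 x i1>
;     %res = select <8 x i1> %bits, <8 x i16> %shift, <8 x i16> %passthru
;     ret <8 x i16> %res
;   }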
3148 declare <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16>, i32, <8 x i16>, i8)
3150 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_psrl_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) {
3151 ; X86-LABEL: test_int_x86_avx512_mask_psrl_wi_128:
3153 ; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
3154 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3155 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3156 ; X86-NEXT: vpsrlw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xd0,0x03]
3157 ; X86-NEXT: vpsrlw $4, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x71,0xd0,0x04]
3158 ; X86-NEXT: vpsrlw $5, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x71,0xd0,0x05]
3159 ; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
3160 ; X86-NEXT: retl # encoding: [0xc3]
3162 ; X64-LABEL: test_int_x86_avx512_mask_psrl_wi_128:
3164 ; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
3165 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3166 ; X64-NEXT: vpsrlw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xd0,0x03]
3167 ; X64-NEXT: vpsrlw $4, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x71,0xd0,0x04]
3168 ; X64-NEXT: vpsrlw $5, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x71,0xd0,0x05]
3169 ; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
3170 ; X64-NEXT: retq # encoding: [0xc3]
3171 %res0 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
3172 %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 4, <8 x i16> %x2, i8 -1)
3173 %res2 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 5, <8 x i16> zeroinitializer, i8 %x3)
3174 %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0
3175 %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %res1, 1
3176 %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %res2, 2
3177 ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5
3180 declare <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16>, i32, <16 x i16>, i16)
3182 define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_psrl_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
3183 ; X86-LABEL: test_int_x86_avx512_mask_psrl_wi_256:
3185 ; X86-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
3186 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
3187 ; X86-NEXT: vpsrlw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xd0,0x03]
3188 ; X86-NEXT: vpsrlw $4, %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x71,0xd0,0x04]
3189 ; X86-NEXT: vpsrlw $5, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xd0,0x05]
3190 ; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
3191 ; X86-NEXT: retl # encoding: [0xc3]
3193 ; X64-LABEL: test_int_x86_avx512_mask_psrl_wi_256:
3195 ; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
3196 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3197 ; X64-NEXT: vpsrlw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xd0,0x03]
3198 ; X64-NEXT: vpsrlw $4, %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x71,0xd0,0x04]
3199 ; X64-NEXT: vpsrlw $5, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xd0,0x05]
3200 ; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
3201 ; X64-NEXT: retq # encoding: [0xc3]
3202 %res0 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
3203 %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 4, <16 x i16> %x2, i16 -1)
3204 %res2 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 5, <16 x i16> zeroinitializer, i16 %x3)
3205 %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res0, 0
3206 %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %res1, 1
3207 %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %res2, 2
3208 ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5
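; Arithmetic right shift by an immediate (vpsraw $imm), combining merge-masked,
; zero-masked and unmasked results into one struct return.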
3211 declare <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16>, i32, <8 x i16>, i8)
3213 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_psra_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) {
3214 ; X86-LABEL: test_int_x86_avx512_mask_psra_wi_128:
3216 ; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
3217 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3218 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3219 ; X86-NEXT: vpsraw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xe0,0x03]
3220 ; X86-NEXT: vpsraw $4, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0x89,0x71,0xe0,0x04]
3221 ; X86-NEXT: vpsraw $5, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xe0,0x05]
3222 ; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
3223 ; X86-NEXT: retl # encoding: [0xc3]
3225 ; X64-LABEL: test_int_x86_avx512_mask_psra_wi_128:
3227 ; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
3228 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3229 ; X64-NEXT: vpsraw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xe0,0x03]
3230 ; X64-NEXT: vpsraw $4, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0x89,0x71,0xe0,0x04]
3231 ; X64-NEXT: vpsraw $5, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xe0,0x05]
3232 ; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
3233 ; X64-NEXT: retq # encoding: [0xc3]
3234 %res0 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
3235 %res1 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 4, <8 x i16> zeroinitializer, i8 %x3)
3236 %res2 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 5, <8 x i16> %x2, i8 -1)
3237 %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0
3238 %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %res1, 1
3239 %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %res2, 2
3240 ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5
3243 declare <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16>, i32, <16 x i16>, i16)
3245 define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_psra_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
3246 ; X86-LABEL: test_int_x86_avx512_mask_psra_wi_256:
3248 ; X86-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
3249 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
3250 ; X86-NEXT: vpsraw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xe0,0x03]
3251 ; X86-NEXT: vpsraw $4, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xa9,0x71,0xe0,0x04]
3252 ; X86-NEXT: vpsraw $5, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xe0,0x05]
3253 ; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
3254 ; X86-NEXT: retl # encoding: [0xc3]
3256 ; X64-LABEL: test_int_x86_avx512_mask_psra_wi_256:
3258 ; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
3259 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3260 ; X64-NEXT: vpsraw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xe0,0x03]
3261 ; X64-NEXT: vpsraw $4, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xa9,0x71,0xe0,0x04]
3262 ; X64-NEXT: vpsraw $5, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xe0,0x05]
3263 ; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
3264 ; X64-NEXT: retq # encoding: [0xc3]
3265 %res = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
3266 %res1 = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 4, <16 x i16> zeroinitializer, i16 %x3)
3267 %res2 = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 5, <16 x i16> %x2, i16 -1)
3268 %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res, 0
3269 %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %res1, 1
3270 %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %res2, 2
3271 ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5
3274 declare <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16>, i32, <8 x i16>, i8)
3276 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_psll_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) {
3277 ; X86-LABEL: test_int_x86_avx512_mask_psll_wi_128:
3279 ; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
3280 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3281 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3282 ; X86-NEXT: vpsllw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xf0,0x03]
3283 ; X86-NEXT: vpsllw $4, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0x89,0x71,0xf0,0x04]
3284 ; X86-NEXT: vpsllw $5, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xf0,0x05]
3285 ; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
3286 ; X86-NEXT: retl # encoding: [0xc3]
3288 ; X64-LABEL: test_int_x86_avx512_mask_psll_wi_128:
3290 ; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
3291 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3292 ; X64-NEXT: vpsllw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xf0,0x03]
3293 ; X64-NEXT: vpsllw $4, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0x89,0x71,0xf0,0x04]
3294 ; X64-NEXT: vpsllw $5, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xf0,0x05]
3295 ; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
3296 ; X64-NEXT: retq # encoding: [0xc3]
3297 %res0 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
3298 %res1 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 4, <8 x i16> zeroinitializer, i8 %x3)
3299 %res2 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 5, <8 x i16> %x2, i8 -1)
3300 %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0
3301 %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %res1, 1
3302 %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %res2, 2
3303 ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5
3306 declare <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16>, i32, <16 x i16>, i16)
3308 define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_psll_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
3309 ; X86-LABEL: test_int_x86_avx512_mask_psll_wi_256:
3311 ; X86-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
3312 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
3313 ; X86-NEXT: vpsllw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xf0,0x03]
3314 ; X86-NEXT: vpsllw $4, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xa9,0x71,0xf0,0x04]
3315 ; X86-NEXT: vpsllw $5, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xf0,0x05]
3316 ; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
3317 ; X86-NEXT: retl # encoding: [0xc3]
3319 ; X64-LABEL: test_int_x86_avx512_mask_psll_wi_256:
3321 ; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
3322 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3323 ; X64-NEXT: vpsllw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xf0,0x03]
3324 ; X64-NEXT: vpsllw $4, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xa9,0x71,0xf0,0x04]
3325 ; X64-NEXT: vpsllw $5, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xf0,0x05]
3326 ; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
3327 ; X64-NEXT: retq # encoding: [0xc3]
3328 %res = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
3329 %res1 = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 4, <16 x i16> zeroinitializer, i16 %x3)
3330 %res2 = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 5, <16 x i16> %x2, i16 -1)
3331 %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res, 0
3332 %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %res1, 1
3333 %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %res2, 2
3334 ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5
3337 declare <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
3339 define <16 x i8>@test_int_x86_avx512_pshuf_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
3340 ; CHECK-LABEL: test_int_x86_avx512_pshuf_b_128:
3342 ; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x00,0xc1]
3343 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3344 %res = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
3348 define <16 x i8>@test_int_x86_avx512_mask_pshuf_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
3349 ; X86-LABEL: test_int_x86_avx512_mask_pshuf_b_128:
3351 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3352 ; X86-NEXT: vpshufb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x00,0xd1]
3353 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3354 ; X86-NEXT: retl # encoding: [0xc3]
3356 ; X64-LABEL: test_int_x86_avx512_mask_pshuf_b_128:
3358 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3359 ; X64-NEXT: vpshufb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x00,0xd1]
3360 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3361 ; X64-NEXT: retq # encoding: [0xc3]
3362 %res = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
3366 declare <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
3368 define <32 x i8>@test_int_x86_avx512_pshuf_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
3369 ; CHECK-LABEL: test_int_x86_avx512_pshuf_b_256:
3371 ; CHECK-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x00,0xc1]
3372 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3373 %res = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
3377 define <32 x i8>@test_int_x86_avx512_mask_pshuf_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
3378 ; X86-LABEL: test_int_x86_avx512_mask_pshuf_b_256:
3380 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3381 ; X86-NEXT: vpshufb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x00,0xd1]
3382 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3383 ; X86-NEXT: retl # encoding: [0xc3]
3385 ; X64-LABEL: test_int_x86_avx512_mask_pshuf_b_256:
3387 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3388 ; X64-NEXT: vpshufb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x00,0xd1]
3389 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3390 ; X64-NEXT: retq # encoding: [0xc3]
3391 %res = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
3395 declare <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8>, <8 x i16>, i8)
3397 define <8 x i16>@test_int_x86_avx512_pmovzxb_w_128(<16 x i8> %x0, <8 x i16> %x1) {
3398 ; CHECK-LABEL: test_int_x86_avx512_pmovzxb_w_128:
3400 ; CHECK-NEXT: vpmovzxbw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x30,0xc0]
3401 ; CHECK-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3402 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3403 %res = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 -1)
3407 define <8 x i16>@test_int_x86_avx512_mask_pmovzxb_w_128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) {
3408 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_w_128:
3410 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3411 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3412 ; X86-NEXT: vpmovzxbw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x30,0xc8]
3413 ; X86-NEXT: # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3414 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3415 ; X86-NEXT: retl # encoding: [0xc3]
3417 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_w_128:
3419 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3420 ; X64-NEXT: vpmovzxbw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x30,0xc8]
3421 ; X64-NEXT: # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3422 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3423 ; X64-NEXT: retq # encoding: [0xc3]
3424 %res = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2)
3428 define <8 x i16>@test_int_x86_avx512_maskz_pmovzxb_w_128(<16 x i8> %x0, i8 %x2) {
3429 ; X86-LABEL: test_int_x86_avx512_maskz_pmovzxb_w_128:
3431 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3432 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3433 ; X86-NEXT: vpmovzxbw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x30,0xc0]
3434 ; X86-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3435 ; X86-NEXT: retl # encoding: [0xc3]
3437 ; X64-LABEL: test_int_x86_avx512_maskz_pmovzxb_w_128:
3439 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3440 ; X64-NEXT: vpmovzxbw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x30,0xc0]
3441 ; X64-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3442 ; X64-NEXT: retq # encoding: [0xc3]
3443 %res = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> zeroinitializer, i8 %x2)
3447 declare <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8>, <16 x i16>, i16)
3449 define <16 x i16>@test_int_x86_avx512_pmovzxb_w_256(<16 x i8> %x0, <16 x i16> %x1) {
3450 ; CHECK-LABEL: test_int_x86_avx512_pmovzxb_w_256:
3452 ; CHECK-NEXT: vpmovzxbw %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x30,0xc0]
3453 ; CHECK-NEXT: # ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
3454 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3455 %res = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 -1)
3459 define <16 x i16>@test_int_x86_avx512_mask_pmovzxb_w_256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) {
3460 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_w_256:
3462 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3463 ; X86-NEXT: vpmovzxbw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x30,0xc8]
3464 ; X86-NEXT: # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
3465 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3466 ; X86-NEXT: retl # encoding: [0xc3]
3468 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_w_256:
3470 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3471 ; X64-NEXT: vpmovzxbw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x30,0xc8]
3472 ; X64-NEXT: # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
3473 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3474 ; X64-NEXT: retq # encoding: [0xc3]
3475 %res = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2)
3479 define <16 x i16>@test_int_x86_avx512_maskz_pmovzxb_w_256(<16 x i8> %x0, i16 %x2) {
3480 ; X86-LABEL: test_int_x86_avx512_maskz_pmovzxb_w_256:
3482 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3483 ; X86-NEXT: vpmovzxbw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x30,0xc0]
3484 ; X86-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
3485 ; X86-NEXT: retl # encoding: [0xc3]
3487 ; X64-LABEL: test_int_x86_avx512_maskz_pmovzxb_w_256:
3489 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3490 ; X64-NEXT: vpmovzxbw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x30,0xc0]
3491 ; X64-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
3492 ; X64-NEXT: retq # encoding: [0xc3]
3493 %res = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> zeroinitializer, i16 %x2)
3497 declare <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8>, <8 x i16>, i8)
3499 define <8 x i16>@test_int_x86_avx512_pmovsxb_w_128(<16 x i8> %x0, <8 x i16> %x1) {
3500 ; CHECK-LABEL: test_int_x86_avx512_pmovsxb_w_128:
3502 ; CHECK-NEXT: vpmovsxbw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x20,0xc0]
3503 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3504 %res = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 -1)
3508 define <8 x i16>@test_int_x86_avx512_mask_pmovsxb_w_128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) {
3509 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_w_128:
3511 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3512 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3513 ; X86-NEXT: vpmovsxbw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x20,0xc8]
3514 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3515 ; X86-NEXT: retl # encoding: [0xc3]
3517 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_w_128:
3519 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3520 ; X64-NEXT: vpmovsxbw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x20,0xc8]
3521 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3522 ; X64-NEXT: retq # encoding: [0xc3]
3523 %res = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2)
3527 define <8 x i16>@test_int_x86_avx512_maskz_pmovsxb_w_128(<16 x i8> %x0, i8 %x2) {
3528 ; X86-LABEL: test_int_x86_avx512_maskz_pmovsxb_w_128:
3530 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3531 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3532 ; X86-NEXT: vpmovsxbw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x20,0xc0]
3533 ; X86-NEXT: retl # encoding: [0xc3]
3535 ; X64-LABEL: test_int_x86_avx512_maskz_pmovsxb_w_128:
3537 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3538 ; X64-NEXT: vpmovsxbw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x20,0xc0]
3539 ; X64-NEXT: retq # encoding: [0xc3]
3540 %res = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> zeroinitializer, i8 %x2)
3544 declare <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8>, <16 x i16>, i16)
3546 define <16 x i16>@test_int_x86_avx512_pmovsxb_w_256(<16 x i8> %x0, <16 x i16> %x1) {
3547 ; CHECK-LABEL: test_int_x86_avx512_pmovsxb_w_256:
3549 ; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x20,0xc0]
3550 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3551 %res = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 -1)
3555 define <16 x i16>@test_int_x86_avx512_mask_pmovsxb_w_256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) {
3556 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_w_256:
3558 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3559 ; X86-NEXT: vpmovsxbw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x20,0xc8]
3560 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3561 ; X86-NEXT: retl # encoding: [0xc3]
3563 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_w_256:
3565 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3566 ; X64-NEXT: vpmovsxbw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x20,0xc8]
3567 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3568 ; X64-NEXT: retq # encoding: [0xc3]
3569 %res = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2)
3573 define <16 x i16>@test_int_x86_avx512_maskz_pmovsxb_w_256(<16 x i8> %x0, i16 %x2) {
3574 ; X86-LABEL: test_int_x86_avx512_maskz_pmovsxb_w_256:
3576 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3577 ; X86-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x20,0xc0]
3578 ; X86-NEXT: retl # encoding: [0xc3]
3580 ; X64-LABEL: test_int_x86_avx512_maskz_pmovsxb_w_256:
3582 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3583 ; X64-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x20,0xc0]
3584 ; X64-NEXT: retq # encoding: [0xc3]
3585 %res = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> zeroinitializer, i16 %x2)
3589 declare <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32>, <2 x i64>, i8)
3591 define <2 x i64>@test_int_x86_avx512_pmovsxd_q_128(<4 x i32> %x0, <2 x i64> %x1) {
3592 ; CHECK-LABEL: test_int_x86_avx512_pmovsxd_q_128:
3594 ; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x25,0xc0]
3595 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3596 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 -1)
3600 define <2 x i64>@test_int_x86_avx512_mask_pmovsxd_q_128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) {
3601 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxd_q_128:
3603 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3604 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3605 ; X86-NEXT: vpmovsxdq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x25,0xc8]
3606 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3607 ; X86-NEXT: retl # encoding: [0xc3]
3609 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxd_q_128:
3611 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3612 ; X64-NEXT: vpmovsxdq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x25,0xc8]
3613 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3614 ; X64-NEXT: retq # encoding: [0xc3]
3615 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2)
3619 define <2 x i64>@test_int_x86_avx512_maskz_pmovsxd_q_128(<4 x i32> %x0, i8 %x2) {
3620 ; X86-LABEL: test_int_x86_avx512_maskz_pmovsxd_q_128:
3622 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3623 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3624 ; X86-NEXT: vpmovsxdq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x25,0xc0]
3625 ; X86-NEXT: retl # encoding: [0xc3]
3627 ; X64-LABEL: test_int_x86_avx512_maskz_pmovsxd_q_128:
3629 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3630 ; X64-NEXT: vpmovsxdq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x25,0xc0]
3631 ; X64-NEXT: retq # encoding: [0xc3]
3632 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> zeroinitializer, i8 %x2)
3636 declare <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32>, <4 x i64>, i8)
3638 define <4 x i64>@test_int_x86_avx512_pmovsxd_q_256(<4 x i32> %x0, <4 x i64> %x1) {
3639 ; CHECK-LABEL: test_int_x86_avx512_pmovsxd_q_256:
3641 ; CHECK-NEXT: vpmovsxdq %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x25,0xc0]
3642 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3643 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 -1)
3647 define <4 x i64>@test_int_x86_avx512_mask_pmovsxd_q_256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) {
3648 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxd_q_256:
3650 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3651 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3652 ; X86-NEXT: vpmovsxdq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x25,0xc8]
3653 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3654 ; X86-NEXT: retl # encoding: [0xc3]
3656 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxd_q_256:
3658 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3659 ; X64-NEXT: vpmovsxdq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x25,0xc8]
3660 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3661 ; X64-NEXT: retq # encoding: [0xc3]
3662 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2)
3666 define <4 x i64>@test_int_x86_avx512_maskz_pmovsxd_q_256(<4 x i32> %x0, i8 %x2) {
3667 ; X86-LABEL: test_int_x86_avx512_maskz_pmovsxd_q_256:
3669 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3670 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3671 ; X86-NEXT: vpmovsxdq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x25,0xc0]
3672 ; X86-NEXT: retl # encoding: [0xc3]
3674 ; X64-LABEL: test_int_x86_avx512_maskz_pmovsxd_q_256:
3676 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3677 ; X64-NEXT: vpmovsxdq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x25,0xc0]
3678 ; X64-NEXT: retq # encoding: [0xc3]
3679 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> zeroinitializer, i8 %x2)
3683 declare <16 x i8> @llvm.x86.avx512.cvtmask2b.128(i16)
3685 define <16 x i8>@test_int_x86_avx512_cvtmask2b_128(i16 %x0) {
3686 ; X86-LABEL: test_int_x86_avx512_cvtmask2b_128:
3688 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf8,0x90,0x44,0x24,0x04]
3689 ; X86-NEXT: vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
3690 ; X86-NEXT: retl # encoding: [0xc3]
3692 ; X64-LABEL: test_int_x86_avx512_cvtmask2b_128:
3694 ; X64-NEXT: kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
3695 ; X64-NEXT: vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
3696 ; X64-NEXT: retq # encoding: [0xc3]
3697 %res = call <16 x i8> @llvm.x86.avx512.cvtmask2b.128(i16 %x0)
3701 declare <32 x i8> @llvm.x86.avx512.cvtmask2b.256(i32)
3703 define <32 x i8>@test_int_x86_avx512_cvtmask2b_256(i32 %x0) {
3704 ; X86-LABEL: test_int_x86_avx512_cvtmask2b_256:
3706 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
3707 ; X86-NEXT: vpmovm2b %k0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x28,0x28,0xc0]
3708 ; X86-NEXT: retl # encoding: [0xc3]
3710 ; X64-LABEL: test_int_x86_avx512_cvtmask2b_256:
3712 ; X64-NEXT: kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
3713 ; X64-NEXT: vpmovm2b %k0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x28,0x28,0xc0]
3714 ; X64-NEXT: retq # encoding: [0xc3]
3715 %res = call <32 x i8> @llvm.x86.avx512.cvtmask2b.256(i32 %x0)
3719 declare <8 x i16> @llvm.x86.avx512.cvtmask2w.128(i8)
3721 define <8 x i16>@test_int_x86_avx512_cvtmask2w_128(i8 %x0) {
3722 ; X86-LABEL: test_int_x86_avx512_cvtmask2w_128:
3724 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3725 ; X86-NEXT: kmovd %eax, %k0 # encoding: [0xc5,0xfb,0x92,0xc0]
3726 ; X86-NEXT: vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
3727 ; X86-NEXT: retl # encoding: [0xc3]
3729 ; X64-LABEL: test_int_x86_avx512_cvtmask2w_128:
3731 ; X64-NEXT: kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
3732 ; X64-NEXT: vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
3733 ; X64-NEXT: retq # encoding: [0xc3]
3734 %res = call <8 x i16> @llvm.x86.avx512.cvtmask2w.128(i8 %x0)
3738 declare <16 x i16> @llvm.x86.avx512.cvtmask2w.256(i16)
3740 define <16 x i16>@test_int_x86_avx512_cvtmask2w_256(i16 %x0) {
3741 ; X86-LABEL: test_int_x86_avx512_cvtmask2w_256:
3743 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf8,0x90,0x44,0x24,0x04]
3744 ; X86-NEXT: vpmovm2w %k0, %ymm0 # encoding: [0x62,0xf2,0xfe,0x28,0x28,0xc0]
3745 ; X86-NEXT: retl # encoding: [0xc3]
3747 ; X64-LABEL: test_int_x86_avx512_cvtmask2w_256:
3749 ; X64-NEXT: kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
3750 ; X64-NEXT: vpmovm2w %k0, %ymm0 # encoding: [0x62,0xf2,0xfe,0x28,0x28,0xc0]
3751 ; X64-NEXT: retq # encoding: [0xc3]
3752 %res = call <16 x i16> @llvm.x86.avx512.cvtmask2w.256(i16 %x0)
3755 define <8 x i16> @test_mask_packs_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
3756 ; CHECK-LABEL: test_mask_packs_epi32_rr_128:
3758 ; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0xc1]
3759 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3760 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
3764 define <8 x i16> @test_mask_packs_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
3765 ; X86-LABEL: test_mask_packs_epi32_rrk_128:
3767 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3768 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3769 ; X86-NEXT: vpackssdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1]
3770 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3771 ; X86-NEXT: retl # encoding: [0xc3]
3773 ; X64-LABEL: test_mask_packs_epi32_rrk_128:
3775 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3776 ; X64-NEXT: vpackssdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1]
3777 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3778 ; X64-NEXT: retq # encoding: [0xc3]
3779 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
3783 define <8 x i16> @test_mask_packs_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
3784 ; X86-LABEL: test_mask_packs_epi32_rrkz_128:
3786 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3787 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3788 ; X86-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1]
3789 ; X86-NEXT: retl # encoding: [0xc3]
3791 ; X64-LABEL: test_mask_packs_epi32_rrkz_128:
3793 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3794 ; X64-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1]
3795 ; X64-NEXT: retq # encoding: [0xc3]
3796 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
3800 define <8 x i16> @test_mask_packs_epi32_rm_128(<4 x i32> %a, ptr %ptr_b) {
3801 ; X86-LABEL: test_mask_packs_epi32_rm_128:
3803 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3804 ; X86-NEXT: vpackssdw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x00]
3805 ; X86-NEXT: retl # encoding: [0xc3]
3807 ; X64-LABEL: test_mask_packs_epi32_rm_128:
3809 ; X64-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x07]
3810 ; X64-NEXT: retq # encoding: [0xc3]
3811 %b = load <4 x i32>, ptr %ptr_b
3812 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
3816 define <8 x i16> @test_mask_packs_epi32_rmk_128(<4 x i32> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
3817 ; X86-LABEL: test_mask_packs_epi32_rmk_128:
3819 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3820 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3821 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
3822 ; X86-NEXT: vpackssdw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x08]
3823 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3824 ; X86-NEXT: retl # encoding: [0xc3]
3826 ; X64-LABEL: test_mask_packs_epi32_rmk_128:
3828 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3829 ; X64-NEXT: vpackssdw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x0f]
3830 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3831 ; X64-NEXT: retq # encoding: [0xc3]
3832 %b = load <4 x i32>, ptr %ptr_b
3833 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
3837 define <8 x i16> @test_mask_packs_epi32_rmkz_128(<4 x i32> %a, ptr %ptr_b, i8 %mask) {
3838 ; X86-LABEL: test_mask_packs_epi32_rmkz_128:
3840 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3841 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3842 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
3843 ; X86-NEXT: vpackssdw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x00]
3844 ; X86-NEXT: retl # encoding: [0xc3]
3846 ; X64-LABEL: test_mask_packs_epi32_rmkz_128:
3848 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3849 ; X64-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x07]
3850 ; X64-NEXT: retq # encoding: [0xc3]
3851 %b = load <4 x i32>, ptr %ptr_b
3852 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
3856 define <8 x i16> @test_mask_packs_epi32_rmb_128(<4 x i32> %a, ptr %ptr_b) {
3857 ; X86-LABEL: test_mask_packs_epi32_rmb_128:
3859 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3860 ; X86-NEXT: vpackssdw (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x00]
3861 ; X86-NEXT: retl # encoding: [0xc3]
3863 ; X64-LABEL: test_mask_packs_epi32_rmb_128:
3865 ; X64-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x07]
3866 ; X64-NEXT: retq # encoding: [0xc3]
3867 %q = load i32, ptr %ptr_b
3868 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
3869 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
3870 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
3874 define <8 x i16> @test_mask_packs_epi32_rmbk_128(<4 x i32> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
3875 ; X86-LABEL: test_mask_packs_epi32_rmbk_128:
3877 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3878 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3879 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
3880 ; X86-NEXT: vpackssdw (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x08]
3881 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3882 ; X86-NEXT: retl # encoding: [0xc3]
3884 ; X64-LABEL: test_mask_packs_epi32_rmbk_128:
3886 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3887 ; X64-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x0f]
3888 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3889 ; X64-NEXT: retq # encoding: [0xc3]
3890 %q = load i32, ptr %ptr_b
3891 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
3892 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
3893 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
3897 define <8 x i16> @test_mask_packs_epi32_rmbkz_128(<4 x i32> %a, ptr %ptr_b, i8 %mask) {
3898 ; X86-LABEL: test_mask_packs_epi32_rmbkz_128:
3900 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3901 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3902 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
3903 ; X86-NEXT: vpackssdw (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x00]
3904 ; X86-NEXT: retl # encoding: [0xc3]
3906 ; X64-LABEL: test_mask_packs_epi32_rmbkz_128:
3908 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3909 ; X64-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x07]
3910 ; X64-NEXT: retq # encoding: [0xc3]
3911 %q = load i32, ptr %ptr_b
3912 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
3913 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
3914 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
3918 declare <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8)
3920 define <16 x i16> @test_mask_packs_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
3921 ; CHECK-LABEL: test_mask_packs_epi32_rr_256:
3923 ; CHECK-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0xc1]
3924 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3925 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
3929 define <16 x i16> @test_mask_packs_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) {
3930 ; X86-LABEL: test_mask_packs_epi32_rrk_256:
3932 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3933 ; X86-NEXT: vpackssdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1]
3934 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3935 ; X86-NEXT: retl # encoding: [0xc3]
3937 ; X64-LABEL: test_mask_packs_epi32_rrk_256:
3939 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3940 ; X64-NEXT: vpackssdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1]
3941 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3942 ; X64-NEXT: retq # encoding: [0xc3]
3943 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
3947 define <16 x i16> @test_mask_packs_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) {
3948 ; X86-LABEL: test_mask_packs_epi32_rrkz_256:
3950 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3951 ; X86-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1]
3952 ; X86-NEXT: retl # encoding: [0xc3]
3954 ; X64-LABEL: test_mask_packs_epi32_rrkz_256:
3956 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3957 ; X64-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1]
3958 ; X64-NEXT: retq # encoding: [0xc3]
3959 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
3963 define <16 x i16> @test_mask_packs_epi32_rm_256(<8 x i32> %a, ptr %ptr_b) {
3964 ; X86-LABEL: test_mask_packs_epi32_rm_256:
3966 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3967 ; X86-NEXT: vpackssdw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x00]
3968 ; X86-NEXT: retl # encoding: [0xc3]
3970 ; X64-LABEL: test_mask_packs_epi32_rm_256:
3972 ; X64-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x07]
3973 ; X64-NEXT: retq # encoding: [0xc3]
3974 %b = load <8 x i32>, ptr %ptr_b
3975 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
3979 define <16 x i16> @test_mask_packs_epi32_rmk_256(<8 x i32> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
3980 ; X86-LABEL: test_mask_packs_epi32_rmk_256:
3982 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3983 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
3984 ; X86-NEXT: vpackssdw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x08]
3985 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3986 ; X86-NEXT: retl # encoding: [0xc3]
3988 ; X64-LABEL: test_mask_packs_epi32_rmk_256:
3990 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3991 ; X64-NEXT: vpackssdw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x0f]
3992 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3993 ; X64-NEXT: retq # encoding: [0xc3]
3994 %b = load <8 x i32>, ptr %ptr_b
3995 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
3999 define <16 x i16> @test_mask_packs_epi32_rmkz_256(<8 x i32> %a, ptr %ptr_b, i16 %mask) {
4000 ; X86-LABEL: test_mask_packs_epi32_rmkz_256:
4002 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4003 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4004 ; X86-NEXT: vpackssdw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x00]
4005 ; X86-NEXT: retl # encoding: [0xc3]
4007 ; X64-LABEL: test_mask_packs_epi32_rmkz_256:
4009 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4010 ; X64-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x07]
4011 ; X64-NEXT: retq # encoding: [0xc3]
4012 %b = load <8 x i32>, ptr %ptr_b
4013 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
4017 define <16 x i16> @test_mask_packs_epi32_rmb_256(<8 x i32> %a, ptr %ptr_b) {
4018 ; X86-LABEL: test_mask_packs_epi32_rmb_256:
4020 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4021 ; X86-NEXT: vpackssdw (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x00]
4022 ; X86-NEXT: retl # encoding: [0xc3]
4024 ; X64-LABEL: test_mask_packs_epi32_rmb_256:
4026 ; X64-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x07]
4027 ; X64-NEXT: retq # encoding: [0xc3]
4028 %q = load i32, ptr %ptr_b
4029 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4030 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4031 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
4035 define <16 x i16> @test_mask_packs_epi32_rmbk_256(<8 x i32> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
4036 ; X86-LABEL: test_mask_packs_epi32_rmbk_256:
4038 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4039 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4040 ; X86-NEXT: vpackssdw (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x08]
4041 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4042 ; X86-NEXT: retl # encoding: [0xc3]
4044 ; X64-LABEL: test_mask_packs_epi32_rmbk_256:
4046 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4047 ; X64-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x0f]
4048 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4049 ; X64-NEXT: retq # encoding: [0xc3]
4050 %q = load i32, ptr %ptr_b
4051 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4052 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4053 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
4057 define <16 x i16> @test_mask_packs_epi32_rmbkz_256(<8 x i32> %a, ptr %ptr_b, i16 %mask) {
4058 ; X86-LABEL: test_mask_packs_epi32_rmbkz_256:
4060 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4061 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4062 ; X86-NEXT: vpackssdw (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x00]
4063 ; X86-NEXT: retl # encoding: [0xc3]
4065 ; X64-LABEL: test_mask_packs_epi32_rmbkz_256:
4067 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4068 ; X64-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x07]
4069 ; X64-NEXT: retq # encoding: [0xc3]
4070 %q = load i32, ptr %ptr_b
4071 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4072 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4073 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
4077 declare <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16)
4079 define <16 x i8> @test_mask_packs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
4080 ; CHECK-LABEL: test_mask_packs_epi16_rr_128:
4082 ; CHECK-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc1]
4083 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4084 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
4088 define <16 x i8> @test_mask_packs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) {
4089 ; X86-LABEL: test_mask_packs_epi16_rrk_128:
4091 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
4092 ; X86-NEXT: vpacksswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1]
4093 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4094 ; X86-NEXT: retl # encoding: [0xc3]
4096 ; X64-LABEL: test_mask_packs_epi16_rrk_128:
4098 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4099 ; X64-NEXT: vpacksswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1]
4100 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4101 ; X64-NEXT: retq # encoding: [0xc3]
4102 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
4106 define <16 x i8> @test_mask_packs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) {
4107 ; X86-LABEL: test_mask_packs_epi16_rrkz_128:
4109 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
4110 ; X86-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1]
4111 ; X86-NEXT: retl # encoding: [0xc3]
4113 ; X64-LABEL: test_mask_packs_epi16_rrkz_128:
4115 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4116 ; X64-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1]
4117 ; X64-NEXT: retq # encoding: [0xc3]
4118 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
4122 define <16 x i8> @test_mask_packs_epi16_rm_128(<8 x i16> %a, ptr %ptr_b) {
4123 ; X86-LABEL: test_mask_packs_epi16_rm_128:
4125 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4126 ; X86-NEXT: vpacksswb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0x00]
4127 ; X86-NEXT: retl # encoding: [0xc3]
4129 ; X64-LABEL: test_mask_packs_epi16_rm_128:
4131 ; X64-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0x07]
4132 ; X64-NEXT: retq # encoding: [0xc3]
4133 %b = load <8 x i16>, ptr %ptr_b
4134 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
4138 define <16 x i8> @test_mask_packs_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) {
4139 ; X86-LABEL: test_mask_packs_epi16_rmk_128:
4141 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4142 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4143 ; X86-NEXT: vpacksswb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0x08]
4144 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4145 ; X86-NEXT: retl # encoding: [0xc3]
4147 ; X64-LABEL: test_mask_packs_epi16_rmk_128:
4149 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4150 ; X64-NEXT: vpacksswb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0x0f]
4151 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4152 ; X64-NEXT: retq # encoding: [0xc3]
4153 %b = load <8 x i16>, ptr %ptr_b
4154 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
4158 define <16 x i8> @test_mask_packs_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i16 %mask) {
4159 ; X86-LABEL: test_mask_packs_epi16_rmkz_128:
4161 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4162 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4163 ; X86-NEXT: vpacksswb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0x00]
4164 ; X86-NEXT: retl # encoding: [0xc3]
4166 ; X64-LABEL: test_mask_packs_epi16_rmkz_128:
4168 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4169 ; X64-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0x07]
4170 ; X64-NEXT: retq # encoding: [0xc3]
4171 %b = load <8 x i16>, ptr %ptr_b
4172 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
4176 declare <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16)
4178 define <32 x i8> @test_mask_packs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
4179 ; CHECK-LABEL: test_mask_packs_epi16_rr_256:
4181 ; CHECK-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0xc1]
4182 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4183 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
4187 define <32 x i8> @test_mask_packs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) {
4188 ; X86-LABEL: test_mask_packs_epi16_rrk_256:
4190 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
4191 ; X86-NEXT: vpacksswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1]
4192 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4193 ; X86-NEXT: retl # encoding: [0xc3]
4195 ; X64-LABEL: test_mask_packs_epi16_rrk_256:
4197 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4198 ; X64-NEXT: vpacksswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1]
4199 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4200 ; X64-NEXT: retq # encoding: [0xc3]
4201 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
4205 define <32 x i8> @test_mask_packs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) {
4206 ; X86-LABEL: test_mask_packs_epi16_rrkz_256:
4208 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
4209 ; X86-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0xc1]
4210 ; X86-NEXT: retl # encoding: [0xc3]
4212 ; X64-LABEL: test_mask_packs_epi16_rrkz_256:
4214 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4215 ; X64-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0xc1]
4216 ; X64-NEXT: retq # encoding: [0xc3]
4217 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
4221 define <32 x i8> @test_mask_packs_epi16_rm_256(<16 x i16> %a, ptr %ptr_b) {
4222 ; X86-LABEL: test_mask_packs_epi16_rm_256:
4224 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4225 ; X86-NEXT: vpacksswb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0x00]
4226 ; X86-NEXT: retl # encoding: [0xc3]
4228 ; X64-LABEL: test_mask_packs_epi16_rm_256:
4230 ; X64-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0x07]
4231 ; X64-NEXT: retq # encoding: [0xc3]
4232 %b = load <16 x i16>, ptr %ptr_b
4233 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
4237 define <32 x i8> @test_mask_packs_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) {
4238 ; X86-LABEL: test_mask_packs_epi16_rmk_256:
4240 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4241 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
4242 ; X86-NEXT: vpacksswb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0x08]
4243 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4244 ; X86-NEXT: retl # encoding: [0xc3]
4246 ; X64-LABEL: test_mask_packs_epi16_rmk_256:
4248 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4249 ; X64-NEXT: vpacksswb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0x0f]
4250 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4251 ; X64-NEXT: retq # encoding: [0xc3]
4252 %b = load <16 x i16>, ptr %ptr_b
4253 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
4257 define <32 x i8> @test_mask_packs_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i32 %mask) {
4258 ; X86-LABEL: test_mask_packs_epi16_rmkz_256:
4260 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4261 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
4262 ; X86-NEXT: vpacksswb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0x00]
4263 ; X86-NEXT: retl # encoding: [0xc3]
4265 ; X64-LABEL: test_mask_packs_epi16_rmkz_256:
4267 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4268 ; X64-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0x07]
4269 ; X64-NEXT: retq # encoding: [0xc3]
4270 %b = load <16 x i16>, ptr %ptr_b
4271 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
4275 declare <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32)
4278 define <8 x i16> @test_mask_packus_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
4279 ; CHECK-LABEL: test_mask_packus_epi32_rr_128:
4281 ; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0xc1]
4282 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4283 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
4287 define <8 x i16> @test_mask_packus_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
4288 ; X86-LABEL: test_mask_packus_epi32_rrk_128:
4290 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4291 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
4292 ; X86-NEXT: vpackusdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1]
4293 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4294 ; X86-NEXT: retl # encoding: [0xc3]
4296 ; X64-LABEL: test_mask_packus_epi32_rrk_128:
4298 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4299 ; X64-NEXT: vpackusdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1]
4300 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4301 ; X64-NEXT: retq # encoding: [0xc3]
4302 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
4306 define <8 x i16> @test_mask_packus_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
4307 ; X86-LABEL: test_mask_packus_epi32_rrkz_128:
4309 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4310 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
4311 ; X86-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1]
4312 ; X86-NEXT: retl # encoding: [0xc3]
4314 ; X64-LABEL: test_mask_packus_epi32_rrkz_128:
4316 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4317 ; X64-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1]
4318 ; X64-NEXT: retq # encoding: [0xc3]
4319 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
4323 define <8 x i16> @test_mask_packus_epi32_rm_128(<4 x i32> %a, ptr %ptr_b) {
4324 ; X86-LABEL: test_mask_packus_epi32_rm_128:
4326 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4327 ; X86-NEXT: vpackusdw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x00]
4328 ; X86-NEXT: retl # encoding: [0xc3]
4330 ; X64-LABEL: test_mask_packus_epi32_rm_128:
4332 ; X64-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x07]
4333 ; X64-NEXT: retq # encoding: [0xc3]
4334 %b = load <4 x i32>, ptr %ptr_b
4335 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
4336 ret <8 x i16> %res
4337 }
4339 define <8 x i16> @test_mask_packus_epi32_rmk_128(<4 x i32> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
4340 ; X86-LABEL: test_mask_packus_epi32_rmk_128:
4342 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4343 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4344 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
4345 ; X86-NEXT: vpackusdw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x08]
4346 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4347 ; X86-NEXT: retl # encoding: [0xc3]
4349 ; X64-LABEL: test_mask_packus_epi32_rmk_128:
4351 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4352 ; X64-NEXT: vpackusdw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x0f]
4353 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4354 ; X64-NEXT: retq # encoding: [0xc3]
4355 %b = load <4 x i32>, ptr %ptr_b
4356 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
4357 ret <8 x i16> %res
4358 }
4360 define <8 x i16> @test_mask_packus_epi32_rmkz_128(<4 x i32> %a, ptr %ptr_b, i8 %mask) {
4361 ; X86-LABEL: test_mask_packus_epi32_rmkz_128:
4363 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4364 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4365 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
4366 ; X86-NEXT: vpackusdw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0x00]
4367 ; X86-NEXT: retl # encoding: [0xc3]
4369 ; X64-LABEL: test_mask_packus_epi32_rmkz_128:
4371 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4372 ; X64-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0x07]
4373 ; X64-NEXT: retq # encoding: [0xc3]
4374 %b = load <4 x i32>, ptr %ptr_b
4375 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
4376 ret <8 x i16> %res
4377 }
4379 define <8 x i16> @test_mask_packus_epi32_rmb_128(<4 x i32> %a, ptr %ptr_b) {
4380 ; X86-LABEL: test_mask_packus_epi32_rmb_128:
4382 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4383 ; X86-NEXT: vpackusdw (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0x2b,0x00]
4384 ; X86-NEXT: retl # encoding: [0xc3]
4386 ; X64-LABEL: test_mask_packus_epi32_rmb_128:
4388 ; X64-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0x2b,0x07]
4389 ; X64-NEXT: retq # encoding: [0xc3]
4390 %q = load i32, ptr %ptr_b
4391 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
4392 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
4393 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
4394 ret <8 x i16> %res
4395 }
4397 define <8 x i16> @test_mask_packus_epi32_rmbk_128(<4 x i32> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
4398 ; X86-LABEL: test_mask_packus_epi32_rmbk_128:
4400 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4401 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4402 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
4403 ; X86-NEXT: vpackusdw (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x08]
4404 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4405 ; X86-NEXT: retl # encoding: [0xc3]
4407 ; X64-LABEL: test_mask_packus_epi32_rmbk_128:
4409 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4410 ; X64-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x0f]
4411 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4412 ; X64-NEXT: retq # encoding: [0xc3]
4413 %q = load i32, ptr %ptr_b
4414 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
4415 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
4416 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
4417 ret <8 x i16> %res
4418 }
4420 define <8 x i16> @test_mask_packus_epi32_rmbkz_128(<4 x i32> %a, ptr %ptr_b, i8 %mask) {
4421 ; X86-LABEL: test_mask_packus_epi32_rmbkz_128:
4423 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4424 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4425 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
4426 ; X86-NEXT: vpackusdw (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x00]
4427 ; X86-NEXT: retl # encoding: [0xc3]
4429 ; X64-LABEL: test_mask_packus_epi32_rmbkz_128:
4431 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4432 ; X64-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x07]
4433 ; X64-NEXT: retq # encoding: [0xc3]
4434 %q = load i32, ptr %ptr_b
4435 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
4436 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
4437 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
4438 ret <8 x i16> %res
4439 }
4441 declare <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8)
4443 define <16 x i16> @test_mask_packus_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
4444 ; CHECK-LABEL: test_mask_packus_epi32_rr_256:
4446 ; CHECK-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0xc1]
4447 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4448 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
4449 ret <16 x i16> %res
4450 }
4452 define <16 x i16> @test_mask_packus_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) {
4453 ; X86-LABEL: test_mask_packus_epi32_rrk_256:
4455 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
4456 ; X86-NEXT: vpackusdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1]
4457 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4458 ; X86-NEXT: retl # encoding: [0xc3]
4460 ; X64-LABEL: test_mask_packus_epi32_rrk_256:
4462 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4463 ; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1]
4464 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4465 ; X64-NEXT: retq # encoding: [0xc3]
4466 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
4467 ret <16 x i16> %res
4468 }
4470 define <16 x i16> @test_mask_packus_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) {
4471 ; X86-LABEL: test_mask_packus_epi32_rrkz_256:
4473 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
4474 ; X86-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1]
4475 ; X86-NEXT: retl # encoding: [0xc3]
4477 ; X64-LABEL: test_mask_packus_epi32_rrkz_256:
4479 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4480 ; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1]
4481 ; X64-NEXT: retq # encoding: [0xc3]
4482 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
4483 ret <16 x i16> %res
4484 }
4486 define <16 x i16> @test_mask_packus_epi32_rm_256(<8 x i32> %a, ptr %ptr_b) {
4487 ; X86-LABEL: test_mask_packus_epi32_rm_256:
4489 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4490 ; X86-NEXT: vpackusdw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0x00]
4491 ; X86-NEXT: retl # encoding: [0xc3]
4493 ; X64-LABEL: test_mask_packus_epi32_rm_256:
4495 ; X64-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0x07]
4496 ; X64-NEXT: retq # encoding: [0xc3]
4497 %b = load <8 x i32>, ptr %ptr_b
4498 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
4499 ret <16 x i16> %res
4500 }
4502 define <16 x i16> @test_mask_packus_epi32_rmk_256(<8 x i32> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
4503 ; X86-LABEL: test_mask_packus_epi32_rmk_256:
4505 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4506 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4507 ; X86-NEXT: vpackusdw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x08]
4508 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4509 ; X86-NEXT: retl # encoding: [0xc3]
4511 ; X64-LABEL: test_mask_packus_epi32_rmk_256:
4513 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4514 ; X64-NEXT: vpackusdw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x0f]
4515 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4516 ; X64-NEXT: retq # encoding: [0xc3]
4517 %b = load <8 x i32>, ptr %ptr_b
4518 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
4519 ret <16 x i16> %res
4520 }
4522 define <16 x i16> @test_mask_packus_epi32_rmkz_256(<8 x i32> %a, ptr %ptr_b, i16 %mask) {
4523 ; X86-LABEL: test_mask_packus_epi32_rmkz_256:
4525 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4526 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4527 ; X86-NEXT: vpackusdw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x00]
4528 ; X86-NEXT: retl # encoding: [0xc3]
4530 ; X64-LABEL: test_mask_packus_epi32_rmkz_256:
4532 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4533 ; X64-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x07]
4534 ; X64-NEXT: retq # encoding: [0xc3]
4535 %b = load <8 x i32>, ptr %ptr_b
4536 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
4537 ret <16 x i16> %res
4538 }
4540 define <16 x i16> @test_mask_packus_epi32_rmb_256(<8 x i32> %a, ptr %ptr_b) {
4541 ; X86-LABEL: test_mask_packus_epi32_rmb_256:
4543 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4544 ; X86-NEXT: vpackusdw (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x38,0x2b,0x00]
4545 ; X86-NEXT: retl # encoding: [0xc3]
4547 ; X64-LABEL: test_mask_packus_epi32_rmb_256:
4549 ; X64-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x38,0x2b,0x07]
4550 ; X64-NEXT: retq # encoding: [0xc3]
4551 %q = load i32, ptr %ptr_b
4552 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4553 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4554 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
4555 ret <16 x i16> %res
4556 }
4558 define <16 x i16> @test_mask_packus_epi32_rmbk_256(<8 x i32> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
4559 ; X86-LABEL: test_mask_packus_epi32_rmbk_256:
4561 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4562 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4563 ; X86-NEXT: vpackusdw (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x08]
4564 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4565 ; X86-NEXT: retl # encoding: [0xc3]
4567 ; X64-LABEL: test_mask_packus_epi32_rmbk_256:
4569 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4570 ; X64-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x0f]
4571 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4572 ; X64-NEXT: retq # encoding: [0xc3]
4573 %q = load i32, ptr %ptr_b
4574 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4575 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4576 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
4577 ret <16 x i16> %res
4578 }
4580 define <16 x i16> @test_mask_packus_epi32_rmbkz_256(<8 x i32> %a, ptr %ptr_b, i16 %mask) {
4581 ; X86-LABEL: test_mask_packus_epi32_rmbkz_256:
4583 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4584 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4585 ; X86-NEXT: vpackusdw (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x00]
4586 ; X86-NEXT: retl # encoding: [0xc3]
4588 ; X64-LABEL: test_mask_packus_epi32_rmbkz_256:
4590 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4591 ; X64-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x07]
4592 ; X64-NEXT: retq # encoding: [0xc3]
4593 %q = load i32, ptr %ptr_b
4594 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4595 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4596 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
4597 ret <16 x i16> %res
4598 }
4600 declare <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16)
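; packuswb tests: word-to-byte packs with unsigned saturation. Only register and memory operands are exercised here, since EVEX embedded broadcast is not available for word-granularity sources.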
4602 define <16 x i8> @test_mask_packus_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
4603 ; CHECK-LABEL: test_mask_packus_epi16_rr_128:
4605 ; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0xc1]
4606 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4607 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
4608 ret <16 x i8> %res
4609 }
4611 define <16 x i8> @test_mask_packus_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) {
4612 ; X86-LABEL: test_mask_packus_epi16_rrk_128:
4614 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
4615 ; X86-NEXT: vpackuswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1]
4616 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4617 ; X86-NEXT: retl # encoding: [0xc3]
4619 ; X64-LABEL: test_mask_packus_epi16_rrk_128:
4621 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4622 ; X64-NEXT: vpackuswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1]
4623 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4624 ; X64-NEXT: retq # encoding: [0xc3]
4625 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
4626 ret <16 x i8> %res
4627 }
4629 define <16 x i8> @test_mask_packus_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) {
4630 ; X86-LABEL: test_mask_packus_epi16_rrkz_128:
4632 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
4633 ; X86-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1]
4634 ; X86-NEXT: retl # encoding: [0xc3]
4636 ; X64-LABEL: test_mask_packus_epi16_rrkz_128:
4638 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4639 ; X64-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1]
4640 ; X64-NEXT: retq # encoding: [0xc3]
4641 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
4642 ret <16 x i8> %res
4643 }
4645 define <16 x i8> @test_mask_packus_epi16_rm_128(<8 x i16> %a, ptr %ptr_b) {
4646 ; X86-LABEL: test_mask_packus_epi16_rm_128:
4648 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4649 ; X86-NEXT: vpackuswb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0x00]
4650 ; X86-NEXT: retl # encoding: [0xc3]
4652 ; X64-LABEL: test_mask_packus_epi16_rm_128:
4654 ; X64-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0x07]
4655 ; X64-NEXT: retq # encoding: [0xc3]
4656 %b = load <8 x i16>, ptr %ptr_b
4657 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
4658 ret <16 x i8> %res
4659 }
4661 define <16 x i8> @test_mask_packus_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) {
4662 ; X86-LABEL: test_mask_packus_epi16_rmk_128:
4664 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4665 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4666 ; X86-NEXT: vpackuswb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0x08]
4667 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4668 ; X86-NEXT: retl # encoding: [0xc3]
4670 ; X64-LABEL: test_mask_packus_epi16_rmk_128:
4672 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4673 ; X64-NEXT: vpackuswb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0x0f]
4674 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4675 ; X64-NEXT: retq # encoding: [0xc3]
4676 %b = load <8 x i16>, ptr %ptr_b
4677 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
4678 ret <16 x i8> %res
4679 }
4681 define <16 x i8> @test_mask_packus_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i16 %mask) {
4682 ; X86-LABEL: test_mask_packus_epi16_rmkz_128:
4684 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4685 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4686 ; X86-NEXT: vpackuswb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0x00]
4687 ; X86-NEXT: retl # encoding: [0xc3]
4689 ; X64-LABEL: test_mask_packus_epi16_rmkz_128:
4691 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4692 ; X64-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0x07]
4693 ; X64-NEXT: retq # encoding: [0xc3]
4694 %b = load <8 x i16>, ptr %ptr_b
4695 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
4696 ret <16 x i8> %res
4697 }
4699 declare <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16)
4701 define <32 x i8> @test_mask_packus_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
4702 ; CHECK-LABEL: test_mask_packus_epi16_rr_256:
4704 ; CHECK-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0xc1]
4705 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4706 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
4707 ret <32 x i8> %res
4708 }
4710 define <32 x i8> @test_mask_packus_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) {
4711 ; X86-LABEL: test_mask_packus_epi16_rrk_256:
4713 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
4714 ; X86-NEXT: vpackuswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1]
4715 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4716 ; X86-NEXT: retl # encoding: [0xc3]
4718 ; X64-LABEL: test_mask_packus_epi16_rrk_256:
4720 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4721 ; X64-NEXT: vpackuswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1]
4722 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4723 ; X64-NEXT: retq # encoding: [0xc3]
4724 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
4725 ret <32 x i8> %res
4726 }
4728 define <32 x i8> @test_mask_packus_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) {
4729 ; X86-LABEL: test_mask_packus_epi16_rrkz_256:
4731 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
4732 ; X86-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0xc1]
4733 ; X86-NEXT: retl # encoding: [0xc3]
4735 ; X64-LABEL: test_mask_packus_epi16_rrkz_256:
4737 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4738 ; X64-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0xc1]
4739 ; X64-NEXT: retq # encoding: [0xc3]
4740 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
4741 ret <32 x i8> %res
4742 }
4744 define <32 x i8> @test_mask_packus_epi16_rm_256(<16 x i16> %a, ptr %ptr_b) {
4745 ; X86-LABEL: test_mask_packus_epi16_rm_256:
4747 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4748 ; X86-NEXT: vpackuswb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0x00]
4749 ; X86-NEXT: retl # encoding: [0xc3]
4751 ; X64-LABEL: test_mask_packus_epi16_rm_256:
4753 ; X64-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0x07]
4754 ; X64-NEXT: retq # encoding: [0xc3]
4755 %b = load <16 x i16>, ptr %ptr_b
4756 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
4757 ret <32 x i8> %res
4758 }
4760 define <32 x i8> @test_mask_packus_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) {
4761 ; X86-LABEL: test_mask_packus_epi16_rmk_256:
4763 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4764 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
4765 ; X86-NEXT: vpackuswb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0x08]
4766 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4767 ; X86-NEXT: retl # encoding: [0xc3]
4769 ; X64-LABEL: test_mask_packus_epi16_rmk_256:
4771 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4772 ; X64-NEXT: vpackuswb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0x0f]
4773 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4774 ; X64-NEXT: retq # encoding: [0xc3]
4775 %b = load <16 x i16>, ptr %ptr_b
4776 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
4777 ret <32 x i8> %res
4778 }
4780 define <32 x i8> @test_mask_packus_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i32 %mask) {
4781 ; X86-LABEL: test_mask_packus_epi16_rmkz_256:
4783 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4784 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
4785 ; X86-NEXT: vpackuswb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0x00]
4786 ; X86-NEXT: retl # encoding: [0xc3]
4788 ; X64-LABEL: test_mask_packus_epi16_rmkz_256:
4790 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4791 ; X64-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0x07]
4792 ; X64-NEXT: retq # encoding: [0xc3]
4793 %b = load <16 x i16>, ptr %ptr_b
4794 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
4795 ret <32 x i8> %res
4796 }
4798 declare <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32)
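; Compare-to-mask tests: @llvm.x86.avx512.mask.cmp.* (signed) and @llvm.x86.avx512.mask.ucmp.* (unsigned) are invoked with all eight comparison predicates (immediates 0 through 7), and the resulting masks are collected into a single result vector.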
4800 define <8 x i32> @test_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
4801 ; X86-LABEL: test_cmp_b_256:
4803 ; X86-NEXT: pushl %ebx # encoding: [0x53]
4804 ; X86-NEXT: .cfi_def_cfa_offset 8
4805 ; X86-NEXT: pushl %edi # encoding: [0x57]
4806 ; X86-NEXT: .cfi_def_cfa_offset 12
4807 ; X86-NEXT: pushl %esi # encoding: [0x56]
4808 ; X86-NEXT: .cfi_def_cfa_offset 16
4809 ; X86-NEXT: .cfi_offset %esi, -16
4810 ; X86-NEXT: .cfi_offset %edi, -12
4811 ; X86-NEXT: .cfi_offset %ebx, -8
4812 ; X86-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
4813 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
4814 ; X86-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 # encoding: [0x62,0xf1,0x75,0x28,0x64,0xc0]
4815 ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
4816 ; X86-NEXT: vpcmpleb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x02]
4817 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
4818 ; X86-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
4819 ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
4820 ; X86-NEXT: vpcmpnltb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x05]
4821 ; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
4822 ; X86-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
4823 ; X86-NEXT: kmovd %k0, %ebx # encoding: [0xc5,0xfb,0x93,0xd8]
4824 ; X86-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
4825 ; X86-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01]
4826 ; X86-NEXT: vpinsrd $2, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc3,0x02]
4827 ; X86-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
4828 ; X86-NEXT: vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08]
4829 ; X86-NEXT: # xmm0 = xmm0[0,1,2],xmm1[3]
4830 ; X86-NEXT: vmovd %edx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca]
4831 ; X86-NEXT: vmovd %eax, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
4832 ; X86-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
4833 ; X86-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
4834 ; X86-NEXT: vmovd %ecx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd1]
4835 ; X86-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
4836 ; X86-NEXT: # xmm1 = xmm1[0],xmm2[0]
4837 ; X86-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
4838 ; X86-NEXT: popl %esi # encoding: [0x5e]
4839 ; X86-NEXT: .cfi_def_cfa_offset 12
4840 ; X86-NEXT: popl %edi # encoding: [0x5f]
4841 ; X86-NEXT: .cfi_def_cfa_offset 8
4842 ; X86-NEXT: popl %ebx # encoding: [0x5b]
4843 ; X86-NEXT: .cfi_def_cfa_offset 4
4844 ; X86-NEXT: retl # encoding: [0xc3]
4846 ; X64-LABEL: test_cmp_b_256:
4848 ; X64-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
4849 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
4850 ; X64-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 # encoding: [0x62,0xf1,0x75,0x28,0x64,0xc0]
4851 ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
4852 ; X64-NEXT: vpcmpleb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x02]
4853 ; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
4854 ; X64-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
4855 ; X64-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
4856 ; X64-NEXT: vpcmpnltb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x05]
4857 ; X64-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
4858 ; X64-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
4859 ; X64-NEXT: kmovd %k0, %r8d # encoding: [0xc5,0x7b,0x93,0xc0]
4860 ; X64-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
4861 ; X64-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01]
4862 ; X64-NEXT: vpinsrd $2, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x22,0xc0,0x02]
4863 ; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
4864 ; X64-NEXT: vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08]
4865 ; X64-NEXT: # xmm0 = xmm0[0,1,2],xmm1[3]
4866 ; X64-NEXT: vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
4867 ; X64-NEXT: vmovd %eax, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
4868 ; X64-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
4869 ; X64-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
4870 ; X64-NEXT: vmovd %edx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2]
4871 ; X64-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
4872 ; X64-NEXT: # xmm1 = xmm1[0],xmm2[0]
4873 ; X64-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
4874 ; X64-NEXT: retq # encoding: [0xc3]
4875 %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
4876 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
4877 %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 -1)
4878 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
4879 %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 -1)
4880 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
4881 %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 -1)
4882 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
4883 %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 -1)
4884 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
4885 %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 -1)
4886 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
4887 %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 -1)
4888 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
4889 %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 -1)
4890 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
4891 ret <8 x i32> %vec7
4892 }
4894 define <8 x i32> @test_mask_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
4895 ; X86-LABEL: test_mask_cmp_b_256:
4897 ; X86-NEXT: pushl %ebp # encoding: [0x55]
4898 ; X86-NEXT: .cfi_def_cfa_offset 8
4899 ; X86-NEXT: pushl %ebx # encoding: [0x53]
4900 ; X86-NEXT: .cfi_def_cfa_offset 12
4901 ; X86-NEXT: pushl %edi # encoding: [0x57]
4902 ; X86-NEXT: .cfi_def_cfa_offset 16
4903 ; X86-NEXT: pushl %esi # encoding: [0x56]
4904 ; X86-NEXT: .cfi_def_cfa_offset 20
4905 ; X86-NEXT: .cfi_offset %esi, -20
4906 ; X86-NEXT: .cfi_offset %edi, -16
4907 ; X86-NEXT: .cfi_offset %ebx, -12
4908 ; X86-NEXT: .cfi_offset %ebp, -8
4909 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14]
4910 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
4911 ; X86-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1]
4912 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
4913 ; X86-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x64,0xc0]
4914 ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
4915 ; X86-NEXT: vpcmpleb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x02]
4916 ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
4917 ; X86-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
4918 ; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
4919 ; X86-NEXT: vpcmpnltb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x05]
4920 ; X86-NEXT: kmovd %k0, %ebx # encoding: [0xc5,0xfb,0x93,0xd8]
4921 ; X86-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1]
4922 ; X86-NEXT: kmovd %k0, %ebp # encoding: [0xc5,0xfb,0x93,0xe8]
4923 ; X86-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
4924 ; X86-NEXT: vpinsrd $1, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc3,0x01]
4925 ; X86-NEXT: vpinsrd $2, %ebp, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc5,0x02]
4926 ; X86-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03]
4927 ; X86-NEXT: vmovd %edx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca]
4928 ; X86-NEXT: vmovd %ecx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd1]
4929 ; X86-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
4930 ; X86-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
4931 ; X86-NEXT: vmovd %esi, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd6]
4932 ; X86-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
4933 ; X86-NEXT: # xmm1 = xmm1[0],xmm2[0]
4934 ; X86-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
4935 ; X86-NEXT: popl %esi # encoding: [0x5e]
4936 ; X86-NEXT: .cfi_def_cfa_offset 16
4937 ; X86-NEXT: popl %edi # encoding: [0x5f]
4938 ; X86-NEXT: .cfi_def_cfa_offset 12
4939 ; X86-NEXT: popl %ebx # encoding: [0x5b]
4940 ; X86-NEXT: .cfi_def_cfa_offset 8
4941 ; X86-NEXT: popl %ebp # encoding: [0x5d]
4942 ; X86-NEXT: .cfi_def_cfa_offset 4
4943 ; X86-NEXT: retl # encoding: [0xc3]
4945 ; X64-LABEL: test_mask_cmp_b_256:
4947 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4948 ; X64-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1]
4949 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
4950 ; X64-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x64,0xc0]
4951 ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
4952 ; X64-NEXT: vpcmpleb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x02]
4953 ; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
4954 ; X64-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
4955 ; X64-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
4956 ; X64-NEXT: vpcmpnltb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x05]
4957 ; X64-NEXT: kmovd %k0, %r8d # encoding: [0xc5,0x7b,0x93,0xc0]
4958 ; X64-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1]
4959 ; X64-NEXT: kmovd %k0, %r9d # encoding: [0xc5,0x7b,0x93,0xc8]
4960 ; X64-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
4961 ; X64-NEXT: vpinsrd $1, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x22,0xc0,0x01]
4962 ; X64-NEXT: vpinsrd $2, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x22,0xc1,0x02]
4963 ; X64-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
4964 ; X64-NEXT: vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
4965 ; X64-NEXT: vmovd %eax, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
4966 ; X64-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
4967 ; X64-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
4968 ; X64-NEXT: vmovd %edx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2]
4969 ; X64-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
4970 ; X64-NEXT: # xmm1 = xmm1[0],xmm2[0]
4971 ; X64-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
4972 ; X64-NEXT: retq # encoding: [0xc3]
4973 %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
4974 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
4975 %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 %mask)
4976 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
4977 %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 %mask)
4978 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
4979 %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 %mask)
4980 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
4981 %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 %mask)
4982 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
4983 %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 %mask)
4984 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
4985 %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 %mask)
4986 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
4987 %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 %mask)
4988 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
4989 ret <8 x i32> %vec7
4990 }
4992 declare i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8>, <32 x i8>, i32, i32) nounwind readnone
4994 define <8 x i32> @test_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
4995 ; X86-LABEL: test_ucmp_b_256:
4997 ; X86-NEXT: pushl %ebx # encoding: [0x53]
4998 ; X86-NEXT: .cfi_def_cfa_offset 8
4999 ; X86-NEXT: pushl %edi # encoding: [0x57]
5000 ; X86-NEXT: .cfi_def_cfa_offset 12
5001 ; X86-NEXT: pushl %esi # encoding: [0x56]
5002 ; X86-NEXT: .cfi_def_cfa_offset 16
5003 ; X86-NEXT: .cfi_offset %esi, -16
5004 ; X86-NEXT: .cfi_offset %edi, -12
5005 ; X86-NEXT: .cfi_offset %ebx, -8
5006 ; X86-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
5007 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
5008 ; X86-NEXT: vpcmpltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x01]
5009 ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
5010 ; X86-NEXT: vpcmpleub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x02]
5011 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5012 ; X86-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
5013 ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
5014 ; X86-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x05]
5015 ; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
5016 ; X86-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x06]
5017 ; X86-NEXT: kmovd %k0, %ebx # encoding: [0xc5,0xfb,0x93,0xd8]
5018 ; X86-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
5019 ; X86-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01]
5020 ; X86-NEXT: vpinsrd $2, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc3,0x02]
5021 ; X86-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
5022 ; X86-NEXT: vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08]
5023 ; X86-NEXT: # xmm0 = xmm0[0,1,2],xmm1[3]
5024 ; X86-NEXT: vmovd %edx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca]
5025 ; X86-NEXT: vmovd %eax, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
5026 ; X86-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
5027 ; X86-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
5028 ; X86-NEXT: vmovd %ecx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd1]
5029 ; X86-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
5030 ; X86-NEXT: # xmm1 = xmm1[0],xmm2[0]
5031 ; X86-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
5032 ; X86-NEXT: popl %esi # encoding: [0x5e]
5033 ; X86-NEXT: .cfi_def_cfa_offset 12
5034 ; X86-NEXT: popl %edi # encoding: [0x5f]
5035 ; X86-NEXT: .cfi_def_cfa_offset 8
5036 ; X86-NEXT: popl %ebx # encoding: [0x5b]
5037 ; X86-NEXT: .cfi_def_cfa_offset 4
5038 ; X86-NEXT: retl # encoding: [0xc3]
5040 ; X64-LABEL: test_ucmp_b_256:
5042 ; X64-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
5043 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
5044 ; X64-NEXT: vpcmpltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x01]
5045 ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5046 ; X64-NEXT: vpcmpleub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x02]
5047 ; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
5048 ; X64-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
5049 ; X64-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
5050 ; X64-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x05]
5051 ; X64-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
5052 ; X64-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x06]
5053 ; X64-NEXT: kmovd %k0, %r8d # encoding: [0xc5,0x7b,0x93,0xc0]
5054 ; X64-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
5055 ; X64-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01]
5056 ; X64-NEXT: vpinsrd $2, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x22,0xc0,0x02]
5057 ; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
5058 ; X64-NEXT: vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08]
5059 ; X64-NEXT: # xmm0 = xmm0[0,1,2],xmm1[3]
5060 ; X64-NEXT: vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
5061 ; X64-NEXT: vmovd %eax, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
5062 ; X64-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
5063 ; X64-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
5064 ; X64-NEXT: vmovd %edx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2]
5065 ; X64-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
5066 ; X64-NEXT: # xmm1 = xmm1[0],xmm2[0]
5067 ; X64-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
5068 ; X64-NEXT: retq # encoding: [0xc3]
5069 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
5070 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
5071 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 -1)
5072 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
5073 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 -1)
5074 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
5075 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 -1)
5076 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
5077 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 -1)
5078 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
5079 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 -1)
5080 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
5081 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 -1)
5082 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
5083 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 -1)
5084 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
5085 ret <8 x i32> %vec7
5086 }
5088 define <8 x i32> @test_mask_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
5089 ; X86-LABEL: test_mask_ucmp_b_256:
5091 ; X86-NEXT: pushl %ebp # encoding: [0x55]
5092 ; X86-NEXT: .cfi_def_cfa_offset 8
5093 ; X86-NEXT: pushl %ebx # encoding: [0x53]
5094 ; X86-NEXT: .cfi_def_cfa_offset 12
5095 ; X86-NEXT: pushl %edi # encoding: [0x57]
5096 ; X86-NEXT: .cfi_def_cfa_offset 16
5097 ; X86-NEXT: pushl %esi # encoding: [0x56]
5098 ; X86-NEXT: .cfi_def_cfa_offset 20
5099 ; X86-NEXT: .cfi_offset %esi, -20
5100 ; X86-NEXT: .cfi_offset %edi, -16
5101 ; X86-NEXT: .cfi_offset %ebx, -12
5102 ; X86-NEXT: .cfi_offset %ebp, -8
5103 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14]
5104 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
5105 ; X86-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1]
5106 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5107 ; X86-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x01]
5108 ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
5109 ; X86-NEXT: vpcmpleub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x02]
5110 ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
5111 ; X86-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
5112 ; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
5113 ; X86-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05]
5114 ; X86-NEXT: kmovd %k0, %ebx # encoding: [0xc5,0xfb,0x93,0xd8]
5115 ; X86-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06]
5116 ; X86-NEXT: kmovd %k0, %ebp # encoding: [0xc5,0xfb,0x93,0xe8]
5117 ; X86-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
5118 ; X86-NEXT: vpinsrd $1, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc3,0x01]
5119 ; X86-NEXT: vpinsrd $2, %ebp, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc5,0x02]
5120 ; X86-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03]
5121 ; X86-NEXT: vmovd %edx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca]
5122 ; X86-NEXT: vmovd %ecx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd1]
5123 ; X86-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
5124 ; X86-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
5125 ; X86-NEXT: vmovd %esi, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd6]
5126 ; X86-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
5127 ; X86-NEXT: # xmm1 = xmm1[0],xmm2[0]
5128 ; X86-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
5129 ; X86-NEXT: popl %esi # encoding: [0x5e]
5130 ; X86-NEXT: .cfi_def_cfa_offset 16
5131 ; X86-NEXT: popl %edi # encoding: [0x5f]
5132 ; X86-NEXT: .cfi_def_cfa_offset 12
5133 ; X86-NEXT: popl %ebx # encoding: [0x5b]
5134 ; X86-NEXT: .cfi_def_cfa_offset 8
5135 ; X86-NEXT: popl %ebp # encoding: [0x5d]
5136 ; X86-NEXT: .cfi_def_cfa_offset 4
5137 ; X86-NEXT: retl # encoding: [0xc3]
5139 ; X64-LABEL: test_mask_ucmp_b_256:
5141 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5142 ; X64-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1]
5143 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
5144 ; X64-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x01]
5145 ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5146 ; X64-NEXT: vpcmpleub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x02]
5147 ; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
5148 ; X64-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
5149 ; X64-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
5150 ; X64-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05]
5151 ; X64-NEXT: kmovd %k0, %r8d # encoding: [0xc5,0x7b,0x93,0xc0]
5152 ; X64-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06]
5153 ; X64-NEXT: kmovd %k0, %r9d # encoding: [0xc5,0x7b,0x93,0xc8]
5154 ; X64-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
5155 ; X64-NEXT: vpinsrd $1, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x22,0xc0,0x01]
5156 ; X64-NEXT: vpinsrd $2, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x22,0xc1,0x02]
5157 ; X64-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
5158 ; X64-NEXT: vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
5159 ; X64-NEXT: vmovd %eax, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
5160 ; X64-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
5161 ; X64-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
5162 ; X64-NEXT: vmovd %edx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2]
5163 ; X64-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
5164 ; X64-NEXT: # xmm1 = xmm1[0],xmm2[0]
5165 ; X64-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
5166 ; X64-NEXT: retq # encoding: [0xc3]
5167 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
5168 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
5169 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 %mask)
5170 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
5171 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 %mask)
5172 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
5173 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 %mask)
5174 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
5175 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 %mask)
5176 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
5177 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 %mask)
5178 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
5179 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 %mask)
5180 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
5181 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 %mask)
5182 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
5183 ret <8 x i32> %vec7
5184 }
5186 declare i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8>, <32 x i8>, i32, i32) nounwind readnone
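; The word-element compares below return i16 masks, so the checks assemble the results with vpinsrw rather than vpinsrd.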
5188 define <8 x i16> @test_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1) {
5189 ; CHECK-LABEL: test_cmp_w_256:
5191 ; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
5192 ; CHECK-NEXT: vpcmpgtw %ymm0, %ymm1, %k1 # encoding: [0x62,0xf1,0x75,0x28,0x65,0xc8]
5193 ; CHECK-NEXT: vpcmplew %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd1,0x02]
5194 ; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd9,0x04]
5195 ; CHECK-NEXT: vpcmpnltw %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xe1,0x05]
5196 ; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k5 # encoding: [0x62,0xf1,0x7d,0x28,0x65,0xe9]
5197 ; CHECK-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5198 ; CHECK-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
5199 ; CHECK-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5200 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5201 ; CHECK-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5202 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5203 ; CHECK-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5204 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5205 ; CHECK-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5206 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5207 ; CHECK-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
5208 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5209 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
5210 ; CHECK-NEXT: vpblendw $128, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
5211 ; CHECK-NEXT: # xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
5212 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
5213 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5214 %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1)
5215 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
5216 %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 -1)
5217 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
5218 %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 -1)
5219 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
5220 %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 -1)
5221 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
5222 %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 -1)
5223 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
5224 %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 -1)
5225 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
5226 %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 -1)
5227 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
5228 %res7 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 -1)
5229 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
5230 ret <8 x i16> %vec7
5231 }
5233 define <8 x i16> @test_mask_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) {
5234 ; X86-LABEL: test_mask_cmp_w_256:
5236 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5237 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
5238 ; X86-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
5239 ; X86-NEXT: vpcmpgtw %ymm0, %ymm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x65,0xd0]
5240 ; X86-NEXT: vpcmplew %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xd9,0x02]
5241 ; X86-NEXT: vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04]
5242 ; X86-NEXT: vpcmpnltw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe9,0x05]
5243 ; X86-NEXT: vpcmpgtw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x65,0xc9]
5244 ; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
5245 ; X86-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
5246 ; X86-NEXT: vmovd %edx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2]
5247 ; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
5248 ; X86-NEXT: kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
5249 ; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
5250 ; X86-NEXT: kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
5251 ; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
5252 ; X86-NEXT: kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd]
5253 ; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
5254 ; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
5255 ; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
5256 ; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
5257 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
5258 ; X86-NEXT: retl # encoding: [0xc3]
5260 ; X64-LABEL: test_mask_cmp_w_256:
5262 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5263 ; X64-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
5264 ; X64-NEXT: vpcmpgtw %ymm0, %ymm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x65,0xd0]
5265 ; X64-NEXT: vpcmplew %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xd9,0x02]
5266 ; X64-NEXT: vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04]
5267 ; X64-NEXT: vpcmpnltw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe9,0x05]
5268 ; X64-NEXT: vpcmpgtw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x65,0xc9]
5269 ; X64-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5270 ; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
5271 ; X64-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5272 ; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5273 ; X64-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5274 ; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5275 ; X64-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5276 ; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5277 ; X64-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
5278 ; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5279 ; X64-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5280 ; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5281 ; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
5282 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
5283 ; X64-NEXT: retq # encoding: [0xc3]
5284 %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask)
5285 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
5286 %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 %mask)
5287 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
5288 %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 %mask)
5289 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
5290 %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 %mask)
5291 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
5292 %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 %mask)
5293 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
5294 %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 %mask)
5295 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
5296 %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 %mask)
5297 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
5298 %res7 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 %mask)
5299 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
5300 ret <8 x i16> %vec7
5301 }
5303 declare i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16>, <16 x i16>, i32, i16) nounwind readnone
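; The unsigned-compare tests below exercise the same eight predicate immediates
; through @llvm.x86.avx512.mask.ucmp.w.256, which select the vpcmp*uw forms.
; Only six compares are emitted: predicate 3 (always false) and predicate 7
; (always true) are folded, so the unmasked test blends an all-ones value into
; lane 7 while the masked test inserts the mask value itself there.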
5305 define <8 x i16> @test_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1) {
5306 ; CHECK-LABEL: test_ucmp_w_256:
5308 ; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
5309 ; CHECK-NEXT: vpcmpltuw %ymm1, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xc9,0x01]
5310 ; CHECK-NEXT: vpcmpleuw %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xd1,0x02]
5311 ; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd9,0x04]
5312 ; CHECK-NEXT: vpcmpnltuw %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xe1,0x05]
5313 ; CHECK-NEXT: vpcmpnleuw %ymm1, %ymm0, %k5 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xe9,0x06]
5314 ; CHECK-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5315 ; CHECK-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
5316 ; CHECK-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5317 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5318 ; CHECK-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5319 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5320 ; CHECK-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5321 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5322 ; CHECK-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5323 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5324 ; CHECK-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
5325 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5326 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
5327 ; CHECK-NEXT: vpblendw $128, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
5328 ; CHECK-NEXT: # xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
5329 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
5330 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5331 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1)
5332 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
5333 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 -1)
5334 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
5335 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 -1)
5336 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
5337 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 -1)
5338 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
5339 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 -1)
5340 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
5341 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 -1)
5342 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
5343 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 -1)
5344 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
5345 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 -1)
5346 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
5347 ret <8 x i16> %vec7
5348 }
5350 define <8 x i16> @test_mask_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) {
5351 ; X86-LABEL: test_mask_ucmp_w_256:
5353 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5354 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
5355 ; X86-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
5356 ; X86-NEXT: vpcmpltuw %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd1,0x01]
5357 ; X86-NEXT: vpcmpleuw %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd9,0x02]
5358 ; X86-NEXT: vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04]
5359 ; X86-NEXT: vpcmpnltuw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xe9,0x05]
5360 ; X86-NEXT: vpcmpnleuw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc9,0x06]
5361 ; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
5362 ; X86-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
5363 ; X86-NEXT: vmovd %edx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2]
5364 ; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
5365 ; X86-NEXT: kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
5366 ; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
5367 ; X86-NEXT: kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
5368 ; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
5369 ; X86-NEXT: kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd]
5370 ; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
5371 ; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
5372 ; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
5373 ; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
5374 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
5375 ; X86-NEXT: retl # encoding: [0xc3]
5377 ; X64-LABEL: test_mask_ucmp_w_256:
5379 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5380 ; X64-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
5381 ; X64-NEXT: vpcmpltuw %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd1,0x01]
5382 ; X64-NEXT: vpcmpleuw %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd9,0x02]
5383 ; X64-NEXT: vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04]
5384 ; X64-NEXT: vpcmpnltuw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xe9,0x05]
5385 ; X64-NEXT: vpcmpnleuw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc9,0x06]
5386 ; X64-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5387 ; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
5388 ; X64-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5389 ; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5390 ; X64-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5391 ; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5392 ; X64-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5393 ; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5394 ; X64-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
5395 ; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5396 ; X64-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5397 ; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5398 ; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
5399 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
5400 ; X64-NEXT: retq # encoding: [0xc3]
5401 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask)
5402 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
5403 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 %mask)
5404 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
5405 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 %mask)
5406 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
5407 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 %mask)
5408 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
5409 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 %mask)
5410 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
5411 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 %mask)
5412 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
5413 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 %mask)
5414 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
5415 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 %mask)
5416 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
5417 ret <8 x i16> %vec7
5418 }
5420 declare i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16>, <16 x i16>, i32, i16) nounwind readnone
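; The 128-bit byte compares follow the same pattern as the 256-bit word tests:
; sixteen lanes yield a 16-bit mask per predicate, gathered into an <8 x i16>
; with vpinsrw, again with predicates 3 and 7 folded away.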
5422 define <8 x i16> @test_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1) {
5423 ; CHECK-LABEL: test_cmp_b_128:
5425 ; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
5426 ; CHECK-NEXT: vpcmpgtb %xmm0, %xmm1, %k1 # encoding: [0x62,0xf1,0x75,0x08,0x64,0xc8]
5427 ; CHECK-NEXT: vpcmpleb %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd1,0x02]
5428 ; CHECK-NEXT: vpcmpneqb %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd9,0x04]
5429 ; CHECK-NEXT: vpcmpnltb %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xe1,0x05]
5430 ; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k5 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xe9]
5431 ; CHECK-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5432 ; CHECK-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
5433 ; CHECK-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5434 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5435 ; CHECK-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5436 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5437 ; CHECK-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5438 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5439 ; CHECK-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5440 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5441 ; CHECK-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
5442 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5443 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
5444 ; CHECK-NEXT: vpblendw $128, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
5445 ; CHECK-NEXT: # xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
5446 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5447 %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1)
5448 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
5449 %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 -1)
5450 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
5451 %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 -1)
5452 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
5453 %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 -1)
5454 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
5455 %res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 -1)
5456 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
5457 %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 -1)
5458 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
5459 %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 -1)
5460 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
5461 %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 -1)
5462 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
5463 ret <8 x i16> %vec7
5464 }
5466 define <8 x i16> @test_mask_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) {
5467 ; X86-LABEL: test_mask_cmp_b_128:
5469 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5470 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
5471 ; X86-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1]
5472 ; X86-NEXT: vpcmpgtb %xmm0, %xmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x64,0xd0]
5473 ; X86-NEXT: vpcmpleb %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xd9,0x02]
5474 ; X86-NEXT: vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04]
5475 ; X86-NEXT: vpcmpnltb %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe9,0x05]
5476 ; X86-NEXT: vpcmpgtb %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x64,0xc9]
5477 ; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
5478 ; X86-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
5479 ; X86-NEXT: vmovd %edx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2]
5480 ; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
5481 ; X86-NEXT: kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
5482 ; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
5483 ; X86-NEXT: kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
5484 ; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
5485 ; X86-NEXT: kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd]
5486 ; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
5487 ; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
5488 ; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
5489 ; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
5490 ; X86-NEXT: retl # encoding: [0xc3]
5492 ; X64-LABEL: test_mask_cmp_b_128:
5494 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5495 ; X64-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1]
5496 ; X64-NEXT: vpcmpgtb %xmm0, %xmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x64,0xd0]
5497 ; X64-NEXT: vpcmpleb %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xd9,0x02]
5498 ; X64-NEXT: vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04]
5499 ; X64-NEXT: vpcmpnltb %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe9,0x05]
5500 ; X64-NEXT: vpcmpgtb %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x64,0xc9]
5501 ; X64-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5502 ; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
5503 ; X64-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5504 ; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5505 ; X64-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5506 ; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5507 ; X64-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5508 ; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5509 ; X64-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
5510 ; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5511 ; X64-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5512 ; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5513 ; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
5514 ; X64-NEXT: retq # encoding: [0xc3]
5515 %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask)
5516 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
5517 %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 %mask)
5518 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
5519 %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 %mask)
5520 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
5521 %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 %mask)
5522 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
5523 %res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 %mask)
5524 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
5525 %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 %mask)
5526 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
5527 %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 %mask)
5528 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
5529 %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 %mask)
5530 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
5531 ret <8 x i16> %vec7
5532 }
5534 declare i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8>, <16 x i8>, i32, i16) nounwind readnone
5536 define <8 x i16> @test_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1) {
5537 ; CHECK-LABEL: test_ucmp_b_128:
5539 ; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
5540 ; CHECK-NEXT: vpcmpltub %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xc9,0x01]
5541 ; CHECK-NEXT: vpcmpleub %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xd1,0x02]
5542 ; CHECK-NEXT: vpcmpneqb %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd9,0x04]
5543 ; CHECK-NEXT: vpcmpnltub %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xe1,0x05]
5544 ; CHECK-NEXT: vpcmpnleub %xmm1, %xmm0, %k5 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xe9,0x06]
5545 ; CHECK-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5546 ; CHECK-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
5547 ; CHECK-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5548 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5549 ; CHECK-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5550 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5551 ; CHECK-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5552 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5553 ; CHECK-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5554 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5555 ; CHECK-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
5556 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5557 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
5558 ; CHECK-NEXT: vpblendw $128, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
5559 ; CHECK-NEXT: # xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
5560 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5561 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1)
5562 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
5563 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 -1)
5564 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
5565 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 -1)
5566 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
5567 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 -1)
5568 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
5569 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 -1)
5570 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
5571 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 -1)
5572 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
5573 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 -1)
5574 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
5575 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 -1)
5576 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
5577 ret <8 x i16> %vec7
5578 }
5580 define <8 x i16> @test_mask_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) {
5581 ; X86-LABEL: test_mask_ucmp_b_128:
5583 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5584 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
5585 ; X86-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1]
5586 ; X86-NEXT: vpcmpltub %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd1,0x01]
5587 ; X86-NEXT: vpcmpleub %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd9,0x02]
5588 ; X86-NEXT: vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04]
5589 ; X86-NEXT: vpcmpnltub %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xe9,0x05]
5590 ; X86-NEXT: vpcmpnleub %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc9,0x06]
5591 ; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
5592 ; X86-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
5593 ; X86-NEXT: vmovd %edx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2]
5594 ; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
5595 ; X86-NEXT: kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
5596 ; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
5597 ; X86-NEXT: kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
5598 ; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
5599 ; X86-NEXT: kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd]
5600 ; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
5601 ; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
5602 ; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
5603 ; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
5604 ; X86-NEXT: retl # encoding: [0xc3]
5606 ; X64-LABEL: test_mask_ucmp_b_128:
5608 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5609 ; X64-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1]
5610 ; X64-NEXT: vpcmpltub %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd1,0x01]
5611 ; X64-NEXT: vpcmpleub %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd9,0x02]
5612 ; X64-NEXT: vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04]
5613 ; X64-NEXT: vpcmpnltub %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xe9,0x05]
5614 ; X64-NEXT: vpcmpnleub %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc9,0x06]
5615 ; X64-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5616 ; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
5617 ; X64-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5618 ; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5619 ; X64-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5620 ; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5621 ; X64-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5622 ; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5623 ; X64-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
5624 ; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5625 ; X64-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5626 ; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5627 ; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
5628 ; X64-NEXT: retq # encoding: [0xc3]
5629 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask)
5630 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
5631 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 %mask)
5632 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
5633 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 %mask)
5634 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
5635 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 %mask)
5636 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
5637 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 %mask)
5638 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
5639 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 %mask)
5640 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
5641 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 %mask)
5642 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
5643 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 %mask)
5644 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
5645 ret <8 x i16> %vec7
5646 }
5648 declare i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8>, <16 x i8>, i32, i16) nounwind readnone
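; The 128-bit word compares below produce only eight mask bits, so the results
; are collected into an <8 x i8> with vpinsrb; the folded always-true predicate
; shows up as an explicit movl $255 insert into lane 7 of the unmasked tests.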
5650 define <8 x i8> @test_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1) {
5651 ; CHECK-LABEL: test_cmp_w_128:
5653 ; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
5654 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
5655 ; CHECK-NEXT: vpcmpgtw %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x65,0xc0]
5656 ; CHECK-NEXT: vpcmplew %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc9,0x02]
5657 ; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd1,0x04]
5658 ; CHECK-NEXT: vpcmpnltw %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd9,0x05]
5659 ; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k4 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xe1]
5660 ; CHECK-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5661 ; CHECK-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
5662 ; CHECK-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
5663 ; CHECK-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
5664 ; CHECK-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5665 ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
5666 ; CHECK-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5667 ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
5668 ; CHECK-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5669 ; CHECK-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
5670 ; CHECK-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5671 ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
5672 ; CHECK-NEXT: movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00]
5673 ; CHECK-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
5674 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5675 %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1)
5676 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
5677 %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 -1)
5678 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
5679 %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 -1)
5680 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
5681 %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 -1)
5682 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
5683 %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 -1)
5684 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
5685 %res5 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 -1)
5686 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
5687 %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 -1)
5688 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
5689 %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 -1)
5690 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
5691 ret <8 x i8> %vec7
5692 }
5694 define <8 x i8> @test_mask_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
5695 ; X86-LABEL: test_mask_cmp_w_128:
5697 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5698 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
5699 ; X86-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1]
5700 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5701 ; X86-NEXT: vpcmpgtw %xmm0, %xmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x65,0xc0]
5702 ; X86-NEXT: vpcmplew %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd1,0x02]
5703 ; X86-NEXT: vpcmpneqw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x04]
5704 ; X86-NEXT: vpcmpnltw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x05]
5705 ; X86-NEXT: vpcmpgtw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x65,0xc9]
5706 ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
5707 ; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
5708 ; X86-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5709 ; X86-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x01]
5710 ; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
5711 ; X86-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02]
5712 ; X86-NEXT: kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
5713 ; X86-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04]
5714 ; X86-NEXT: kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
5715 ; X86-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05]
5716 ; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
5717 ; X86-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06]
5718 ; X86-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
5719 ; X86-NEXT: retl # encoding: [0xc3]
5721 ; X64-LABEL: test_mask_cmp_w_128:
5723 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5724 ; X64-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1]
5725 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
5726 ; X64-NEXT: vpcmpgtw %xmm0, %xmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x65,0xc0]
5727 ; X64-NEXT: vpcmplew %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd1,0x02]
5728 ; X64-NEXT: vpcmpneqw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x04]
5729 ; X64-NEXT: vpcmpnltw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x05]
5730 ; X64-NEXT: vpcmpgtw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x65,0xc9]
5731 ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5732 ; X64-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
5733 ; X64-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
5734 ; X64-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
5735 ; X64-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5736 ; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
5737 ; X64-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5738 ; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
5739 ; X64-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5740 ; X64-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
5741 ; X64-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5742 ; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
5743 ; X64-NEXT: vpinsrb $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x07]
5744 ; X64-NEXT: retq # encoding: [0xc3]
5745 %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask)
5746 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
5747 %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 %mask)
5748 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
5749 %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 %mask)
5750 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
5751 %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 %mask)
5752 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
5753 %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 %mask)
5754 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
5755 %res5 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 %mask)
5756 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
5757 %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 %mask)
5758 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
5759 %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 %mask)
5760 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
5761 ret <8 x i8> %vec7
5762 }
5764 declare i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone
5766 define <8 x i8> @test_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1) {
5767 ; CHECK-LABEL: test_ucmp_w_128:
5769 ; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
5770 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
5771 ; CHECK-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc1,0x01]
5772 ; CHECK-NEXT: vpcmpleuw %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc9,0x02]
5773 ; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd1,0x04]
5774 ; CHECK-NEXT: vpcmpnltuw %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xd9,0x05]
5775 ; CHECK-NEXT: vpcmpnleuw %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xe1,0x06]
5776 ; CHECK-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5777 ; CHECK-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
5778 ; CHECK-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
5779 ; CHECK-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
5780 ; CHECK-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5781 ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
5782 ; CHECK-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5783 ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
5784 ; CHECK-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5785 ; CHECK-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
5786 ; CHECK-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5787 ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
5788 ; CHECK-NEXT: movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00]
5789 ; CHECK-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
5790 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5791 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1)
5792 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
5793 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 -1)
5794 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
5795 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 -1)
5796 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
5797 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 -1)
5798 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
5799 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 -1)
5800 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
5801 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 -1)
5802 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
5803 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 -1)
5804 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
5805 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 -1)
5806 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
5807 ret <8 x i8> %vec7
5808 }
5810 define <8 x i8> @test_mask_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
5811 ; X86-LABEL: test_mask_ucmp_w_128:
5813 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5814 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
5815 ; X86-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1]
5816 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5817 ; X86-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc1,0x01]
5818 ; X86-NEXT: vpcmpleuw %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd1,0x02]
5819 ; X86-NEXT: vpcmpneqw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x04]
5820 ; X86-NEXT: vpcmpnltuw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xe1,0x05]
5821 ; X86-NEXT: vpcmpnleuw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc9,0x06]
5822 ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
5823 ; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
5824 ; X86-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5825 ; X86-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x01]
5826 ; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
5827 ; X86-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02]
5828 ; X86-NEXT: kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
5829 ; X86-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04]
5830 ; X86-NEXT: kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
5831 ; X86-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05]
5832 ; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
5833 ; X86-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06]
5834 ; X86-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
5835 ; X86-NEXT: retl # encoding: [0xc3]
5837 ; X64-LABEL: test_mask_ucmp_w_128:
5839 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5840 ; X64-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1]
5841 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
5842 ; X64-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc1,0x01]
5843 ; X64-NEXT: vpcmpleuw %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd1,0x02]
5844 ; X64-NEXT: vpcmpneqw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x04]
5845 ; X64-NEXT: vpcmpnltuw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xe1,0x05]
5846 ; X64-NEXT: vpcmpnleuw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc9,0x06]
5847 ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5848 ; X64-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
5849 ; X64-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
5850 ; X64-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
5851 ; X64-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5852 ; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
5853 ; X64-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5854 ; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
5855 ; X64-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5856 ; X64-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
5857 ; X64-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5858 ; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
5859 ; X64-NEXT: vpinsrb $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x07]
5860 ; X64-NEXT: retq # encoding: [0xc3]
5861 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask)
5862 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
5863 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 %mask)
5864 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
5865 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 %mask)
5866 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
5867 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 %mask)
5868 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
5869 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 %mask)
5870 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
5871 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 %mask)
5872 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
5873 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 %mask)
5874 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
5875 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 %mask)
5876 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
5877 ret <8 x i8> %vec7
5878 }
5880 declare i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone
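; The next group of tests covers the masked pavg/pabs intrinsics at 128- and
; 256-bit widths. With an all-ones mask the instructions compress to their VEX
; encodings; with a live mask they stay EVEX, write into the passthru register,
; and the result is copied back to xmm0/ymm0.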
5882 define <16 x i8>@mm_avg_epu8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
5883 ; CHECK-LABEL: mm_avg_epu8:
5885 ; CHECK-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe0,0xc1]
5886 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5887 %res = call <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
5888 ret <16 x i8> %res
5889 }
5891 define <16 x i8>@mm_mask_avg_epu8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
5892 ; X86-LABEL: mm_mask_avg_epu8:
5894 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
5895 ; X86-NEXT: vpavgb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1]
5896 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
5897 ; X86-NEXT: retl # encoding: [0xc3]
5899 ; X64-LABEL: mm_mask_avg_epu8:
5901 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5902 ; X64-NEXT: vpavgb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1]
5903 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
5904 ; X64-NEXT: retq # encoding: [0xc3]
5905 %res = call <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
5906 ret <16 x i8> %res
5907 }
5909 declare <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8>, <16 x i8>, i16)
5911 define <16 x i8>@test_int_x86_avx512_pabs_b_128(<16 x i8> %x0, <16 x i8> %x1) {
5912 ; CHECK-LABEL: test_int_x86_avx512_pabs_b_128:
5914 ; CHECK-NEXT: vpabsb %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1c,0xc0]
5915 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5916 %res = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1)
5917 ret <16 x i8> %res
5918 }
5920 define <16 x i8>@test_int_x86_avx512_mask_pabs_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) {
5921 ; X86-LABEL: test_int_x86_avx512_mask_pabs_b_128:
5923 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
5924 ; X86-NEXT: vpabsb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1c,0xc8]
5925 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
5926 ; X86-NEXT: retl # encoding: [0xc3]
5928 ; X64-LABEL: test_int_x86_avx512_mask_pabs_b_128:
5930 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5931 ; X64-NEXT: vpabsb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1c,0xc8]
5932 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
5933 ; X64-NEXT: retq # encoding: [0xc3]
5934 %res = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
5935 ret <16 x i8> %res
5936 }
5938 declare <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
5940 define <32 x i8>@mm256_avg_epu8(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
5941 ; CHECK-LABEL: mm256_avg_epu8:
5943 ; CHECK-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe0,0xc1]
5944 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5945 %res = call <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
5946 ret <32 x i8> %res
5947 }
5949 define <32 x i8>@mm256_mask_avg_epu8(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
5950 ; X86-LABEL: mm256_mask_avg_epu8:
5952 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
5953 ; X86-NEXT: vpavgb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe0,0xd1]
5954 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
5955 ; X86-NEXT: retl # encoding: [0xc3]
5957 ; X64-LABEL: mm256_mask_avg_epu8:
5959 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5960 ; X64-NEXT: vpavgb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe0,0xd1]
5961 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
5962 ; X64-NEXT: retq # encoding: [0xc3]
5963 %res = call <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
5964 ret <32 x i8> %res
5965 }
5967 declare <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8>, <32 x i8>, i32)
5969 define <32 x i8>@test_int_x86_avx512_pabs_b_256(<32 x i8> %x0, <32 x i8> %x1) {
5970 ; CHECK-LABEL: test_int_x86_avx512_pabs_b_256:
5972 ; CHECK-NEXT: vpabsb %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1c,0xc0]
5973 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5974 %res = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 -1)
5975 ret <32 x i8> %res
5976 }
5978 define <32 x i8>@test_int_x86_avx512_mask_pabs_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) {
5979 ; X86-LABEL: test_int_x86_avx512_mask_pabs_b_256:
5981 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
5982 ; X86-NEXT: vpabsb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1c,0xc8]
5983 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
5984 ; X86-NEXT: retl # encoding: [0xc3]
5986 ; X64-LABEL: test_int_x86_avx512_mask_pabs_b_256:
5988 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5989 ; X64-NEXT: vpabsb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1c,0xc8]
5990 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
5991 ; X64-NEXT: retq # encoding: [0xc3]
5992 %res = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2)
5993 ret <32 x i8> %res
5994 }
5996 declare <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
5998 define <8 x i16>@mm_avg_epu16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
5999 ; CHECK-LABEL: mm_avg_epu16:
6001 ; CHECK-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe3,0xc1]
6002 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6003 %res = call <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
6004 ret <8 x i16> %res
6005 }
6007 define <8 x i16>@mm_mask_avg_epu16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
6008 ; X86-LABEL: mm_mask_avg_epu16:
6010 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6011 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6012 ; X86-NEXT: vpavgw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1]
6013 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6014 ; X86-NEXT: retl # encoding: [0xc3]
6016 ; X64-LABEL: mm_mask_avg_epu16:
6018 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6019 ; X64-NEXT: vpavgw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1]
6020 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6021 ; X64-NEXT: retq # encoding: [0xc3]
6022 %res = call <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
6023 ret <8 x i16> %res
6024 }
6026 declare <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16>, <8 x i16>, i8)
6028 define <8 x i16>@test_int_x86_avx512_pabs_w_128(<8 x i16> %x0, <8 x i16> %x1) {
6029 ; CHECK-LABEL: test_int_x86_avx512_pabs_w_128:
6031 ; CHECK-NEXT: vpabsw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1d,0xc0]
6032 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6033 %res = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1)
6034 ret <8 x i16> %res
6035 }
6037 define <8 x i16>@test_int_x86_avx512_mask_pabs_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) {
6038 ; X86-LABEL: test_int_x86_avx512_mask_pabs_w_128:
6040 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6041 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6042 ; X86-NEXT: vpabsw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1d,0xc8]
6043 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
6044 ; X86-NEXT: retl # encoding: [0xc3]
6046 ; X64-LABEL: test_int_x86_avx512_mask_pabs_w_128:
6048 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6049 ; X64-NEXT: vpabsw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1d,0xc8]
6050 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
6051 ; X64-NEXT: retq # encoding: [0xc3]
6052 %res = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
6053 ret <8 x i16> %res
6054 }
6056 declare <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
6058 define <16 x i16>@mm256_avg_epu16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
6059 ; CHECK-LABEL: mm256_avg_epu16:
6061 ; CHECK-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe3,0xc1]
6062 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6063 %res = call <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
6064 ret <16 x i16> %res
6065 }
6067 define <16 x i16>@mm256_mask_avg_epu16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
6068 ; X86-LABEL: mm256_mask_avg_epu16:
6070 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6071 ; X86-NEXT: vpavgw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1]
6072 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6073 ; X86-NEXT: retl # encoding: [0xc3]
6075 ; X64-LABEL: mm256_mask_avg_epu16:
6077 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6078 ; X64-NEXT: vpavgw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1]
6079 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6080 ; X64-NEXT: retq # encoding: [0xc3]
6081 %res = call <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
6082 ret <16 x i16> %res
6083 }
6085 declare <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16>, <16 x i16>, i16)
6087 define <16 x i16>@test_int_x86_avx512_pabs_w_256(<16 x i16> %x0, <16 x i16> %x1) {
6088 ; CHECK-LABEL: test_int_x86_avx512_pabs_w_256:
6090 ; CHECK-NEXT: vpabsw %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1d,0xc0]
6091 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6092 %res = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 -1)
6093 ret <16 x i16> %res
6094 }
6096 define <16 x i16>@test_int_x86_avx512_mask_pabs_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) {
6097 ; X86-LABEL: test_int_x86_avx512_mask_pabs_w_256:
6099 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6100 ; X86-NEXT: vpabsw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1d,0xc8]
6101 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
6102 ; X86-NEXT: retl # encoding: [0xc3]
6104 ; X64-LABEL: test_int_x86_avx512_mask_pabs_w_256:
6106 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6107 ; X64-NEXT: vpabsw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1d,0xc8]
6108 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
6109 ; X64-NEXT: retq # encoding: [0xc3]
6110 %res = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2)
6114 declare <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
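; The ptestm*/ptestnm* tests below call each intrinsic twice, once with the
; incoming mask and once with an all-ones mask, and add the two results, so a
; single function covers both the and'ed (masked) and raw compare masks.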
6116 declare i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8>, <16 x i8>, i16)
6118 define i16@test_int_x86_avx512_ptestm_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) {
6119 ; X86-LABEL: test_int_x86_avx512_ptestm_b_128:
6121 ; X86-NEXT: vptestmb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x08,0x26,0xc1]
6122 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
6123 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
6124 ; X86-NEXT: andw %cx, %ax # encoding: [0x66,0x21,0xc8]
6125 ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
6126 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
6127 ; X86-NEXT: retl # encoding: [0xc3]
6129 ; X64-LABEL: test_int_x86_avx512_ptestm_b_128:
6131 ; X64-NEXT: vptestmb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x08,0x26,0xc1]
6132 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6133 ; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7]
6134 ; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
6135 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
6136 ; X64-NEXT: retq # encoding: [0xc3]
6137 %res = call i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
6138 %res1 = call i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1)
6139 %res2 = add i16 %res, %res1
6143 declare i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8>, <32 x i8>, i32)
6145 define i32@test_int_x86_avx512_ptestm_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) {
6146 ; X86-LABEL: test_int_x86_avx512_ptestm_b_256:
6148 ; X86-NEXT: vptestmb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7d,0x28,0x26,0xc1]
6149 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
6150 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6151 ; X86-NEXT: andl %ecx, %eax # encoding: [0x21,0xc8]
6152 ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
6153 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
6154 ; X86-NEXT: retl # encoding: [0xc3]
6156 ; X64-LABEL: test_int_x86_avx512_ptestm_b_256:
6158 ; X64-NEXT: vptestmb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7d,0x28,0x26,0xc1]
6159 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6160 ; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7]
6161 ; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
6162 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
6163 ; X64-NEXT: retq # encoding: [0xc3]
6164 %res = call i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2)
6165 %res1 = call i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 -1)
6166 %res2 = add i32 %res, %res1
6170 declare i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16>, <8 x i16>, i8)
6172 define i8@test_int_x86_avx512_ptestm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) {
6173 ; X86-LABEL: test_int_x86_avx512_ptestm_w_128:
6175 ; X86-NEXT: vptestmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x26,0xc1]
6176 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
6177 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6178 ; X86-NEXT: andb %cl, %al # encoding: [0x20,0xc8]
6179 ; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
6180 ; X86-NEXT: retl # encoding: [0xc3]
6182 ; X64-LABEL: test_int_x86_avx512_ptestm_w_128:
6184 ; X64-NEXT: vptestmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x26,0xc1]
6185 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6186 ; X64-NEXT: andb %al, %dil # encoding: [0x40,0x20,0xc7]
6187 ; X64-NEXT: addb %dil, %al # encoding: [0x40,0x00,0xf8]
6188 ; X64-NEXT: # kill: def $al killed $al killed $eax
6189 ; X64-NEXT: retq # encoding: [0xc3]
6190 %res = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
6191 %res1 = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1)
6192 %res2 = add i8 %res, %res1
6196 declare i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16>, <16 x i16>, i16)
6198 define i16@test_int_x86_avx512_ptestm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) {
6199 ; X86-LABEL: test_int_x86_avx512_ptestm_w_256:
6201 ; X86-NEXT: vptestmw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x26,0xc1]
6202 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
6203 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
6204 ; X86-NEXT: andw %cx, %ax # encoding: [0x66,0x21,0xc8]
6205 ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
6206 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
6207 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
6208 ; X86-NEXT: retl # encoding: [0xc3]
6210 ; X64-LABEL: test_int_x86_avx512_ptestm_w_256:
6212 ; X64-NEXT: vptestmw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x26,0xc1]
6213 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6214 ; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7]
6215 ; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
6216 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
6217 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
6218 ; X64-NEXT: retq # encoding: [0xc3]
6219 %res = call i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2)
6220 %res1 = call i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 -1)
6221 %res2 = add i16 %res, %res1
6225 declare i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8>, <16 x i8>, i16)
6227 define i16@test_int_x86_avx512_ptestnm_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) {
6228 ; X86-LABEL: test_int_x86_avx512_ptestnm_b_128:
6230 ; X86-NEXT: vptestnmb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x26,0xc1]
6231 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
6232 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
6233 ; X86-NEXT: andw %cx, %ax # encoding: [0x66,0x21,0xc8]
6234 ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
6235 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
6236 ; X86-NEXT: retl # encoding: [0xc3]
6238 ; X64-LABEL: test_int_x86_avx512_ptestnm_b_128:
6240 ; X64-NEXT: vptestnmb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x26,0xc1]
6241 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6242 ; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7]
6243 ; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
6244 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
6245 ; X64-NEXT: retq # encoding: [0xc3]
6246 %res = call i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
6247 %res1 = call i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1)
6248 %res2 = add i16 %res, %res1
6252 declare i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8>, <32 x i8>, i32)
6254 define i32@test_int_x86_avx512_ptestnm_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) {
6255 ; X86-LABEL: test_int_x86_avx512_ptestnm_b_256:
6257 ; X86-NEXT: vptestnmb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x26,0xc1]
6258 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
6259 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6260 ; X86-NEXT: andl %ecx, %eax # encoding: [0x21,0xc8]
6261 ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
6262 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
6263 ; X86-NEXT: retl # encoding: [0xc3]
6265 ; X64-LABEL: test_int_x86_avx512_ptestnm_b_256:
6267 ; X64-NEXT: vptestnmb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x26,0xc1]
6268 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6269 ; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7]
6270 ; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
6271 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
6272 ; X64-NEXT: retq # encoding: [0xc3]
6273 %res = call i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2)
6274 %res1 = call i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 -1)
6275 %res2 = add i32 %res, %res1
6279 declare i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16>, <8 x i16>, i8)
6281 define i8@test_int_x86_avx512_ptestnm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) {
6282 ; X86-LABEL: test_int_x86_avx512_ptestnm_w_128:
6284 ; X86-NEXT: vptestnmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x26,0xc1]
6285 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
6286 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6287 ; X86-NEXT: andb %cl, %al # encoding: [0x20,0xc8]
6288 ; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
6289 ; X86-NEXT: retl # encoding: [0xc3]
6291 ; X64-LABEL: test_int_x86_avx512_ptestnm_w_128:
6293 ; X64-NEXT: vptestnmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x26,0xc1]
6294 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6295 ; X64-NEXT: andb %al, %dil # encoding: [0x40,0x20,0xc7]
6296 ; X64-NEXT: addb %dil, %al # encoding: [0x40,0x00,0xf8]
6297 ; X64-NEXT: # kill: def $al killed $al killed $eax
6298 ; X64-NEXT: retq # encoding: [0xc3]
6299 %res = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
6300 %res1 = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1)
6301 %res2 = add i8 %res, %res1
6305 declare i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16>, <16 x i16>, i16)
6307 define i16@test_int_x86_avx512_ptestnm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) {
6308 ; X86-LABEL: test_int_x86_avx512_ptestnm_w_256:
6310 ; X86-NEXT: vptestnmw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x26,0xc1]
6311 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
6312 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
6313 ; X86-NEXT: andw %cx, %ax # encoding: [0x66,0x21,0xc8]
6314 ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
6315 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
6316 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
6317 ; X86-NEXT: retl # encoding: [0xc3]
6319 ; X64-LABEL: test_int_x86_avx512_ptestnm_w_256:
6321 ; X64-NEXT: vptestnmw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x26,0xc1]
6322 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6323 ; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7]
6324 ; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
6325 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
6326 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
6327 ; X64-NEXT: retq # encoding: [0xc3]
6328 %res = call i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2)
6329 %res1 = call i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 -1)
6330 %res2 = add i16 %res, %res1
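; The cvt*2mask tests only check the unmasked lowering: the byte variants are
; matched against vpmovmskb, while the word variants use vpmovw2m followed by a
; kmov of the resulting mask register.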
6334 declare i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8>)
6336 define i16@test_int_x86_avx512_cvtb2mask_128(<16 x i8> %x0) {
6337 ; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_128:
6339 ; CHECK-NEXT: vpmovmskb %xmm0, %eax # encoding: [0xc5,0xf9,0xd7,0xc0]
6340 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
6341 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6342 %res = call i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8> %x0)
6346 declare i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8>)
6348 define i32@test_int_x86_avx512_cvtb2mask_256(<32 x i8> %x0) {
6349 ; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_256:
6351 ; CHECK-NEXT: vpmovmskb %ymm0, %eax # encoding: [0xc5,0xfd,0xd7,0xc0]
6352 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
6353 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6354 %res = call i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8> %x0)
6358 declare i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16>)
6360 define i8@test_int_x86_avx512_cvtw2mask_128(<8 x i16> %x0) {
6361 ; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_128:
6363 ; CHECK-NEXT: vpmovw2m %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x29,0xc0]
6364 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6365 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
6366 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6367 %res = call i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16> %x0)
6371 declare i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16>)
6373 define i16@test_int_x86_avx512_cvtw2mask_256(<16 x i16> %x0) {
6374 ; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_256:
6376 ; CHECK-NEXT: vpmovw2m %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x29,0xc0]
6377 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6378 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
6379 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
6380 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6381 %res = call i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16> %x0)
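; Multiply-high tests (pmulhu.w, pmulh.w, pmul.hr.sw): each width has an
; unmasked test, where the all-ones mask folds to the VEX-compressed form, and
; a masked test that loads the mask into %k1 and merges into the passthru
; register before returning it.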
6385 declare <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
6387 define <8 x i16>@test_int_x86_avx512_pmulhu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
6388 ; CHECK-LABEL: test_int_x86_avx512_pmulhu_w_128:
6390 ; CHECK-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe4,0xc1]
6391 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6392 %res = call <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
6396 define <8 x i16>@test_int_x86_avx512_mask_pmulhu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
6397 ; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_128:
6399 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6400 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6401 ; X86-NEXT: vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1]
6402 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6403 ; X86-NEXT: retl # encoding: [0xc3]
6405 ; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_128:
6407 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6408 ; X64-NEXT: vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1]
6409 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6410 ; X64-NEXT: retq # encoding: [0xc3]
6411 %res = call <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
6415 declare <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
6417 define <16 x i16>@test_int_x86_avx512_pmulhu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
6418 ; CHECK-LABEL: test_int_x86_avx512_pmulhu_w_256:
6420 ; CHECK-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe4,0xc1]
6421 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6422 %res = call <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
6426 define <16 x i16>@test_int_x86_avx512_mask_pmulhu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
6427 ; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_256:
6429 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6430 ; X86-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1]
6431 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6432 ; X86-NEXT: retl # encoding: [0xc3]
6434 ; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_256:
6436 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6437 ; X64-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1]
6438 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6439 ; X64-NEXT: retq # encoding: [0xc3]
6440 %res = call <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
6444 declare <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
6446 define <8 x i16>@test_int_x86_avx512_pmulh_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
6447 ; CHECK-LABEL: test_int_x86_avx512_pmulh_w_128:
6449 ; CHECK-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe5,0xc1]
6450 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6451 %res = call <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
6455 define <8 x i16>@test_int_x86_avx512_mask_pmulh_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
6456 ; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_128:
6458 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6459 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6460 ; X86-NEXT: vpmulhw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1]
6461 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6462 ; X86-NEXT: retl # encoding: [0xc3]
6464 ; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_128:
6466 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6467 ; X64-NEXT: vpmulhw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1]
6468 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6469 ; X64-NEXT: retq # encoding: [0xc3]
6470 %res = call <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
6474 declare <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
6476 define <16 x i16>@test_int_x86_avx512_pmulh_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
6477 ; CHECK-LABEL: test_int_x86_avx512_pmulh_w_256:
6479 ; CHECK-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe5,0xc1]
6480 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6481 %res = call <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
6485 define <16 x i16>@test_int_x86_avx512_mask_pmulh_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
6486 ; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_256:
6488 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6489 ; X86-NEXT: vpmulhw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1]
6490 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6491 ; X86-NEXT: retl # encoding: [0xc3]
6493 ; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_256:
6495 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6496 ; X64-NEXT: vpmulhw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1]
6497 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6498 ; X64-NEXT: retq # encoding: [0xc3]
6499 %res = call <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
6503 declare <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
6505 define <8 x i16>@test_int_x86_avx512_pmulhr_sw_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
6506 ; CHECK-LABEL: test_int_x86_avx512_pmulhr_sw_128:
6508 ; CHECK-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0b,0xc1]
6509 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6510 %res = call <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
6514 define <8 x i16>@test_int_x86_avx512_mask_pmulhr_sw_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
6515 ; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_128:
6517 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6518 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6519 ; X86-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1]
6520 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6521 ; X86-NEXT: retl # encoding: [0xc3]
6523 ; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_128:
6525 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6526 ; X64-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1]
6527 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6528 ; X64-NEXT: retq # encoding: [0xc3]
6529 %res = call <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
6533 declare <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
6535 define <16 x i16>@test_int_x86_avx512_pmulhr_sw_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
6536 ; CHECK-LABEL: test_int_x86_avx512_pmulhr_sw_256:
6538 ; CHECK-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0b,0xc1]
6539 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6540 %res = call <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
6544 define <16 x i16>@test_int_x86_avx512_mask_pmulhr_sw_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
6545 ; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_256:
6547 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6548 ; X86-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1]
6549 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6550 ; X86-NEXT: retl # encoding: [0xc3]
6552 ; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_256:
6554 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6555 ; X64-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1]
6556 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6557 ; X64-NEXT: retq # encoding: [0xc3]
6558 %res = call <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
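; The pmaddubs.w and pmaddw.d tests follow the same unmasked/masked pattern as
; the multiply-high tests above.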
6562 declare <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8>, <16 x i8>, <8 x i16>, i8)
6564 define <8 x i16>@test_int_x86_avx512_pmaddubs_w_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2) {
6565 ; CHECK-LABEL: test_int_x86_avx512_pmaddubs_w_128:
6567 ; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x04,0xc1]
6568 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6569 %res = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 -1)
6573 define <8 x i16>@test_int_x86_avx512_mask_pmaddubs_w_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3) {
6574 ; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128:
6576 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6577 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6578 ; X86-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1]
6579 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6580 ; X86-NEXT: retl # encoding: [0xc3]
6582 ; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128:
6584 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6585 ; X64-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1]
6586 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6587 ; X64-NEXT: retq # encoding: [0xc3]
6588 %res = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3)
6592 declare <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8>, <32 x i8>, <16 x i16>, i16)
6594 define <16 x i16>@test_int_x86_avx512_pmaddubs_w_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2) {
6595 ; CHECK-LABEL: test_int_x86_avx512_pmaddubs_w_256:
6597 ; CHECK-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x04,0xc1]
6598 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6599 %res = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 -1)
6603 define <16 x i16>@test_int_x86_avx512_mask_pmaddubs_w_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3) {
6604 ; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256:
6606 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6607 ; X86-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1]
6608 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6609 ; X86-NEXT: retl # encoding: [0xc3]
6611 ; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256:
6613 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6614 ; X64-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1]
6615 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6616 ; X64-NEXT: retq # encoding: [0xc3]
6617 %res = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3)
6621 declare <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16>, <8 x i16>, <4 x i32>, i8)
6623 define <4 x i32>@test_int_x86_avx512_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2) {
6624 ; CHECK-LABEL: test_int_x86_avx512_pmaddw_d_128:
6626 ; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xc1]
6627 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6628 %res = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 -1)
6632 define <4 x i32>@test_int_x86_avx512_mask_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) {
6633 ; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_128:
6635 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6636 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6637 ; X86-NEXT: vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1]
6638 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6639 ; X86-NEXT: retl # encoding: [0xc3]
6641 ; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_128:
6643 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6644 ; X64-NEXT: vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1]
6645 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6646 ; X64-NEXT: retq # encoding: [0xc3]
6647 %res = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3)
6651 declare <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16>, <16 x i16>, <8 x i32>, i8)
6653 define <8 x i32>@test_int_x86_avx512_pmaddw_d_256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2) {
6654 ; CHECK-LABEL: test_int_x86_avx512_pmaddw_d_256:
6656 ; CHECK-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf5,0xc1]
6657 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6658 %res = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 -1)
6662 define <8 x i32>@test_int_x86_avx512_mask_pmaddw_d_256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) {
6663 ; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_256:
6665 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6666 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6667 ; X86-NEXT: vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1]
6668 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6669 ; X86-NEXT: retl # encoding: [0xc3]
6671 ; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_256:
6673 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6674 ; X64-NEXT: vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1]
6675 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6676 ; X64-NEXT: retq # encoding: [0xc3]
6677 %res = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3)
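; The permvar tests check vpermw in unmasked, merge-masked ({%k1}) and
; zero-masked ({%k1} {z}) forms for both the 128-bit and 256-bit vectors.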
6681 declare <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
6683 define <8 x i16>@test_int_x86_avx512_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
6684 ; CHECK-LABEL: test_int_x86_avx512_permvar_hi_128:
6686 ; CHECK-NEXT: vpermw %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x8d,0xc0]
6687 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6688 %res = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
6692 define <8 x i16>@test_int_x86_avx512_mask_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
6693 ; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_128:
6695 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6696 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6697 ; X86-NEXT: vpermw %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0]
6698 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6699 ; X86-NEXT: retl # encoding: [0xc3]
6701 ; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_128:
6703 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6704 ; X64-NEXT: vpermw %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0]
6705 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6706 ; X64-NEXT: retq # encoding: [0xc3]
6707 %res = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
6711 define <8 x i16>@test_int_x86_avx512_maskz_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
6712 ; X86-LABEL: test_int_x86_avx512_maskz_permvar_hi_128:
6714 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6715 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6716 ; X86-NEXT: vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0]
6717 ; X86-NEXT: retl # encoding: [0xc3]
6719 ; X64-LABEL: test_int_x86_avx512_maskz_permvar_hi_128:
6721 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6722 ; X64-NEXT: vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0]
6723 ; X64-NEXT: retq # encoding: [0xc3]
6724 %res = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
6728 declare <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
6730 define <16 x i16>@test_int_x86_avx512_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
6731 ; CHECK-LABEL: test_int_x86_avx512_permvar_hi_256:
6733 ; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x8d,0xc0]
6734 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6735 %res = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
6739 define <16 x i16>@test_int_x86_avx512_mask_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
6740 ; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_256:
6742 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6743 ; X86-NEXT: vpermw %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0]
6744 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6745 ; X86-NEXT: retl # encoding: [0xc3]
6747 ; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_256:
6749 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6750 ; X64-NEXT: vpermw %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0]
6751 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6752 ; X64-NEXT: retq # encoding: [0xc3]
6753 %res = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
6757 define <16 x i16>@test_int_x86_avx512_maskz_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
6758 ; X86-LABEL: test_int_x86_avx512_maskz_permvar_hi_256:
6760 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6761 ; X86-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0]
6762 ; X86-NEXT: retl # encoding: [0xc3]
6764 ; X64-LABEL: test_int_x86_avx512_maskz_permvar_hi_256:
6766 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6767 ; X64-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0]
6768 ; X64-NEXT: retq # encoding: [0xc3]
6769 %res = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
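; Note that the unmasked vpermt2var tests below are matched against vpermi2w
; and the unmasked vpermi2var tests against vpermt2w; the two opcodes differ
; only in which source register is overwritten, so the backend is free to pick
; whichever form avoids an extra copy.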
6773 declare <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
6775 define <8 x i16>@test_int_x86_avx512_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
6776 ; CHECK-LABEL: test_int_x86_avx512_vpermt2var_hi_128:
6778 ; CHECK-NEXT: vpermi2w %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x75,0xc2]
6779 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6780 %res = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
6784 define <8 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
6785 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128:
6787 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6788 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6789 ; X86-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xca]
6790 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
6791 ; X86-NEXT: retl # encoding: [0xc3]
6793 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128:
6795 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6796 ; X64-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xca]
6797 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
6798 ; X64-NEXT: retq # encoding: [0xc3]
6799 %res = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
6803 declare <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
6805 define <8 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
6806 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128:
6808 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6809 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6810 ; X86-NEXT: vpermi2w %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x75,0xc2]
6811 ; X86-NEXT: retl # encoding: [0xc3]
6813 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128:
6815 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6816 ; X64-NEXT: vpermi2w %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x75,0xc2]
6817 ; X64-NEXT: retq # encoding: [0xc3]
6818 %res = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
6822 declare <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
6824 define <16 x i16>@test_int_x86_avx512_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
6825 ; CHECK-LABEL: test_int_x86_avx512_vpermt2var_hi_256:
6827 ; CHECK-NEXT: vpermi2w %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x75,0xc2]
6828 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6829 %res = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
6833 define <16 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
6834 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256:
6836 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6837 ; X86-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xca]
6838 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
6839 ; X86-NEXT: retl # encoding: [0xc3]
6841 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256:
6843 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6844 ; X64-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xca]
6845 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
6846 ; X64-NEXT: retq # encoding: [0xc3]
6847 %res = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
6851 declare <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
6853 define <16 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
6854 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256:
6856 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6857 ; X86-NEXT: vpermi2w %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x75,0xc2]
6858 ; X86-NEXT: retl # encoding: [0xc3]
6860 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256:
6862 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6863 ; X64-NEXT: vpermi2w %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x75,0xc2]
6864 ; X64-NEXT: retq # encoding: [0xc3]
6865 %res = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
6869 declare <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
6871 define <8 x i16>@test_int_x86_avx512_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
6872 ; CHECK-LABEL: test_int_x86_avx512_vpermi2var_hi_128:
6874 ; CHECK-NEXT: vpermt2w %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x7d,0xc2]
6875 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6876 %res = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
6880 define <8 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
6881 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128:
6883 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6884 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6885 ; X86-NEXT: vpermi2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca]
6886 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
6887 ; X86-NEXT: retl # encoding: [0xc3]
6889 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128:
6891 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6892 ; X64-NEXT: vpermi2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca]
6893 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
6894 ; X64-NEXT: retq # encoding: [0xc3]
6895 %res = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
6899 declare <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
6901 define <16 x i16>@test_int_x86_avx512_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
6902 ; CHECK-LABEL: test_int_x86_avx512_vpermi2var_hi_256:
6904 ; CHECK-NEXT: vpermt2w %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x7d,0xc2]
6905 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6906 %res = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
6910 define <16 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
6911 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256:
6913 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6914 ; X86-NEXT: vpermi2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca]
6915 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
6916 ; X86-NEXT: retl # encoding: [0xc3]
6918 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256:
6920 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6921 ; X64-NEXT: vpermi2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca]
6922 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
6923 ; X64-NEXT: retq # encoding: [0xc3]
6924 %res = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
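; The dbpsadbw tests issue masked, zero-masked and unmasked calls with
; different immediates (2, 3 and 4) and return all three results in a struct,
; so one function covers the {%k1}, {%k1} {z} and unmasked vdbpsadbw encodings.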
6928 declare <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8>, <16 x i8>, i32, <8 x i16>, i8)
6930 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) {
6931 ; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_128:
6933 ; X86-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
6934 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6935 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6936 ; X86-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xe1,0x02]
6937 ; X86-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03]
6938 ; X86-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm2 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd1,0x04]
6939 ; X86-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
6940 ; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
6941 ; X86-NEXT: retl # encoding: [0xc3]
6943 ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_128:
6945 ; X64-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
6946 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6947 ; X64-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xe1,0x02]
6948 ; X64-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03]
6949 ; X64-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm2 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd1,0x04]
6950 ; X64-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
6951 ; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
6952 ; X64-NEXT: retq # encoding: [0xc3]
6953 %res0 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 %x4)
6954 %res1 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 3, <8 x i16> zeroinitializer, i8 %x4)
6955 %res2 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 4, <8 x i16> %x3, i8 -1)
6956 %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0
6957 %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %res1, 1
6958 %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %res2, 2
6959 ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5
6962 declare <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8>, <32 x i8>, i32, <16 x i16>, i16)
6964 define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) {
6965 ; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_256:
6967 ; X86-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
6968 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6969 ; X86-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xe1,0x02]
6970 ; X86-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03]
6971 ; X86-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm2 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd1,0x04]
6972 ; X86-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
6973 ; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
6974 ; X86-NEXT: retl # encoding: [0xc3]
6976 ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_256:
6978 ; X64-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
6979 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6980 ; X64-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xe1,0x02]
6981 ; X64-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03]
6982 ; X64-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm2 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd1,0x04]
6983 ; X64-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
6984 ; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
6985 ; X64-NEXT: retq # encoding: [0xc3]
6986 %res0 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> %x3, i16 %x4)
6987 %res1 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 3, <16 x i16> zeroinitializer, i16 %x4)
6988 %res2 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 4, <16 x i16> %x3, i16 -1)
6989 %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res0, 0
6990 %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %res1, 1
6991 %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %res2, 2
6992 ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5
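; The saturating-add tests below exercise vpaddusw in register (rr/rrk/rrkz)
; and memory (rm/rmk/rmkz) forms; the memory variants load the second operand
; through a pointer so the folded-load encodings are checked as well.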
6995 define <8 x i16> @test_mask_adds_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
6996 ; CHECK-LABEL: test_mask_adds_epu16_rr_128:
6998 ; CHECK-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1]
6999 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7000 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
7004 define <8 x i16> @test_mask_adds_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
7005 ; X86-LABEL: test_mask_adds_epu16_rrk_128:
7007 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7008 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
7009 ; X86-NEXT: vpaddusw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdd,0xd1]
7010 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7011 ; X86-NEXT: retl # encoding: [0xc3]
7013 ; X64-LABEL: test_mask_adds_epu16_rrk_128:
7015 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7016 ; X64-NEXT: vpaddusw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdd,0xd1]
7017 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7018 ; X64-NEXT: retq # encoding: [0xc3]
7019 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
7023 define <8 x i16> @test_mask_adds_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
7024 ; X86-LABEL: test_mask_adds_epu16_rrkz_128:
7026 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7027 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
7028 ; X86-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdd,0xc1]
7029 ; X86-NEXT: retl # encoding: [0xc3]
7031 ; X64-LABEL: test_mask_adds_epu16_rrkz_128:
7033 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7034 ; X64-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdd,0xc1]
7035 ; X64-NEXT: retq # encoding: [0xc3]
7036 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
7040 define <8 x i16> @test_mask_adds_epu16_rm_128(<8 x i16> %a, ptr %ptr_b) {
7041 ; X86-LABEL: test_mask_adds_epu16_rm_128:
7043 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7044 ; X86-NEXT: vpaddusw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0x00]
7045 ; X86-NEXT: retl # encoding: [0xc3]
7047 ; X64-LABEL: test_mask_adds_epu16_rm_128:
7049 ; X64-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0x07]
7050 ; X64-NEXT: retq # encoding: [0xc3]
7051 %b = load <8 x i16>, ptr %ptr_b
7052 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
7056 define <8 x i16> @test_mask_adds_epu16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
7057 ; X86-LABEL: test_mask_adds_epu16_rmk_128:
7059 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7060 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
7061 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
7062 ; X86-NEXT: vpaddusw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdd,0x08]
7063 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7064 ; X86-NEXT: retl # encoding: [0xc3]
7066 ; X64-LABEL: test_mask_adds_epu16_rmk_128:
7068 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7069 ; X64-NEXT: vpaddusw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdd,0x0f]
7070 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7071 ; X64-NEXT: retq # encoding: [0xc3]
7072 %b = load <8 x i16>, ptr %ptr_b
7073 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
7077 define <8 x i16> @test_mask_adds_epu16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
7078 ; X86-LABEL: test_mask_adds_epu16_rmkz_128:
7080 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7081 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
7082 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
7083 ; X86-NEXT: vpaddusw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdd,0x00]
7084 ; X86-NEXT: retl # encoding: [0xc3]
7086 ; X64-LABEL: test_mask_adds_epu16_rmkz_128:
7088 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7089 ; X64-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdd,0x07]
7090 ; X64-NEXT: retq # encoding: [0xc3]
7091 %b = load <8 x i16>, ptr %ptr_b
7092 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
7096 declare <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
7098 define <16 x i16> @test_mask_adds_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
7099 ; CHECK-LABEL: test_mask_adds_epu16_rr_256:
7101 ; CHECK-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0xc1]
7102 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7103 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
7107 define <16 x i16> @test_mask_adds_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
7108 ; X86-LABEL: test_mask_adds_epu16_rrk_256:
7110 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7111 ; X86-NEXT: vpaddusw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdd,0xd1]
7112 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7113 ; X86-NEXT: retl # encoding: [0xc3]
7115 ; X64-LABEL: test_mask_adds_epu16_rrk_256:
7117 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7118 ; X64-NEXT: vpaddusw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdd,0xd1]
7119 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7120 ; X64-NEXT: retq # encoding: [0xc3]
7121 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
7125 define <16 x i16> @test_mask_adds_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
7126 ; X86-LABEL: test_mask_adds_epu16_rrkz_256:
7128 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7129 ; X86-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0xc1]
7130 ; X86-NEXT: retl # encoding: [0xc3]
7132 ; X64-LABEL: test_mask_adds_epu16_rrkz_256:
7134 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7135 ; X64-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0xc1]
7136 ; X64-NEXT: retq # encoding: [0xc3]
7137 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
7141 define <16 x i16> @test_mask_adds_epu16_rm_256(<16 x i16> %a, ptr %ptr_b) {
7142 ; X86-LABEL: test_mask_adds_epu16_rm_256:
7144 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7145 ; X86-NEXT: vpaddusw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0x00]
7146 ; X86-NEXT: retl # encoding: [0xc3]
7148 ; X64-LABEL: test_mask_adds_epu16_rm_256:
7150 ; X64-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0x07]
7151 ; X64-NEXT: retq # encoding: [0xc3]
7152 %b = load <16 x i16>, ptr %ptr_b
7153 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
7157 define <16 x i16> @test_mask_adds_epu16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
7158 ; X86-LABEL: test_mask_adds_epu16_rmk_256:
7160 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7161 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7162 ; X86-NEXT: vpaddusw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdd,0x08]
7163 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7164 ; X86-NEXT: retl # encoding: [0xc3]
7166 ; X64-LABEL: test_mask_adds_epu16_rmk_256:
7168 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7169 ; X64-NEXT: vpaddusw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdd,0x0f]
7170 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7171 ; X64-NEXT: retq # encoding: [0xc3]
7172 %b = load <16 x i16>, ptr %ptr_b
7173 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
7177 define <16 x i16> @test_mask_adds_epu16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) {
7178 ; X86-LABEL: test_mask_adds_epu16_rmkz_256:
7180 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7181 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7182 ; X86-NEXT: vpaddusw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0x00]
7183 ; X86-NEXT: retl # encoding: [0xc3]
7185 ; X64-LABEL: test_mask_adds_epu16_rmkz_256:
7187 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7188 ; X64-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0x07]
7189 ; X64-NEXT: retq # encoding: [0xc3]
7190 %b = load <16 x i16>, ptr %ptr_b
7191 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
7195 declare <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
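; The saturating-subtract (vpsubusw) tests mirror the vpaddusw tests above.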
7197 define <8 x i16> @test_mask_subs_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
7198 ; CHECK-LABEL: test_mask_subs_epu16_rr_128:
7200 ; CHECK-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1]
7201 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7202 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
7206 define <8 x i16> @test_mask_subs_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
7207 ; X86-LABEL: test_mask_subs_epu16_rrk_128:
7209 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7210 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
7211 ; X86-NEXT: vpsubusw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd9,0xd1]
7212 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7213 ; X86-NEXT: retl # encoding: [0xc3]
7215 ; X64-LABEL: test_mask_subs_epu16_rrk_128:
7217 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7218 ; X64-NEXT: vpsubusw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd9,0xd1]
7219 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7220 ; X64-NEXT: retq # encoding: [0xc3]
7221 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
7222 ret <8 x i16> %res
7223 }
7225 define <8 x i16> @test_mask_subs_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
7226 ; X86-LABEL: test_mask_subs_epu16_rrkz_128:
7228 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7229 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
7230 ; X86-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd9,0xc1]
7231 ; X86-NEXT: retl # encoding: [0xc3]
7233 ; X64-LABEL: test_mask_subs_epu16_rrkz_128:
7235 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7236 ; X64-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd9,0xc1]
7237 ; X64-NEXT: retq # encoding: [0xc3]
7238 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
7239 ret <8 x i16> %res
7240 }
7242 define <8 x i16> @test_mask_subs_epu16_rm_128(<8 x i16> %a, ptr %ptr_b) {
7243 ; X86-LABEL: test_mask_subs_epu16_rm_128:
7245 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7246 ; X86-NEXT: vpsubusw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0x00]
7247 ; X86-NEXT: retl # encoding: [0xc3]
7249 ; X64-LABEL: test_mask_subs_epu16_rm_128:
7251 ; X64-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0x07]
7252 ; X64-NEXT: retq # encoding: [0xc3]
7253 %b = load <8 x i16>, ptr %ptr_b
7254 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
7255 ret <8 x i16> %res
7256 }
7258 define <8 x i16> @test_mask_subs_epu16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
7259 ; X86-LABEL: test_mask_subs_epu16_rmk_128:
7261 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7262 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
7263 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
7264 ; X86-NEXT: vpsubusw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd9,0x08]
7265 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7266 ; X86-NEXT: retl # encoding: [0xc3]
7268 ; X64-LABEL: test_mask_subs_epu16_rmk_128:
7270 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7271 ; X64-NEXT: vpsubusw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd9,0x0f]
7272 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7273 ; X64-NEXT: retq # encoding: [0xc3]
7274 %b = load <8 x i16>, ptr %ptr_b
7275 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
7276 ret <8 x i16> %res
7277 }
7279 define <8 x i16> @test_mask_subs_epu16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
7280 ; X86-LABEL: test_mask_subs_epu16_rmkz_128:
7282 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7283 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
7284 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
7285 ; X86-NEXT: vpsubusw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd9,0x00]
7286 ; X86-NEXT: retl # encoding: [0xc3]
7288 ; X64-LABEL: test_mask_subs_epu16_rmkz_128:
7290 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7291 ; X64-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd9,0x07]
7292 ; X64-NEXT: retq # encoding: [0xc3]
7293 %b = load <8 x i16>, ptr %ptr_b
7294 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
7295 ret <8 x i16> %res
7296 }
7298 declare <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
7300 define <16 x i16> @test_mask_subs_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
7301 ; CHECK-LABEL: test_mask_subs_epu16_rr_256:
7303 ; CHECK-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0xc1]
7304 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7305 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
7306 ret <16 x i16> %res
7307 }
7309 define <16 x i16> @test_mask_subs_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
7310 ; X86-LABEL: test_mask_subs_epu16_rrk_256:
7312 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7313 ; X86-NEXT: vpsubusw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd9,0xd1]
7314 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7315 ; X86-NEXT: retl # encoding: [0xc3]
7317 ; X64-LABEL: test_mask_subs_epu16_rrk_256:
7319 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7320 ; X64-NEXT: vpsubusw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd9,0xd1]
7321 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7322 ; X64-NEXT: retq # encoding: [0xc3]
7323 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
7324 ret <16 x i16> %res
7325 }
7327 define <16 x i16> @test_mask_subs_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
7328 ; X86-LABEL: test_mask_subs_epu16_rrkz_256:
7330 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7331 ; X86-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0xc1]
7332 ; X86-NEXT: retl # encoding: [0xc3]
7334 ; X64-LABEL: test_mask_subs_epu16_rrkz_256:
7336 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7337 ; X64-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0xc1]
7338 ; X64-NEXT: retq # encoding: [0xc3]
7339 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
7340 ret <16 x i16> %res
7341 }
7343 define <16 x i16> @test_mask_subs_epu16_rm_256(<16 x i16> %a, ptr %ptr_b) {
7344 ; X86-LABEL: test_mask_subs_epu16_rm_256:
7346 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7347 ; X86-NEXT: vpsubusw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0x00]
7348 ; X86-NEXT: retl # encoding: [0xc3]
7350 ; X64-LABEL: test_mask_subs_epu16_rm_256:
7352 ; X64-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0x07]
7353 ; X64-NEXT: retq # encoding: [0xc3]
7354 %b = load <16 x i16>, ptr %ptr_b
7355 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
7356 ret <16 x i16> %res
7357 }
7359 define <16 x i16> @test_mask_subs_epu16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
7360 ; X86-LABEL: test_mask_subs_epu16_rmk_256:
7362 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7363 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7364 ; X86-NEXT: vpsubusw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd9,0x08]
7365 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7366 ; X86-NEXT: retl # encoding: [0xc3]
7368 ; X64-LABEL: test_mask_subs_epu16_rmk_256:
7370 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7371 ; X64-NEXT: vpsubusw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd9,0x0f]
7372 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7373 ; X64-NEXT: retq # encoding: [0xc3]
7374 %b = load <16 x i16>, ptr %ptr_b
7375 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
7376 ret <16 x i16> %res
7377 }
7379 define <16 x i16> @test_mask_subs_epu16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) {
7380 ; X86-LABEL: test_mask_subs_epu16_rmkz_256:
7382 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7383 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7384 ; X86-NEXT: vpsubusw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0x00]
7385 ; X86-NEXT: retl # encoding: [0xc3]
7387 ; X64-LABEL: test_mask_subs_epu16_rmkz_256:
7389 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7390 ; X64-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0x07]
7391 ; X64-NEXT: retq # encoding: [0xc3]
7392 %b = load <16 x i16>, ptr %ptr_b
7393 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
7394 ret <16 x i16> %res
7395 }
7397 declare <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
7399 define <16 x i8> @test_mask_adds_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
7400 ; CHECK-LABEL: test_mask_adds_epu8_rr_128:
7402 ; CHECK-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0xc1]
7403 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7404 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
7405 ret <16 x i8> %res
7406 }
7408 define <16 x i8> @test_mask_adds_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
7409 ; X86-LABEL: test_mask_adds_epu8_rrk_128:
7411 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7412 ; X86-NEXT: vpaddusb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdc,0xd1]
7413 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7414 ; X86-NEXT: retl # encoding: [0xc3]
7416 ; X64-LABEL: test_mask_adds_epu8_rrk_128:
7418 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7419 ; X64-NEXT: vpaddusb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdc,0xd1]
7420 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7421 ; X64-NEXT: retq # encoding: [0xc3]
7422 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
7423 ret <16 x i8> %res
7424 }
7426 define <16 x i8> @test_mask_adds_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
7427 ; X86-LABEL: test_mask_adds_epu8_rrkz_128:
7429 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7430 ; X86-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdc,0xc1]
7431 ; X86-NEXT: retl # encoding: [0xc3]
7433 ; X64-LABEL: test_mask_adds_epu8_rrkz_128:
7435 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7436 ; X64-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdc,0xc1]
7437 ; X64-NEXT: retq # encoding: [0xc3]
7438 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
7439 ret <16 x i8> %res
7440 }
7442 define <16 x i8> @test_mask_adds_epu8_rm_128(<16 x i8> %a, ptr %ptr_b) {
7443 ; X86-LABEL: test_mask_adds_epu8_rm_128:
7445 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7446 ; X86-NEXT: vpaddusb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0x00]
7447 ; X86-NEXT: retl # encoding: [0xc3]
7449 ; X64-LABEL: test_mask_adds_epu8_rm_128:
7451 ; X64-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0x07]
7452 ; X64-NEXT: retq # encoding: [0xc3]
7453 %b = load <16 x i8>, ptr %ptr_b
7454 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
7455 ret <16 x i8> %res
7456 }
7458 define <16 x i8> @test_mask_adds_epu8_rmk_128(<16 x i8> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) {
7459 ; X86-LABEL: test_mask_adds_epu8_rmk_128:
7461 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7462 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7463 ; X86-NEXT: vpaddusb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdc,0x08]
7464 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7465 ; X86-NEXT: retl # encoding: [0xc3]
7467 ; X64-LABEL: test_mask_adds_epu8_rmk_128:
7469 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7470 ; X64-NEXT: vpaddusb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdc,0x0f]
7471 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7472 ; X64-NEXT: retq # encoding: [0xc3]
7473 %b = load <16 x i8>, ptr %ptr_b
7474 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
7475 ret <16 x i8> %res
7476 }
7478 define <16 x i8> @test_mask_adds_epu8_rmkz_128(<16 x i8> %a, ptr %ptr_b, i16 %mask) {
7479 ; X86-LABEL: test_mask_adds_epu8_rmkz_128:
7481 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7482 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7483 ; X86-NEXT: vpaddusb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdc,0x00]
7484 ; X86-NEXT: retl # encoding: [0xc3]
7486 ; X64-LABEL: test_mask_adds_epu8_rmkz_128:
7488 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7489 ; X64-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdc,0x07]
7490 ; X64-NEXT: retq # encoding: [0xc3]
7491 %b = load <16 x i8>, ptr %ptr_b
7492 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
7493 ret <16 x i8> %res
7494 }
7496 declare <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
7498 define <32 x i8> @test_mask_adds_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
7499 ; CHECK-LABEL: test_mask_adds_epu8_rr_256:
7501 ; CHECK-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0xc1]
7502 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7503 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
7504 ret <32 x i8> %res
7505 }
7507 define <32 x i8> @test_mask_adds_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
7508 ; X86-LABEL: test_mask_adds_epu8_rrk_256:
7510 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
7511 ; X86-NEXT: vpaddusb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdc,0xd1]
7512 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7513 ; X86-NEXT: retl # encoding: [0xc3]
7515 ; X64-LABEL: test_mask_adds_epu8_rrk_256:
7517 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7518 ; X64-NEXT: vpaddusb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdc,0xd1]
7519 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7520 ; X64-NEXT: retq # encoding: [0xc3]
7521 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
7522 ret <32 x i8> %res
7523 }
7525 define <32 x i8> @test_mask_adds_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
7526 ; X86-LABEL: test_mask_adds_epu8_rrkz_256:
7528 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
7529 ; X86-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0xc1]
7530 ; X86-NEXT: retl # encoding: [0xc3]
7532 ; X64-LABEL: test_mask_adds_epu8_rrkz_256:
7534 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7535 ; X64-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0xc1]
7536 ; X64-NEXT: retq # encoding: [0xc3]
7537 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
7538 ret <32 x i8> %res
7539 }
7541 define <32 x i8> @test_mask_adds_epu8_rm_256(<32 x i8> %a, ptr %ptr_b) {
7542 ; X86-LABEL: test_mask_adds_epu8_rm_256:
7544 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7545 ; X86-NEXT: vpaddusb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0x00]
7546 ; X86-NEXT: retl # encoding: [0xc3]
7548 ; X64-LABEL: test_mask_adds_epu8_rm_256:
7550 ; X64-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0x07]
7551 ; X64-NEXT: retq # encoding: [0xc3]
7552 %b = load <32 x i8>, ptr %ptr_b
7553 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
7554 ret <32 x i8> %res
7555 }
7557 define <32 x i8> @test_mask_adds_epu8_rmk_256(<32 x i8> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) {
7558 ; X86-LABEL: test_mask_adds_epu8_rmk_256:
7560 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7561 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
7562 ; X86-NEXT: vpaddusb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdc,0x08]
7563 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7564 ; X86-NEXT: retl # encoding: [0xc3]
7566 ; X64-LABEL: test_mask_adds_epu8_rmk_256:
7568 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7569 ; X64-NEXT: vpaddusb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdc,0x0f]
7570 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7571 ; X64-NEXT: retq # encoding: [0xc3]
7572 %b = load <32 x i8>, ptr %ptr_b
7573 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
7574 ret <32 x i8> %res
7575 }
7577 define <32 x i8> @test_mask_adds_epu8_rmkz_256(<32 x i8> %a, ptr %ptr_b, i32 %mask) {
7578 ; X86-LABEL: test_mask_adds_epu8_rmkz_256:
7580 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7581 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
7582 ; X86-NEXT: vpaddusb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0x00]
7583 ; X86-NEXT: retl # encoding: [0xc3]
7585 ; X64-LABEL: test_mask_adds_epu8_rmkz_256:
7587 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7588 ; X64-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0x07]
7589 ; X64-NEXT: retq # encoding: [0xc3]
7590 %b = load <32 x i8>, ptr %ptr_b
7591 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
7592 ret <32 x i8> %res
7593 }
7595 declare <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
7597 define <16 x i8> @test_mask_subs_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
7598 ; CHECK-LABEL: test_mask_subs_epu8_rr_128:
7600 ; CHECK-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0xc1]
7601 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7602 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
7603 ret <16 x i8> %res
7604 }
7606 define <16 x i8> @test_mask_subs_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
7607 ; X86-LABEL: test_mask_subs_epu8_rrk_128:
7609 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7610 ; X86-NEXT: vpsubusb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd8,0xd1]
7611 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7612 ; X86-NEXT: retl # encoding: [0xc3]
7614 ; X64-LABEL: test_mask_subs_epu8_rrk_128:
7616 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7617 ; X64-NEXT: vpsubusb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd8,0xd1]
7618 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7619 ; X64-NEXT: retq # encoding: [0xc3]
7620 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
7621 ret <16 x i8> %res
7622 }
7624 define <16 x i8> @test_mask_subs_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
7625 ; X86-LABEL: test_mask_subs_epu8_rrkz_128:
7627 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7628 ; X86-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd8,0xc1]
7629 ; X86-NEXT: retl # encoding: [0xc3]
7631 ; X64-LABEL: test_mask_subs_epu8_rrkz_128:
7633 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7634 ; X64-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd8,0xc1]
7635 ; X64-NEXT: retq # encoding: [0xc3]
7636 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
7637 ret <16 x i8> %res
7638 }
7640 define <16 x i8> @test_mask_subs_epu8_rm_128(<16 x i8> %a, ptr %ptr_b) {
7641 ; X86-LABEL: test_mask_subs_epu8_rm_128:
7643 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7644 ; X86-NEXT: vpsubusb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0x00]
7645 ; X86-NEXT: retl # encoding: [0xc3]
7647 ; X64-LABEL: test_mask_subs_epu8_rm_128:
7649 ; X64-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0x07]
7650 ; X64-NEXT: retq # encoding: [0xc3]
7651 %b = load <16 x i8>, ptr %ptr_b
7652 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
7653 ret <16 x i8> %res
7654 }
7656 define <16 x i8> @test_mask_subs_epu8_rmk_128(<16 x i8> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) {
7657 ; X86-LABEL: test_mask_subs_epu8_rmk_128:
7659 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7660 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7661 ; X86-NEXT: vpsubusb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd8,0x08]
7662 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7663 ; X86-NEXT: retl # encoding: [0xc3]
7665 ; X64-LABEL: test_mask_subs_epu8_rmk_128:
7667 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7668 ; X64-NEXT: vpsubusb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd8,0x0f]
7669 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7670 ; X64-NEXT: retq # encoding: [0xc3]
7671 %b = load <16 x i8>, ptr %ptr_b
7672 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
7673 ret <16 x i8> %res
7674 }
7676 define <16 x i8> @test_mask_subs_epu8_rmkz_128(<16 x i8> %a, ptr %ptr_b, i16 %mask) {
7677 ; X86-LABEL: test_mask_subs_epu8_rmkz_128:
7679 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7680 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7681 ; X86-NEXT: vpsubusb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd8,0x00]
7682 ; X86-NEXT: retl # encoding: [0xc3]
7684 ; X64-LABEL: test_mask_subs_epu8_rmkz_128:
7686 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7687 ; X64-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd8,0x07]
7688 ; X64-NEXT: retq # encoding: [0xc3]
7689 %b = load <16 x i8>, ptr %ptr_b
7690 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
7691 ret <16 x i8> %res
7692 }
7694 declare <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
7696 define <32 x i8> @test_mask_subs_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
7697 ; CHECK-LABEL: test_mask_subs_epu8_rr_256:
7699 ; CHECK-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0xc1]
7700 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7701 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
7702 ret <32 x i8> %res
7703 }
7705 define <32 x i8> @test_mask_subs_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
7706 ; X86-LABEL: test_mask_subs_epu8_rrk_256:
7708 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
7709 ; X86-NEXT: vpsubusb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd8,0xd1]
7710 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7711 ; X86-NEXT: retl # encoding: [0xc3]
7713 ; X64-LABEL: test_mask_subs_epu8_rrk_256:
7715 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7716 ; X64-NEXT: vpsubusb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd8,0xd1]
7717 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7718 ; X64-NEXT: retq # encoding: [0xc3]
7719 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
7720 ret <32 x i8> %res
7721 }
7723 define <32 x i8> @test_mask_subs_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
7724 ; X86-LABEL: test_mask_subs_epu8_rrkz_256:
7726 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
7727 ; X86-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0xc1]
7728 ; X86-NEXT: retl # encoding: [0xc3]
7730 ; X64-LABEL: test_mask_subs_epu8_rrkz_256:
7732 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7733 ; X64-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0xc1]
7734 ; X64-NEXT: retq # encoding: [0xc3]
7735 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
7736 ret <32 x i8> %res
7737 }
7739 define <32 x i8> @test_mask_subs_epu8_rm_256(<32 x i8> %a, ptr %ptr_b) {
7740 ; X86-LABEL: test_mask_subs_epu8_rm_256:
7742 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7743 ; X86-NEXT: vpsubusb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0x00]
7744 ; X86-NEXT: retl # encoding: [0xc3]
7746 ; X64-LABEL: test_mask_subs_epu8_rm_256:
7748 ; X64-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0x07]
7749 ; X64-NEXT: retq # encoding: [0xc3]
7750 %b = load <32 x i8>, ptr %ptr_b
7751 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
7752 ret <32 x i8> %res
7753 }
7755 define <32 x i8> @test_mask_subs_epu8_rmk_256(<32 x i8> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) {
7756 ; X86-LABEL: test_mask_subs_epu8_rmk_256:
7758 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7759 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
7760 ; X86-NEXT: vpsubusb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd8,0x08]
7761 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7762 ; X86-NEXT: retl # encoding: [0xc3]
7764 ; X64-LABEL: test_mask_subs_epu8_rmk_256:
7766 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7767 ; X64-NEXT: vpsubusb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd8,0x0f]
7768 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7769 ; X64-NEXT: retq # encoding: [0xc3]
7770 %b = load <32 x i8>, ptr %ptr_b
7771 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
7772 ret <32 x i8> %res
7773 }
7775 define <32 x i8> @test_mask_subs_epu8_rmkz_256(<32 x i8> %a, ptr %ptr_b, i32 %mask) {
7776 ; X86-LABEL: test_mask_subs_epu8_rmkz_256:
7778 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7779 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
7780 ; X86-NEXT: vpsubusb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0x00]
7781 ; X86-NEXT: retl # encoding: [0xc3]
7783 ; X64-LABEL: test_mask_subs_epu8_rmkz_256:
7785 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7786 ; X64-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0x07]
7787 ; X64-NEXT: retq # encoding: [0xc3]
7788 %b = load <32 x i8>, ptr %ptr_b
7789 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
7790 ret <32 x i8> %res
7791 }
7793 declare <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
7795 define <8 x i16> @test_mask_adds_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
7796 ; CHECK-LABEL: test_mask_adds_epi16_rr_128:
7798 ; CHECK-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0xc1]
7799 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7800 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
7801 ret <8 x i16> %res
7802 }
7804 define <8 x i16> @test_mask_adds_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
7805 ; X86-LABEL: test_mask_adds_epi16_rrk_128:
7807 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7808 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
7809 ; X86-NEXT: vpaddsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1]
7810 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7811 ; X86-NEXT: retl # encoding: [0xc3]
7813 ; X64-LABEL: test_mask_adds_epi16_rrk_128:
7815 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7816 ; X64-NEXT: vpaddsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1]
7817 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7818 ; X64-NEXT: retq # encoding: [0xc3]
7819 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
7820 ret <8 x i16> %res
7821 }
7823 define <8 x i16> @test_mask_adds_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
7824 ; X86-LABEL: test_mask_adds_epi16_rrkz_128:
7826 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7827 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
7828 ; X86-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1]
7829 ; X86-NEXT: retl # encoding: [0xc3]
7831 ; X64-LABEL: test_mask_adds_epi16_rrkz_128:
7833 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7834 ; X64-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1]
7835 ; X64-NEXT: retq # encoding: [0xc3]
7836 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
7837 ret <8 x i16> %res
7838 }
7840 define <8 x i16> @test_mask_adds_epi16_rm_128(<8 x i16> %a, ptr %ptr_b) {
7841 ; X86-LABEL: test_mask_adds_epi16_rm_128:
7843 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7844 ; X86-NEXT: vpaddsw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0x00]
7845 ; X86-NEXT: retl # encoding: [0xc3]
7847 ; X64-LABEL: test_mask_adds_epi16_rm_128:
7849 ; X64-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0x07]
7850 ; X64-NEXT: retq # encoding: [0xc3]
7851 %b = load <8 x i16>, ptr %ptr_b
7852 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
7853 ret <8 x i16> %res
7854 }
7856 define <8 x i16> @test_mask_adds_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
7857 ; X86-LABEL: test_mask_adds_epi16_rmk_128:
7859 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7860 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
7861 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
7862 ; X86-NEXT: vpaddsw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0x08]
7863 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7864 ; X86-NEXT: retl # encoding: [0xc3]
7866 ; X64-LABEL: test_mask_adds_epi16_rmk_128:
7868 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7869 ; X64-NEXT: vpaddsw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0x0f]
7870 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7871 ; X64-NEXT: retq # encoding: [0xc3]
7872 %b = load <8 x i16>, ptr %ptr_b
7873 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
7874 ret <8 x i16> %res
7875 }
7877 define <8 x i16> @test_mask_adds_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
7878 ; X86-LABEL: test_mask_adds_epi16_rmkz_128:
7880 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7881 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
7882 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
7883 ; X86-NEXT: vpaddsw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0x00]
7884 ; X86-NEXT: retl # encoding: [0xc3]
7886 ; X64-LABEL: test_mask_adds_epi16_rmkz_128:
7888 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7889 ; X64-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0x07]
7890 ; X64-NEXT: retq # encoding: [0xc3]
7891 %b = load <8 x i16>, ptr %ptr_b
7892 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
7893 ret <8 x i16> %res
7894 }
7896 declare <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
7898 define <16 x i16> @test_mask_adds_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
7899 ; CHECK-LABEL: test_mask_adds_epi16_rr_256:
7901 ; CHECK-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0xc1]
7902 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7903 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
7904 ret <16 x i16> %res
7905 }
7907 define <16 x i16> @test_mask_adds_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
7908 ; X86-LABEL: test_mask_adds_epi16_rrk_256:
7910 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7911 ; X86-NEXT: vpaddsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1]
7912 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7913 ; X86-NEXT: retl # encoding: [0xc3]
7915 ; X64-LABEL: test_mask_adds_epi16_rrk_256:
7917 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7918 ; X64-NEXT: vpaddsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1]
7919 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7920 ; X64-NEXT: retq # encoding: [0xc3]
7921 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
7922 ret <16 x i16> %res
7923 }
7925 define <16 x i16> @test_mask_adds_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
7926 ; X86-LABEL: test_mask_adds_epi16_rrkz_256:
7928 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7929 ; X86-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1]
7930 ; X86-NEXT: retl # encoding: [0xc3]
7932 ; X64-LABEL: test_mask_adds_epi16_rrkz_256:
7934 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7935 ; X64-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1]
7936 ; X64-NEXT: retq # encoding: [0xc3]
7937 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
7938 ret <16 x i16> %res
7939 }
7941 define <16 x i16> @test_mask_adds_epi16_rm_256(<16 x i16> %a, ptr %ptr_b) {
7942 ; X86-LABEL: test_mask_adds_epi16_rm_256:
7944 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7945 ; X86-NEXT: vpaddsw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0x00]
7946 ; X86-NEXT: retl # encoding: [0xc3]
7948 ; X64-LABEL: test_mask_adds_epi16_rm_256:
7950 ; X64-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0x07]
7951 ; X64-NEXT: retq # encoding: [0xc3]
7952 %b = load <16 x i16>, ptr %ptr_b
7953 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
7954 ret <16 x i16> %res
7955 }
7957 define <16 x i16> @test_mask_adds_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
7958 ; X86-LABEL: test_mask_adds_epi16_rmk_256:
7960 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7961 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7962 ; X86-NEXT: vpaddsw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0x08]
7963 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7964 ; X86-NEXT: retl # encoding: [0xc3]
7966 ; X64-LABEL: test_mask_adds_epi16_rmk_256:
7968 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7969 ; X64-NEXT: vpaddsw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0x0f]
7970 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7971 ; X64-NEXT: retq # encoding: [0xc3]
7972 %b = load <16 x i16>, ptr %ptr_b
7973 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
7974 ret <16 x i16> %res
7975 }
7977 define <16 x i16> @test_mask_adds_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) {
7978 ; X86-LABEL: test_mask_adds_epi16_rmkz_256:
7980 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7981 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7982 ; X86-NEXT: vpaddsw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x00]
7983 ; X86-NEXT: retl # encoding: [0xc3]
7985 ; X64-LABEL: test_mask_adds_epi16_rmkz_256:
7987 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7988 ; X64-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x07]
7989 ; X64-NEXT: retq # encoding: [0xc3]
7990 %b = load <16 x i16>, ptr %ptr_b
7991 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
7992 ret <16 x i16> %res
7993 }
7995 declare <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
7997 declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) #0
7999 define <8 x i16> @test_test_subs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
8000 ; X86-LABEL: test_test_subs_epi16_rrk_128:
8002 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8003 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
8004 ; X86-NEXT: vpsubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1]
8005 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8006 ; X86-NEXT: retl # encoding: [0xc3]
8008 ; X64-LABEL: test_test_subs_epi16_rrk_128:
8010 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8011 ; X64-NEXT: vpsubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1]
8012 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8013 ; X64-NEXT: retq # encoding: [0xc3]
8014 %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a, <8 x i16> %b)
8015 %2 = bitcast i8 %mask to <8 x i1>
8016 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
8017 ret <8 x i16> %3
8018 }
8020 define <8 x i16> @test_test_subs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
8021 ; X86-LABEL: test_test_subs_epi16_rrkz_128:
8023 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8024 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
8025 ; X86-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1]
8026 ; X86-NEXT: retl # encoding: [0xc3]
8028 ; X64-LABEL: test_test_subs_epi16_rrkz_128:
8030 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8031 ; X64-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1]
8032 ; X64-NEXT: retq # encoding: [0xc3]
8033 %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a, <8 x i16> %b)
8034 %2 = bitcast i8 %mask to <8 x i1>
8035 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
8036 ret <8 x i16> %3
8037 }
8039 define <8 x i16> @test_test_subs_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
8040 ; X86-LABEL: test_test_subs_epi16_rmk_128:
8042 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8043 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
8044 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
8045 ; X86-NEXT: vpsubsw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x08]
8046 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8047 ; X86-NEXT: retl # encoding: [0xc3]
8049 ; X64-LABEL: test_test_subs_epi16_rmk_128:
8051 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8052 ; X64-NEXT: vpsubsw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x0f]
8053 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8054 ; X64-NEXT: retq # encoding: [0xc3]
8055 %b = load <8 x i16>, ptr %ptr_b
8056 %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a, <8 x i16> %b)
8057 %2 = bitcast i8 %mask to <8 x i1>
8058 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
8059 ret <8 x i16> %3
8060 }
8062 define <8 x i16> @test_test_subs_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
8063 ; X86-LABEL: test_test_subs_epi16_rmkz_128:
8065 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8066 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
8067 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
8068 ; X86-NEXT: vpsubsw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x00]
8069 ; X86-NEXT: retl # encoding: [0xc3]
8071 ; X64-LABEL: test_test_subs_epi16_rmkz_128:
8073 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8074 ; X64-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x07]
8075 ; X64-NEXT: retq # encoding: [0xc3]
8076 %b = load <8 x i16>, ptr %ptr_b
8077 %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a, <8 x i16> %b)
8078 %2 = bitcast i8 %mask to <8 x i1>
8079 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
8080 ret <8 x i16> %3
8081 }
8083 declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) #0
8085 define <16 x i16> @test_test_subs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
8086 ; X86-LABEL: test_test_subs_epi16_rrk_256:
8088 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8089 ; X86-NEXT: vpsubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1]
8090 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8091 ; X86-NEXT: retl # encoding: [0xc3]
8093 ; X64-LABEL: test_test_subs_epi16_rrk_256:
8095 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8096 ; X64-NEXT: vpsubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1]
8097 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8098 ; X64-NEXT: retq # encoding: [0xc3]
8099 %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a, <16 x i16> %b)
8100 %2 = bitcast i16 %mask to <16 x i1>
8101 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
8102 ret <16 x i16> %3
8103 }
8105 define <16 x i16> @test_test_subs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
8106 ; X86-LABEL: test_test_subs_epi16_rrkz_256:
8108 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8109 ; X86-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1]
8110 ; X86-NEXT: retl # encoding: [0xc3]
8112 ; X64-LABEL: test_test_subs_epi16_rrkz_256:
8114 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8115 ; X64-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1]
8116 ; X64-NEXT: retq # encoding: [0xc3]
8117 %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a, <16 x i16> %b)
8118 %2 = bitcast i16 %mask to <16 x i1>
8119 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
8120 ret <16 x i16> %3
8121 }
8123 define <16 x i16> @test_test_subs_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
8124 ; X86-LABEL: test_test_subs_epi16_rmk_256:
8126 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8127 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8128 ; X86-NEXT: vpsubsw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x08]
8129 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8130 ; X86-NEXT: retl # encoding: [0xc3]
8132 ; X64-LABEL: test_test_subs_epi16_rmk_256:
8134 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8135 ; X64-NEXT: vpsubsw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x0f]
8136 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8137 ; X64-NEXT: retq # encoding: [0xc3]
8138 %b = load <16 x i16>, ptr %ptr_b
8139 %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a, <16 x i16> %b)
8140 %2 = bitcast i16 %mask to <16 x i1>
8141 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
8142 ret <16 x i16> %3
8143 }
8145 define <16 x i16> @test_test_subs_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) {
8146 ; X86-LABEL: test_test_subs_epi16_rmkz_256:
8148 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8149 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8150 ; X86-NEXT: vpsubsw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x00]
8151 ; X86-NEXT: retl # encoding: [0xc3]
8153 ; X64-LABEL: test_test_subs_epi16_rmkz_256:
8155 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8156 ; X64-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x07]
8157 ; X64-NEXT: retq # encoding: [0xc3]
8158 %b = load <16 x i16>, ptr %ptr_b
8159 %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a, <16 x i16> %b)
8160 %2 = bitcast i16 %mask to <16 x i1>
8161 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
8162 ret <16 x i16> %3
8163 }
8165 declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) #0
8167 define <16 x i8> @test_test_subs_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
8168 ; X86-LABEL: test_test_subs_epi8_rrk_128:
8170 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8171 ; X86-NEXT: vpsubsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1]
8172 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8173 ; X86-NEXT: retl # encoding: [0xc3]
8175 ; X64-LABEL: test_test_subs_epi8_rrk_128:
8177 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8178 ; X64-NEXT: vpsubsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1]
8179 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8180 ; X64-NEXT: retq # encoding: [0xc3]
8181 %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a, <16 x i8> %b)
8182 %2 = bitcast i16 %mask to <16 x i1>
8183 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
8184 ret <16 x i8> %3
8185 }
8187 define <16 x i8> @test_test_subs_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
8188 ; X86-LABEL: test_test_subs_epi8_rrkz_128:
8190 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8191 ; X86-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1]
8192 ; X86-NEXT: retl # encoding: [0xc3]
8194 ; X64-LABEL: test_test_subs_epi8_rrkz_128:
8196 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8197 ; X64-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1]
8198 ; X64-NEXT: retq # encoding: [0xc3]
8199 %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a, <16 x i8> %b)
8200 %2 = bitcast i16 %mask to <16 x i1>
8201 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
8202 ret <16 x i8> %3
8203 }
8205 define <16 x i8> @test_test_subs_epi8_rmk_128(<16 x i8> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) {
8206 ; X86-LABEL: test_test_subs_epi8_rmk_128:
8208 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8209 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8210 ; X86-NEXT: vpsubsb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x08]
8211 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8212 ; X86-NEXT: retl # encoding: [0xc3]
8214 ; X64-LABEL: test_test_subs_epi8_rmk_128:
8216 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8217 ; X64-NEXT: vpsubsb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x0f]
8218 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8219 ; X64-NEXT: retq # encoding: [0xc3]
8220 %b = load <16 x i8>, ptr %ptr_b
8221 %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a, <16 x i8> %b)
8222 %2 = bitcast i16 %mask to <16 x i1>
8223 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
8224 ret <16 x i8> %3
8225 }
8227 define <16 x i8> @test_test_subs_epi8_rmkz_128(<16 x i8> %a, ptr %ptr_b, i16 %mask) {
8228 ; X86-LABEL: test_test_subs_epi8_rmkz_128:
8230 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8231 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8232 ; X86-NEXT: vpsubsb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x00]
8233 ; X86-NEXT: retl # encoding: [0xc3]
8235 ; X64-LABEL: test_test_subs_epi8_rmkz_128:
8237 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8238 ; X64-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x07]
8239 ; X64-NEXT: retq # encoding: [0xc3]
8240 %b = load <16 x i8>, ptr %ptr_b
8241 %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a, <16 x i8> %b)
8242 %2 = bitcast i16 %mask to <16 x i1>
8243 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
8244 ret <16 x i8> %3
8245 }
8247 declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) #0
8249 define <32 x i8> @test_test_subs_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
8250 ; X86-LABEL: test_test_subs_epi8_rrk_256:
8252 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
8253 ; X86-NEXT: vpsubsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1]
8254 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8255 ; X86-NEXT: retl # encoding: [0xc3]
8257 ; X64-LABEL: test_test_subs_epi8_rrk_256:
8259 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8260 ; X64-NEXT: vpsubsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1]
8261 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8262 ; X64-NEXT: retq # encoding: [0xc3]
8263 %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b)
8264 %2 = bitcast i32 %mask to <32 x i1>
8265 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
8266 ret <32 x i8> %3
8267 }
8269 define <32 x i8> @test_test_subs_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
8270 ; X86-LABEL: test_test_subs_epi8_rrkz_256:
8272 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
8273 ; X86-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1]
8274 ; X86-NEXT: retl # encoding: [0xc3]
8276 ; X64-LABEL: test_test_subs_epi8_rrkz_256:
8278 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8279 ; X64-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1]
8280 ; X64-NEXT: retq # encoding: [0xc3]
8281 %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b)
8282 %2 = bitcast i32 %mask to <32 x i1>
8283 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
8284 ret <32 x i8> %3
8285 }
8287 define <32 x i8> @test_test_subs_epi8_rmk_256(<32 x i8> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) {
8288 ; X86-LABEL: test_test_subs_epi8_rmk_256:
8290 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8291 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
8292 ; X86-NEXT: vpsubsb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x08]
8293 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8294 ; X86-NEXT: retl # encoding: [0xc3]
8296 ; X64-LABEL: test_test_subs_epi8_rmk_256:
8298 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8299 ; X64-NEXT: vpsubsb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x0f]
8300 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8301 ; X64-NEXT: retq # encoding: [0xc3]
8302 %b = load <32 x i8>, ptr %ptr_b
8303 %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b)
8304 %2 = bitcast i32 %mask to <32 x i1>
8305 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
8306 ret <32 x i8> %3
8307 }
8309 define <32 x i8> @test_test_subs_epi8_rmkz_256(<32 x i8> %a, ptr %ptr_b, i32 %mask) {
8310 ; X86-LABEL: test_test_subs_epi8_rmkz_256:
8312 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8313 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
8314 ; X86-NEXT: vpsubsb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x00]
8315 ; X86-NEXT: retl # encoding: [0xc3]
8317 ; X64-LABEL: test_test_subs_epi8_rmkz_256:
8319 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8320 ; X64-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x07]
8321 ; X64-NEXT: retq # encoding: [0xc3]
8322 %b = load <32 x i8>, ptr %ptr_b
8323 %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b)
8324 %2 = bitcast i32 %mask to <32 x i1>
8325 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
8326 ret <32 x i8> %3
8327 }
8329 define <8 x i16> @test_mask_subs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
8330 ; CHECK-LABEL: test_mask_subs_epi16_rr_128:
8332 ; CHECK-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0xc1]
8333 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
8334 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
8335 ret <8 x i16> %res
8336 }
8338 define <8 x i16> @test_mask_subs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
8339 ; X86-LABEL: test_mask_subs_epi16_rrk_128:
8341 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8342 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
8343 ; X86-NEXT: vpsubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1]
8344 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8345 ; X86-NEXT: retl # encoding: [0xc3]
8347 ; X64-LABEL: test_mask_subs_epi16_rrk_128:
8349 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8350 ; X64-NEXT: vpsubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1]
8351 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8352 ; X64-NEXT: retq # encoding: [0xc3]
8353 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
8354 ret <8 x i16> %res
8355 }
8357 define <8 x i16> @test_mask_subs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
8358 ; X86-LABEL: test_mask_subs_epi16_rrkz_128:
8360 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8361 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
8362 ; X86-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1]
8363 ; X86-NEXT: retl # encoding: [0xc3]
8365 ; X64-LABEL: test_mask_subs_epi16_rrkz_128:
8367 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8368 ; X64-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1]
8369 ; X64-NEXT: retq # encoding: [0xc3]
8370 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
8371 ret <8 x i16> %res
8372 }
8374 define <8 x i16> @test_mask_subs_epi16_rm_128(<8 x i16> %a, ptr %ptr_b) {
8375 ; X86-LABEL: test_mask_subs_epi16_rm_128:
8377 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8378 ; X86-NEXT: vpsubsw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0x00]
8379 ; X86-NEXT: retl # encoding: [0xc3]
8381 ; X64-LABEL: test_mask_subs_epi16_rm_128:
8383 ; X64-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0x07]
8384 ; X64-NEXT: retq # encoding: [0xc3]
8385 %b = load <8 x i16>, ptr %ptr_b
8386 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
8387 ret <8 x i16> %res
8388 }
8390 define <8 x i16> @test_mask_subs_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
8391 ; X86-LABEL: test_mask_subs_epi16_rmk_128:
8393 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8394 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
8395 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
8396 ; X86-NEXT: vpsubsw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x08]
8397 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8398 ; X86-NEXT: retl # encoding: [0xc3]
8400 ; X64-LABEL: test_mask_subs_epi16_rmk_128:
8402 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8403 ; X64-NEXT: vpsubsw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x0f]
8404 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8405 ; X64-NEXT: retq # encoding: [0xc3]
8406 %b = load <8 x i16>, ptr %ptr_b
8407 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
8408 ret <8 x i16> %res
8409 }
8411 define <8 x i16> @test_mask_subs_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
8412 ; X86-LABEL: test_mask_subs_epi16_rmkz_128:
8414 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8415 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
8416 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
8417 ; X86-NEXT: vpsubsw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x00]
8418 ; X86-NEXT: retl # encoding: [0xc3]
8420 ; X64-LABEL: test_mask_subs_epi16_rmkz_128:
8422 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8423 ; X64-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x07]
8424 ; X64-NEXT: retq # encoding: [0xc3]
8425 %b = load <8 x i16>, ptr %ptr_b
8426 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
8427 ret <8 x i16> %res
8428 }
8430 declare <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
8432 define <16 x i16> @test_mask_subs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
8433 ; CHECK-LABEL: test_mask_subs_epi16_rr_256:
8435 ; CHECK-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0xc1]
8436 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
8437 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
8438 ret <16 x i16> %res
8439 }
8441 define <16 x i16> @test_mask_subs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
8442 ; X86-LABEL: test_mask_subs_epi16_rrk_256:
8444 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8445 ; X86-NEXT: vpsubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1]
8446 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8447 ; X86-NEXT: retl # encoding: [0xc3]
8449 ; X64-LABEL: test_mask_subs_epi16_rrk_256:
8451 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8452 ; X64-NEXT: vpsubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1]
8453 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8454 ; X64-NEXT: retq # encoding: [0xc3]
8455 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
8456 ret <16 x i16> %res
8457 }
8459 define <16 x i16> @test_mask_subs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
8460 ; X86-LABEL: test_mask_subs_epi16_rrkz_256:
8462 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8463 ; X86-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1]
8464 ; X86-NEXT: retl # encoding: [0xc3]
8466 ; X64-LABEL: test_mask_subs_epi16_rrkz_256:
8468 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8469 ; X64-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1]
8470 ; X64-NEXT: retq # encoding: [0xc3]
8471 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
8472 ret <16 x i16> %res
8473 }
8475 define <16 x i16> @test_mask_subs_epi16_rm_256(<16 x i16> %a, ptr %ptr_b) {
8476 ; X86-LABEL: test_mask_subs_epi16_rm_256:
8478 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8479 ; X86-NEXT: vpsubsw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0x00]
8480 ; X86-NEXT: retl # encoding: [0xc3]
8482 ; X64-LABEL: test_mask_subs_epi16_rm_256:
8484 ; X64-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0x07]
8485 ; X64-NEXT: retq # encoding: [0xc3]
8486 %b = load <16 x i16>, ptr %ptr_b
8487 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
8488 ret <16 x i16> %res
8489 }
8491 define <16 x i16> @test_mask_subs_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
8492 ; X86-LABEL: test_mask_subs_epi16_rmk_256:
8494 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8495 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8496 ; X86-NEXT: vpsubsw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x08]
8497 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8498 ; X86-NEXT: retl # encoding: [0xc3]
8500 ; X64-LABEL: test_mask_subs_epi16_rmk_256:
8502 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8503 ; X64-NEXT: vpsubsw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x0f]
8504 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8505 ; X64-NEXT: retq # encoding: [0xc3]
8506 %b = load <16 x i16>, ptr %ptr_b
8507 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
8508 ret <16 x i16> %res
8509 }
8511 define <16 x i16> @test_mask_subs_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) {
8512 ; X86-LABEL: test_mask_subs_epi16_rmkz_256:
8514 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8515 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8516 ; X86-NEXT: vpsubsw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x00]
8517 ; X86-NEXT: retl # encoding: [0xc3]
8519 ; X64-LABEL: test_mask_subs_epi16_rmkz_256:
8521 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8522 ; X64-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x07]
8523 ; X64-NEXT: retq # encoding: [0xc3]
8524 %b = load <16 x i16>, ptr %ptr_b
8525 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
8526 ret <16 x i16> %res
8527 }
8529 declare <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
8531 declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) #0
8533 define <8 x i16> @test_adds_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
8534 ; X86-LABEL: test_adds_epi16_rrk_128:
8536 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8537 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
8538 ; X86-NEXT: vpaddsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1]
8539 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8540 ; X86-NEXT: retl # encoding: [0xc3]
8542 ; X64-LABEL: test_adds_epi16_rrk_128:
8544 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8545 ; X64-NEXT: vpaddsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1]
8546 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8547 ; X64-NEXT: retq # encoding: [0xc3]
8548 %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b)
8549 %2 = bitcast i8 %mask to <8 x i1>
8550 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
8551 ret <8 x i16> %3
8552 }
8554 define <8 x i16> @test_adds_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
8555 ; X86-LABEL: test_adds_epi16_rrkz_128:
8557 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8558 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
8559 ; X86-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1]
8560 ; X86-NEXT: retl # encoding: [0xc3]
8562 ; X64-LABEL: test_adds_epi16_rrkz_128:
8564 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8565 ; X64-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1]
8566 ; X64-NEXT: retq # encoding: [0xc3]
8567 %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b)
8568 %2 = bitcast i8 %mask to <8 x i1>
8569 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
8570 ret <8 x i16> %3
8571 }
8573 define <8 x i16> @test_adds_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
8574 ; X86-LABEL: test_adds_epi16_rmk_128:
8576 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8577 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
8578 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
8579 ; X86-NEXT: vpaddsw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0x08]
8580 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8581 ; X86-NEXT: retl # encoding: [0xc3]
8583 ; X64-LABEL: test_adds_epi16_rmk_128:
8585 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8586 ; X64-NEXT: vpaddsw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0x0f]
8587 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8588 ; X64-NEXT: retq # encoding: [0xc3]
8589 %b = load <8 x i16>, ptr %ptr_b
8590 %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b)
8591 %2 = bitcast i8 %mask to <8 x i1>
8592 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
8593 ret <8 x i16> %3
8594 }
8596 define <8 x i16> @test_adds_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
8597 ; X86-LABEL: test_adds_epi16_rmkz_128:
8599 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8600 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
8601 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
8602 ; X86-NEXT: vpaddsw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0x00]
8603 ; X86-NEXT: retl # encoding: [0xc3]
8605 ; X64-LABEL: test_adds_epi16_rmkz_128:
8607 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8608 ; X64-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0x07]
8609 ; X64-NEXT: retq # encoding: [0xc3]
8610 %b = load <8 x i16>, ptr %ptr_b
8611 %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b)
8612 %2 = bitcast i8 %mask to <8 x i1>
8613 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
8614 ret <8 x i16> %3
8615 }
8617 declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) #0
8619 define <16 x i16> @test_adds_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
8620 ; X86-LABEL: test_adds_epi16_rrk_256:
8622 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8623 ; X86-NEXT: vpaddsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1]
8624 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8625 ; X86-NEXT: retl # encoding: [0xc3]
8627 ; X64-LABEL: test_adds_epi16_rrk_256:
8629 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8630 ; X64-NEXT: vpaddsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1]
8631 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8632 ; X64-NEXT: retq # encoding: [0xc3]
8633 %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a, <16 x i16> %b)
8634 %2 = bitcast i16 %mask to <16 x i1>
8635 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
8636 ret <16 x i16> %3
8637 }
8639 define <16 x i16> @test_adds_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
8640 ; X86-LABEL: test_adds_epi16_rrkz_256:
8642 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8643 ; X86-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1]
8644 ; X86-NEXT: retl # encoding: [0xc3]
8646 ; X64-LABEL: test_adds_epi16_rrkz_256:
8648 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8649 ; X64-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1]
8650 ; X64-NEXT: retq # encoding: [0xc3]
8651 %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a, <16 x i16> %b)
8652 %2 = bitcast i16 %mask to <16 x i1>
8653 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
8654 ret <16 x i16> %3
8655 }
8657 define <16 x i16> @test_adds_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
8658 ; X86-LABEL: test_adds_epi16_rmk_256:
8660 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8661 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8662 ; X86-NEXT: vpaddsw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0x08]
8663 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8664 ; X86-NEXT: retl # encoding: [0xc3]
8666 ; X64-LABEL: test_adds_epi16_rmk_256:
8668 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8669 ; X64-NEXT: vpaddsw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0x0f]
8670 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8671 ; X64-NEXT: retq # encoding: [0xc3]
8672 %b = load <16 x i16>, ptr %ptr_b
8673 %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a, <16 x i16> %b)
8674 %2 = bitcast i16 %mask to <16 x i1>
8675 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
8676 ret <16 x i16> %3
8677 }
8679 define <16 x i16> @test_adds_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) {
8680 ; X86-LABEL: test_adds_epi16_rmkz_256:
8682 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8683 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8684 ; X86-NEXT: vpaddsw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x00]
8685 ; X86-NEXT: retl # encoding: [0xc3]
8687 ; X64-LABEL: test_adds_epi16_rmkz_256:
8689 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8690 ; X64-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x07]
8691 ; X64-NEXT: retq # encoding: [0xc3]
8692 %b = load <16 x i16>, ptr %ptr_b
8693 %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a, <16 x i16> %b)
8694 %2 = bitcast i16 %mask to <16 x i1>
8695 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
8696 ret <16 x i16> %3
8697 }
8699 declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) #0
8701 define <16 x i8> @test_adds_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
8702 ; X86-LABEL: test_adds_epi8_rrk_128:
8704 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8705 ; X86-NEXT: vpaddsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1]
8706 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8707 ; X86-NEXT: retl # encoding: [0xc3]
8709 ; X64-LABEL: test_adds_epi8_rrk_128:
8711 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8712 ; X64-NEXT: vpaddsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1]
8713 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8714 ; X64-NEXT: retq # encoding: [0xc3]
8715 %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b)
8716 %2 = bitcast i16 %mask to <16 x i1>
8717 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
8718 ret <16 x i8> %3
8719 }
8721 define <16 x i8> @test_adds_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
8722 ; X86-LABEL: test_adds_epi8_rrkz_128:
8724 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8725 ; X86-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1]
8726 ; X86-NEXT: retl # encoding: [0xc3]
8728 ; X64-LABEL: test_adds_epi8_rrkz_128:
8730 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8731 ; X64-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1]
8732 ; X64-NEXT: retq # encoding: [0xc3]
8733 %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b)
8734 %2 = bitcast i16 %mask to <16 x i1>
8735 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
8736 ret <16 x i8> %3
8737 }
8739 define <16 x i8> @test_adds_epi8_rm_128(<16 x i8> %a, ptr %ptr_b) {
8740 ; X86-LABEL: test_adds_epi8_rm_128:
8742 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8743 ; X86-NEXT: vpaddsb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x00]
8744 ; X86-NEXT: retl # encoding: [0xc3]
8746 ; X64-LABEL: test_adds_epi8_rm_128:
8748 ; X64-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x07]
8749 ; X64-NEXT: retq # encoding: [0xc3]
8750 %b = load <16 x i8>, ptr %ptr_b
8751 %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b)
8752 ret <16 x i8> %1
8753 }
8755 define <16 x i8> @test_adds_epi8_rmk_128(<16 x i8> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) {
8756 ; X86-LABEL: test_adds_epi8_rmk_128:
8758 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8759 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8760 ; X86-NEXT: vpaddsb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0x08]
8761 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8762 ; X86-NEXT: retl # encoding: [0xc3]
8764 ; X64-LABEL: test_adds_epi8_rmk_128:
8766 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8767 ; X64-NEXT: vpaddsb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0x0f]
8768 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8769 ; X64-NEXT: retq # encoding: [0xc3]
8770 %b = load <16 x i8>, ptr %ptr_b
8771 %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b)
8772 %2 = bitcast i16 %mask to <16 x i1>
8773 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
8774 ret <16 x i8> %3
8775 }
8777 define <16 x i8> @test_adds_epi8_rmkz_128(<16 x i8> %a, ptr %ptr_b, i16 %mask) {
8778 ; X86-LABEL: test_adds_epi8_rmkz_128:
8780 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8781 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8782 ; X86-NEXT: vpaddsb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0x00]
8783 ; X86-NEXT: retl # encoding: [0xc3]
8785 ; X64-LABEL: test_adds_epi8_rmkz_128:
8787 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8788 ; X64-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0x07]
8789 ; X64-NEXT: retq # encoding: [0xc3]
8790 %b = load <16 x i8>, ptr %ptr_b
8791 %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b)
8792 %2 = bitcast i16 %mask to <16 x i1>
8793 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
8794 ret <16 x i8> %3
8795 }
8797 declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) #0
8799 define <32 x i8> @test_adds_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
8800 ; X86-LABEL: test_adds_epi8_rrk_256:
8802 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
8803 ; X86-NEXT: vpaddsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1]
8804 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8805 ; X86-NEXT: retl # encoding: [0xc3]
8807 ; X64-LABEL: test_adds_epi8_rrk_256:
8809 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8810 ; X64-NEXT: vpaddsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1]
8811 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8812 ; X64-NEXT: retq # encoding: [0xc3]
8813 %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a, <32 x i8> %b)
8814 %2 = bitcast i32 %mask to <32 x i1>
8815 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
8816 ret <32 x i8> %3
8817 }
8819 define <32 x i8> @test_adds_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
8820 ; X86-LABEL: test_adds_epi8_rrkz_256:
8822 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
8823 ; X86-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1]
8824 ; X86-NEXT: retl # encoding: [0xc3]
8826 ; X64-LABEL: test_adds_epi8_rrkz_256:
8828 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8829 ; X64-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1]
8830 ; X64-NEXT: retq # encoding: [0xc3]
8831 %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a, <32 x i8> %b)
8832 %2 = bitcast i32 %mask to <32 x i1>
8833 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
8834 ret <32 x i8> %3
8835 }
8837 define <32 x i8> @test_adds_epi8_rmk_256(<32 x i8> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) {
8838 ; X86-LABEL: test_adds_epi8_rmk_256:
8840 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8841 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
8842 ; X86-NEXT: vpaddsb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0x08]
8843 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8844 ; X86-NEXT: retl # encoding: [0xc3]
8846 ; X64-LABEL: test_adds_epi8_rmk_256:
8848 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8849 ; X64-NEXT: vpaddsb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0x0f]
8850 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8851 ; X64-NEXT: retq # encoding: [0xc3]
8852 %b = load <32 x i8>, ptr %ptr_b
8853 %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a, <32 x i8> %b)
8854 %2 = bitcast i32 %mask to <32 x i1>
8855 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
8856 ret <32 x i8> %3
8857 }
8859 define <32 x i8> @test_adds_epi8_rmkz_256(<32 x i8> %a, ptr %ptr_b, i32 %mask) {
8860 ; X86-LABEL: test_adds_epi8_rmkz_256:
8862 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8863 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
8864 ; X86-NEXT: vpaddsb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x00]
8865 ; X86-NEXT: retl # encoding: [0xc3]
8867 ; X64-LABEL: test_adds_epi8_rmkz_256:
8869 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8870 ; X64-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x07]
8871 ; X64-NEXT: retq # encoding: [0xc3]
8872 %b = load <32 x i8>, ptr %ptr_b
8873 %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a, <32 x i8> %b)
8874 %2 = bitcast i32 %mask to <32 x i1>
8875 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
8876 ret <32 x i8> %3
8877 }
8879 define <16 x i8> @test_mask_adds_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
8880 ; CHECK-LABEL: test_mask_adds_epi8_rr_128:
8882 ; CHECK-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0xc1]
8883 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
8884 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
8885 ret <16 x i8> %res
8886 }
8888 define <16 x i8> @test_mask_adds_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
8889 ; X86-LABEL: test_mask_adds_epi8_rrk_128:
8891 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8892 ; X86-NEXT: vpaddsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1]
8893 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8894 ; X86-NEXT: retl # encoding: [0xc3]
8896 ; X64-LABEL: test_mask_adds_epi8_rrk_128:
8898 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8899 ; X64-NEXT: vpaddsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1]
8900 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8901 ; X64-NEXT: retq # encoding: [0xc3]
8902 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
8903 ret <16 x i8> %res
8904 }
8906 define <16 x i8> @test_mask_adds_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
8907 ; X86-LABEL: test_mask_adds_epi8_rrkz_128:
8909 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8910 ; X86-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1]
8911 ; X86-NEXT: retl # encoding: [0xc3]
8913 ; X64-LABEL: test_mask_adds_epi8_rrkz_128:
8915 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8916 ; X64-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1]
8917 ; X64-NEXT: retq # encoding: [0xc3]
8918 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
8919 ret <16 x i8> %res
8920 }
8922 define <16 x i8> @test_mask_adds_epi8_rm_128(<16 x i8> %a, ptr %ptr_b) {
8923 ; X86-LABEL: test_mask_adds_epi8_rm_128:
8925 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8926 ; X86-NEXT: vpaddsb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x00]
8927 ; X86-NEXT: retl # encoding: [0xc3]
8929 ; X64-LABEL: test_mask_adds_epi8_rm_128:
8931 ; X64-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x07]
8932 ; X64-NEXT: retq # encoding: [0xc3]
8933 %b = load <16 x i8>, ptr %ptr_b
8934 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
8935 ret <16 x i8> %res
8936 }
8938 define <16 x i8> @test_mask_adds_epi8_rmk_128(<16 x i8> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) {
8939 ; X86-LABEL: test_mask_adds_epi8_rmk_128:
8941 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8942 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8943 ; X86-NEXT: vpaddsb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0x08]
8944 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8945 ; X86-NEXT: retl # encoding: [0xc3]
8947 ; X64-LABEL: test_mask_adds_epi8_rmk_128:
8949 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8950 ; X64-NEXT: vpaddsb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0x0f]
8951 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8952 ; X64-NEXT: retq # encoding: [0xc3]
8953 %b = load <16 x i8>, ptr %ptr_b
8954 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
8955 ret <16 x i8> %res
8956 }
8958 define <16 x i8> @test_mask_adds_epi8_rmkz_128(<16 x i8> %a, ptr %ptr_b, i16 %mask) {
8959 ; X86-LABEL: test_mask_adds_epi8_rmkz_128:
8961 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8962 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8963 ; X86-NEXT: vpaddsb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0x00]
8964 ; X86-NEXT: retl # encoding: [0xc3]
8966 ; X64-LABEL: test_mask_adds_epi8_rmkz_128:
8968 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8969 ; X64-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0x07]
8970 ; X64-NEXT: retq # encoding: [0xc3]
8971 %b = load <16 x i8>, ptr %ptr_b
8972 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
8973 ret <16 x i8> %res
8974 }
8976 declare <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
8978 define <32 x i8> @test_mask_adds_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
8979 ; CHECK-LABEL: test_mask_adds_epi8_rr_256:
8981 ; CHECK-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0xc1]
8982 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
8983 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
8984 ret <32 x i8> %res
8985 }
8987 define <32 x i8> @test_mask_adds_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
8988 ; X86-LABEL: test_mask_adds_epi8_rrk_256:
8990 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
8991 ; X86-NEXT: vpaddsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1]
8992 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8993 ; X86-NEXT: retl # encoding: [0xc3]
8995 ; X64-LABEL: test_mask_adds_epi8_rrk_256:
8997 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8998 ; X64-NEXT: vpaddsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1]
8999 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9000 ; X64-NEXT: retq # encoding: [0xc3]
9001 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
9002 ret <32 x i8> %res
9003 }
9005 define <32 x i8> @test_mask_adds_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
9006 ; X86-LABEL: test_mask_adds_epi8_rrkz_256:
9008 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
9009 ; X86-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1]
9010 ; X86-NEXT: retl # encoding: [0xc3]
9012 ; X64-LABEL: test_mask_adds_epi8_rrkz_256:
9014 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9015 ; X64-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1]
9016 ; X64-NEXT: retq # encoding: [0xc3]
9017 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
9018 ret <32 x i8> %res
9019 }
9021 define <32 x i8> @test_mask_adds_epi8_rm_256(<32 x i8> %a, ptr %ptr_b) {
9022 ; X86-LABEL: test_mask_adds_epi8_rm_256:
9024 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9025 ; X86-NEXT: vpaddsb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0x00]
9026 ; X86-NEXT: retl # encoding: [0xc3]
9028 ; X64-LABEL: test_mask_adds_epi8_rm_256:
9030 ; X64-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0x07]
9031 ; X64-NEXT: retq # encoding: [0xc3]
9032 %b = load <32 x i8>, ptr %ptr_b
9033 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
9034 ret <32 x i8> %res
9035 }
9037 define <32 x i8> @test_mask_adds_epi8_rmk_256(<32 x i8> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) {
9038 ; X86-LABEL: test_mask_adds_epi8_rmk_256:
9040 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9041 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
9042 ; X86-NEXT: vpaddsb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0x08]
9043 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
9044 ; X86-NEXT: retl # encoding: [0xc3]
9046 ; X64-LABEL: test_mask_adds_epi8_rmk_256:
9048 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
9049 ; X64-NEXT: vpaddsb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0x0f]
9050 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
9051 ; X64-NEXT: retq # encoding: [0xc3]
9052 %b = load <32 x i8>, ptr %ptr_b
9053 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
9054 ret <32 x i8> %res
9055 }
9057 define <32 x i8> @test_mask_adds_epi8_rmkz_256(<32 x i8> %a, ptr %ptr_b, i32 %mask) {
9058 ; X86-LABEL: test_mask_adds_epi8_rmkz_256:
9060 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9061 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
9062 ; X86-NEXT: vpaddsb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x00]
9063 ; X86-NEXT: retl # encoding: [0xc3]
9065 ; X64-LABEL: test_mask_adds_epi8_rmkz_256:
9067 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
9068 ; X64-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x07]
9069 ; X64-NEXT: retq # encoding: [0xc3]
9070 %b = load <32 x i8>, ptr %ptr_b
9071 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
9072 ret <32 x i8> %res
9073 }
9075 declare <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
9077 define <16 x i8> @test_mask_subs_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
9078 ; CHECK-LABEL: test_mask_subs_epi8_rr_128:
9080 ; CHECK-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0xc1]
9081 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
9082 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
9083 ret <16 x i8> %res
9084 }
9086 define <16 x i8> @test_mask_subs_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
9087 ; X86-LABEL: test_mask_subs_epi8_rrk_128:
9089 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9090 ; X86-NEXT: vpsubsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1]
9091 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9092 ; X86-NEXT: retl # encoding: [0xc3]
9094 ; X64-LABEL: test_mask_subs_epi8_rrk_128:
9096 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9097 ; X64-NEXT: vpsubsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1]
9098 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9099 ; X64-NEXT: retq # encoding: [0xc3]
9100 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
9101 ret <16 x i8> %res
9102 }
9104 define <16 x i8> @test_mask_subs_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
9105 ; X86-LABEL: test_mask_subs_epi8_rrkz_128:
9107 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9108 ; X86-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1]
9109 ; X86-NEXT: retl # encoding: [0xc3]
9111 ; X64-LABEL: test_mask_subs_epi8_rrkz_128:
9113 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9114 ; X64-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1]
9115 ; X64-NEXT: retq # encoding: [0xc3]
9116 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
9117 ret <16 x i8> %res
9118 }
9120 define <16 x i8> @test_mask_subs_epi8_rm_128(<16 x i8> %a, ptr %ptr_b) {
9121 ; X86-LABEL: test_mask_subs_epi8_rm_128:
9123 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9124 ; X86-NEXT: vpsubsb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0x00]
9125 ; X86-NEXT: retl # encoding: [0xc3]
9127 ; X64-LABEL: test_mask_subs_epi8_rm_128:
9129 ; X64-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0x07]
9130 ; X64-NEXT: retq # encoding: [0xc3]
9131 %b = load <16 x i8>, ptr %ptr_b
9132 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
9133 ret <16 x i8> %res
9134 }
9136 define <16 x i8> @test_mask_subs_epi8_rmk_128(<16 x i8> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) {
9137 ; X86-LABEL: test_mask_subs_epi8_rmk_128:
9139 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9140 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
9141 ; X86-NEXT: vpsubsb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x08]
9142 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
9143 ; X86-NEXT: retl # encoding: [0xc3]
9145 ; X64-LABEL: test_mask_subs_epi8_rmk_128:
9147 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
9148 ; X64-NEXT: vpsubsb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x0f]
9149 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
9150 ; X64-NEXT: retq # encoding: [0xc3]
9151 %b = load <16 x i8>, ptr %ptr_b
9152 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
9153 ret <16 x i8> %res
9154 }
9156 define <16 x i8> @test_mask_subs_epi8_rmkz_128(<16 x i8> %a, ptr %ptr_b, i16 %mask) {
9157 ; X86-LABEL: test_mask_subs_epi8_rmkz_128:
9159 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9160 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
9161 ; X86-NEXT: vpsubsb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x00]
9162 ; X86-NEXT: retl # encoding: [0xc3]
9164 ; X64-LABEL: test_mask_subs_epi8_rmkz_128:
9166 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
9167 ; X64-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x07]
9168 ; X64-NEXT: retq # encoding: [0xc3]
9169 %b = load <16 x i8>, ptr %ptr_b
9170 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
9171 ret <16 x i8> %res
9172 }
9174 declare <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
9176 define <32 x i8> @test_mask_subs_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
9177 ; CHECK-LABEL: test_mask_subs_epi8_rr_256:
9179 ; CHECK-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0xc1]
9180 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
9181 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
9182 ret <32 x i8> %res
9183 }
9185 define <32 x i8> @test_mask_subs_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
9186 ; X86-LABEL: test_mask_subs_epi8_rrk_256:
9188 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
9189 ; X86-NEXT: vpsubsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1]
9190 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9191 ; X86-NEXT: retl # encoding: [0xc3]
9193 ; X64-LABEL: test_mask_subs_epi8_rrk_256:
9195 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9196 ; X64-NEXT: vpsubsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1]
9197 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9198 ; X64-NEXT: retq # encoding: [0xc3]
9199 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
9200 ret <32 x i8> %res
9201 }
9203 define <32 x i8> @test_mask_subs_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
9204 ; X86-LABEL: test_mask_subs_epi8_rrkz_256:
9206 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
9207 ; X86-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1]
9208 ; X86-NEXT: retl # encoding: [0xc3]
9210 ; X64-LABEL: test_mask_subs_epi8_rrkz_256:
9212 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9213 ; X64-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1]
9214 ; X64-NEXT: retq # encoding: [0xc3]
9215 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
9216 ret <32 x i8> %res
9217 }
9219 define <32 x i8> @test_mask_subs_epi8_rm_256(<32 x i8> %a, ptr %ptr_b) {
9220 ; X86-LABEL: test_mask_subs_epi8_rm_256:
9222 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9223 ; X86-NEXT: vpsubsb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0x00]
9224 ; X86-NEXT: retl # encoding: [0xc3]
9226 ; X64-LABEL: test_mask_subs_epi8_rm_256:
9228 ; X64-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0x07]
9229 ; X64-NEXT: retq # encoding: [0xc3]
9230 %b = load <32 x i8>, ptr %ptr_b
9231 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
9232 ret <32 x i8> %res
9233 }
9235 define <32 x i8> @test_mask_subs_epi8_rmk_256(<32 x i8> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) {
9236 ; X86-LABEL: test_mask_subs_epi8_rmk_256:
9238 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9239 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
9240 ; X86-NEXT: vpsubsb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x08]
9241 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
9242 ; X86-NEXT: retl # encoding: [0xc3]
9244 ; X64-LABEL: test_mask_subs_epi8_rmk_256:
9246 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
9247 ; X64-NEXT: vpsubsb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x0f]
9248 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
9249 ; X64-NEXT: retq # encoding: [0xc3]
9250 %b = load <32 x i8>, ptr %ptr_b
9251 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
9252 ret <32 x i8> %res
9253 }
9255 define <32 x i8> @test_mask_subs_epi8_rmkz_256(<32 x i8> %a, ptr %ptr_b, i32 %mask) {
9256 ; X86-LABEL: test_mask_subs_epi8_rmkz_256:
9258 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9259 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
9260 ; X86-NEXT: vpsubsb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x00]
9261 ; X86-NEXT: retl # encoding: [0xc3]
9263 ; X64-LABEL: test_mask_subs_epi8_rmkz_256:
9265 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
9266 ; X64-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x07]
9267 ; X64-NEXT: retq # encoding: [0xc3]
9268 %b = load <32 x i8>, ptr %ptr_b
9269 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
9270 ret <32 x i8> %res
9271 }
9273 declare <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
9275 declare <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)
9277 define <16 x i16>@test_int_x86_avx512_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
9278 ; CHECK-LABEL: test_int_x86_avx512_psrav16_hi:
9280 ; CHECK-NEXT: vpsravw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x11,0xc1]
9281 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
9282 %res = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
9283 ret <16 x i16> %res
9284 }
9286 define <16 x i16>@test_int_x86_avx512_mask_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
9287 ; X86-LABEL: test_int_x86_avx512_mask_psrav16_hi:
9289 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9290 ; X86-NEXT: vpsravw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x11,0xd1]
9291 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9292 ; X86-NEXT: retl # encoding: [0xc3]
9294 ; X64-LABEL: test_int_x86_avx512_mask_psrav16_hi:
9296 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9297 ; X64-NEXT: vpsravw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x11,0xd1]
9298 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9299 ; X64-NEXT: retq # encoding: [0xc3]
9300 %res = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
9301 ret <16 x i16> %res
9302 }
9304 define <16 x i16>@test_int_x86_avx512_maskz_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
9305 ; X86-LABEL: test_int_x86_avx512_maskz_psrav16_hi:
9307 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9308 ; X86-NEXT: vpsravw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xc1]
9309 ; X86-NEXT: retl # encoding: [0xc3]
9311 ; X64-LABEL: test_int_x86_avx512_maskz_psrav16_hi:
9313 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9314 ; X64-NEXT: vpsravw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xc1]
9315 ; X64-NEXT: retq # encoding: [0xc3]
9316 %res = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
9317 ret <16 x i16> %res
9318 }
9320 declare <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8)
9322 define <8 x i16>@test_int_x86_avx512_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
9323 ; CHECK-LABEL: test_int_x86_avx512_psrav8_hi:
9325 ; CHECK-NEXT: vpsravw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x11,0xc1]
9326 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
9327 %res = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
9328 ret <8 x i16> %res
9329 }
9331 define <8 x i16>@test_int_x86_avx512_mask_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
9332 ; X86-LABEL: test_int_x86_avx512_mask_psrav8_hi:
9334 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9335 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
9336 ; X86-NEXT: vpsravw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x11,0xd1]
9337 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9338 ; X86-NEXT: retl # encoding: [0xc3]
9340 ; X64-LABEL: test_int_x86_avx512_mask_psrav8_hi:
9342 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9343 ; X64-NEXT: vpsravw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x11,0xd1]
9344 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9345 ; X64-NEXT: retq # encoding: [0xc3]
9346 %res = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
9347 ret <8 x i16> %res
9348 }
9350 define <8 x i16>@test_int_x86_avx512_maskz_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
9351 ; X86-LABEL: test_int_x86_avx512_maskz_psrav8_hi:
9353 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9354 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
9355 ; X86-NEXT: vpsravw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x11,0xc1]
9356 ; X86-NEXT: retl # encoding: [0xc3]
9358 ; X64-LABEL: test_int_x86_avx512_maskz_psrav8_hi:
9360 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9361 ; X64-NEXT: vpsravw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x11,0xc1]
9362 ; X64-NEXT: retq # encoding: [0xc3]
9363 %res = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
9364 ret <8 x i16> %res
9365 }
9367 declare <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)
9369 define <16 x i16>@test_int_x86_avx512_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
9370 ; CHECK-LABEL: test_int_x86_avx512_psllv16_hi:
9372 ; CHECK-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0xc1]
9373 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
9374 %res = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
9375 ret <16 x i16> %res
9376 }
9378 define <16 x i16>@test_int_x86_avx512_mask_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
9379 ; X86-LABEL: test_int_x86_avx512_mask_psllv16_hi:
9381 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9382 ; X86-NEXT: vpsllvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x12,0xd1]
9383 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9384 ; X86-NEXT: retl # encoding: [0xc3]
9386 ; X64-LABEL: test_int_x86_avx512_mask_psllv16_hi:
9388 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9389 ; X64-NEXT: vpsllvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x12,0xd1]
9390 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9391 ; X64-NEXT: retq # encoding: [0xc3]
9392 %res = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
9393 ret <16 x i16> %res
9394 }
9396 define <16 x i16>@test_int_x86_avx512_maskz_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
9397 ; X86-LABEL: test_int_x86_avx512_maskz_psllv16_hi:
9399 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9400 ; X86-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x12,0xc1]
9401 ; X86-NEXT: retl # encoding: [0xc3]
9403 ; X64-LABEL: test_int_x86_avx512_maskz_psllv16_hi:
9405 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9406 ; X64-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x12,0xc1]
9407 ; X64-NEXT: retq # encoding: [0xc3]
9408 %res = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
9409 ret <16 x i16> %res
9410 }
9412 declare <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8)
9414 define <8 x i16>@test_int_x86_avx512_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
9415 ; CHECK-LABEL: test_int_x86_avx512_psllv8_hi:
9417 ; CHECK-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0xc1]
9418 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
9419 %res = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
9420 ret <8 x i16> %res
9421 }
9423 define <8 x i16>@test_int_x86_avx512_mask_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
9424 ; X86-LABEL: test_int_x86_avx512_mask_psllv8_hi:
9426 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9427 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
9428 ; X86-NEXT: vpsllvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x12,0xd1]
9429 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9430 ; X86-NEXT: retl # encoding: [0xc3]
9432 ; X64-LABEL: test_int_x86_avx512_mask_psllv8_hi:
9434 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9435 ; X64-NEXT: vpsllvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x12,0xd1]
9436 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9437 ; X64-NEXT: retq # encoding: [0xc3]
9438 %res = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
9439 ret <8 x i16> %res
9440 }
9442 define <8 x i16>@test_int_x86_avx512_maskz_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
9443 ; X86-LABEL: test_int_x86_avx512_maskz_psllv8_hi:
9445 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9446 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
9447 ; X86-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x12,0xc1]
9448 ; X86-NEXT: retl # encoding: [0xc3]
9450 ; X64-LABEL: test_int_x86_avx512_maskz_psllv8_hi:
9452 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9453 ; X64-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x12,0xc1]
9454 ; X64-NEXT: retq # encoding: [0xc3]
9455 %res = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
9456 ret <8 x i16> %res
9457 }
9459 declare <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)
9461 define <16 x i16>@test_int_x86_avx512_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
9462 ; CHECK-LABEL: test_int_x86_avx512_psrlv16_hi:
9464 ; CHECK-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0xc1]
9465 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
9466 %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
9467 ret <16 x i16> %res
9468 }
9470 define <16 x i16>@test_int_x86_avx512_mask_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
9471 ; X86-LABEL: test_int_x86_avx512_mask_psrlv16_hi:
9473 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9474 ; X86-NEXT: vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1]
9475 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9476 ; X86-NEXT: retl # encoding: [0xc3]
9478 ; X64-LABEL: test_int_x86_avx512_mask_psrlv16_hi:
9480 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9481 ; X64-NEXT: vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1]
9482 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9483 ; X64-NEXT: retq # encoding: [0xc3]
9484 %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
9485 ret <16 x i16> %res
9486 }
9488 define <16 x i16>@test_int_x86_avx512_maskz_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
9489 ; X86-LABEL: test_int_x86_avx512_maskz_psrlv16_hi:
9491 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9492 ; X86-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xc1]
9493 ; X86-NEXT: retl # encoding: [0xc3]
9495 ; X64-LABEL: test_int_x86_avx512_maskz_psrlv16_hi:
9497 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9498 ; X64-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xc1]
9499 ; X64-NEXT: retq # encoding: [0xc3]
9500 %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
9501 ret <16 x i16> %res
9502 }
9504 declare <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8)
9506 define <8 x i16>@test_int_x86_avx512_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
9507 ; CHECK-LABEL: test_int_x86_avx512_psrlv8_hi:
9509 ; CHECK-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0xc1]
9510 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
9511 %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
9512 ret <8 x i16> %res
9513 }
9515 define <8 x i16>@test_int_x86_avx512_mask_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
9516 ; X86-LABEL: test_int_x86_avx512_mask_psrlv8_hi:
9518 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9519 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
9520 ; X86-NEXT: vpsrlvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x10,0xd1]
9521 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9522 ; X86-NEXT: retl # encoding: [0xc3]
9524 ; X64-LABEL: test_int_x86_avx512_mask_psrlv8_hi:
9526 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9527 ; X64-NEXT: vpsrlvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x10,0xd1]
9528 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9529 ; X64-NEXT: retq # encoding: [0xc3]
9530 %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
9531 ret <8 x i16> %res
9532 }
9534 define <8 x i16>@test_int_x86_avx512_maskz_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
9535 ; X86-LABEL: test_int_x86_avx512_maskz_psrlv8_hi:
9537 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9538 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
9539 ; X86-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x10,0xc1]
9540 ; X86-NEXT: retl # encoding: [0xc3]
9542 ; X64-LABEL: test_int_x86_avx512_maskz_psrlv8_hi:
9544 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9545 ; X64-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x10,0xc1]
9546 ; X64-NEXT: retq # encoding: [0xc3]
9547 %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
9548 ret <8 x i16> %res
9549 }
9551 declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16>, <16 x i8>, i16)
9553 define <16 x i8>@test_int_x86_avx512_pmov_wb_256(<16 x i16> %x0, <16 x i8> %x1) {
9554 ; CHECK-LABEL: test_int_x86_avx512_pmov_wb_256:
9556 ; CHECK-NEXT: vpmovwb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x30,0xc0]
9557 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
9558 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
9559 %res = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
9560 ret <16 x i8> %res
9561 }
9563 define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
9564 ; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_256:
9566 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9567 ; X86-NEXT: vpmovwb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1]
9568 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
9569 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
9570 ; X86-NEXT: retl # encoding: [0xc3]
9572 ; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_256:
9574 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9575 ; X64-NEXT: vpmovwb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1]
9576 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
9577 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
9578 ; X64-NEXT: retq # encoding: [0xc3]
9579 %res = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
9580 ret <16 x i8> %res
9581 }
9583 define <16 x i8>@test_int_x86_avx512_maskz_pmov_wb_256(<16 x i16> %x0, i16 %x2) {
9584 ; X86-LABEL: test_int_x86_avx512_maskz_pmov_wb_256:
9586 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9587 ; X86-NEXT: vpmovwb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc0]
9588 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
9589 ; X86-NEXT: retl # encoding: [0xc3]
9591 ; X64-LABEL: test_int_x86_avx512_maskz_pmov_wb_256:
9593 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9594 ; X64-NEXT: vpmovwb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc0]
9595 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
9596 ; X64-NEXT: retq # encoding: [0xc3]
9597 %res = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)