; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding | FileCheck %s

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vlbw-builtins.c
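
; These tests cover lowering of the @llvm.sadd.sat / @llvm.ssub.sat /
; @llvm.uadd.sat / @llvm.usub.sat intrinsics to the AVX512BW/VL saturating
; add/subtract instructions (vpadds{b,w}, vpsubs{b,w}, vpaddus{b,w},
; vpsubus{b,w}), in plain register (rr), merge-masked (rrk), zero-masked
; (rrkz), and memory-operand (rm/rmk/rmkz) forms, checking both the
; mnemonics and their MC encodings.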
;
; Signed Saturation
;

define <8 x i16> @test_mask_adds_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epi16_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}
declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @test_mask_adds_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_adds_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_adds_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epi16_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_adds_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xed,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_adds_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xed,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <16 x i16> @test_mask_adds_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epi16_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %1
}
declare <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16>, <16 x i16>)

define <16 x i16> @test_mask_adds_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_adds_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_adds_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epi16_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsw (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %1
}

define <16 x i16> @test_mask_adds_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xed,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_adds_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}
define <8 x i16> @test_mask_subs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epi16_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %sub
}
declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @test_mask_subs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> %passThru
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epi16_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %sub = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %sub
}

define <8 x i16> @test_mask_subs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %sub = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> %passThru
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %sub = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}

define <16 x i16> @test_mask_subs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epi16_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %sub
}
declare <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16>, <16 x i16>)

define <16 x i16> @test_mask_subs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> %passThru
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epi16_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsw (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %sub = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %sub
}

define <16 x i16> @test_mask_subs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %sub = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> %passThru
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %sub = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}
define <16 x i8> @test_mask_adds_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_mask_adds_epi8_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}
declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_mask_adds_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_adds_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_adds_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epi8_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %1 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}

define <16 x i8> @test_mask_adds_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xec,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %1 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_adds_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xec,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %1 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

define <32 x i8> @test_mask_adds_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_mask_adds_epi8_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %1
}
declare <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8>, <32 x i8>)

define <32 x i8> @test_mask_adds_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_adds_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_adds_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epi8_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsb (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %1 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %1
}

define <32 x i8> @test_mask_adds_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xec,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %1 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_adds_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %1 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}
define <16 x i8> @test_mask_subs_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_mask_subs_epi8_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %sub
}
declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_mask_subs_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> %passThru
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epi8_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %sub = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %sub
}

define <16 x i8> @test_mask_subs_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %sub = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> %passThru
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %sub = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

define <32 x i8> @test_mask_subs_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_mask_subs_epi8_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %sub
}
declare <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8>, <32 x i8>)

define <32 x i8> @test_mask_subs_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> %passThru
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epi8_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsb (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %sub = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %sub
}

define <32 x i8> @test_mask_subs_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %sub = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> %passThru
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %sub = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}
;
; Unsigned Saturation
;

define <8 x i16> @test_mask_adds_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epu16_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}
declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @test_mask_adds_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdd,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_adds_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdd,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_adds_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epu16_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_adds_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdd,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_adds_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdd,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <16 x i16> @test_mask_adds_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epu16_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %1
}
declare <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16>, <16 x i16>)

define <16 x i16> @test_mask_adds_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdd,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_adds_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_adds_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epu16_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusw (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %1
}

define <16 x i16> @test_mask_adds_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdd,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_adds_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}
define <8 x i16> @test_mask_subs_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epu16_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %sub
}
declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @test_mask_subs_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd9,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> %passThru
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epu16_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %sub = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %sub
}

define <8 x i16> @test_mask_subs_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd9,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %sub = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> %passThru
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %sub = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}

define <16 x i16> @test_mask_subs_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epu16_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %sub
}
declare <16 x i16> @llvm.usub.sat.v16i16(<16 x i16>, <16 x i16>)

define <16 x i16> @test_mask_subs_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd9,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> %passThru
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epu16_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusw (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %sub = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %sub
}

define <16 x i16> @test_mask_subs_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd9,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %sub = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> %passThru
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %sub = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}
define <16 x i8> @test_mask_adds_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_mask_adds_epu8_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}
declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_mask_adds_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdc,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_adds_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdc,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_adds_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epu8_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %1 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}

define <16 x i8> @test_mask_adds_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdc,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %1 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_adds_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdc,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %1 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

define <32 x i8> @test_mask_adds_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_mask_adds_epu8_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %1
}
declare <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8>, <32 x i8>)

define <32 x i8> @test_mask_adds_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdc,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_adds_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_adds_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epu8_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusb (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %1 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %1
}

define <32 x i8> @test_mask_adds_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdc,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %1 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_adds_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %1 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}
define <16 x i8> @test_mask_subs_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_mask_subs_epu8_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %sub
}
declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_mask_subs_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd8,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> %passThru
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epu8_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %sub = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %sub
}

define <16 x i8> @test_mask_subs_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd8,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %sub = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> %passThru
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %sub = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

define <32 x i8> @test_mask_subs_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_mask_subs_epu8_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %sub
}
declare <32 x i8> @llvm.usub.sat.v32i8(<32 x i8>, <32 x i8>)

define <32 x i8> @test_mask_subs_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd8,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> %passThru
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epu8_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusb (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %sub = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %sub
}

define <32 x i8> @test_mask_subs_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd8,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %sub = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> %passThru
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %sub = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}