; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=ANY,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=ANY,SSE,SSE4,SSE41
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s --check-prefixes=ANY,SSE,SSE4,SSE42
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=ANY,AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s --check-prefixes=ANY,AVX,AVX512

; There are at least 3 potential patterns corresponding to an unsigned saturated add: min, cmp with sum, cmp with not.
; Test each of those patterns with i8/i16/i32/i64.
; Test each of those with a constant operand and a variable operand.
; Test each of those with a 128-bit vector type.
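;
; As a quick illustration (a sketch distilled from the i8 tests below, with
; C = 42 and therefore ~C = -43), the three equivalent IR forms of a
; saturated "x + 42" are:
;
;   min:        %s = select (icmp ult %x, -43), %x, -43
;               %r = add %s, 42
;   cmp sum:    %a = add %x, 42
;               %r = select (icmp ugt %x, %a), -1, %a
;   cmp notval: %a = add %x, 42
;               %r = select (icmp ugt %x, -43), -1, %a
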
define i8 @unsigned_sat_constant_i8_using_min(i8 %x) {
; ANY-LABEL: unsigned_sat_constant_i8_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    cmpb $-43, %dil
; ANY-NEXT:    movl $213, %eax
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addb $42, %al
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %c = icmp ult i8 %x, -43
  %s = select i1 %c, i8 %x, i8 -43
  %r = add i8 %s, 42
  ret i8 %r
}

define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) {
; ANY-LABEL: unsigned_sat_constant_i8_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addb $42, %dil
; ANY-NEXT:    movzbl %dil, %ecx
; ANY-NEXT:    movl $255, %eax
; ANY-NEXT:    cmovael %ecx, %eax
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %a = add i8 %x, 42
  %c = icmp ugt i8 %x, %a
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i8 @unsigned_sat_constant_i8_using_cmp_notval(i8 %x) {
; ANY-LABEL: unsigned_sat_constant_i8_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addb $42, %dil
; ANY-NEXT:    movzbl %dil, %ecx
; ANY-NEXT:    movl $255, %eax
; ANY-NEXT:    cmovael %ecx, %eax
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %a = add i8 %x, 42
  %c = icmp ugt i8 %x, -43
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i16 @unsigned_sat_constant_i16_using_min(i16 %x) {
; ANY-LABEL: unsigned_sat_constant_i16_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    cmpw $-43, %di
; ANY-NEXT:    movl $65493, %eax # imm = 0xFFD5
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addl $42, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %c = icmp ult i16 %x, -43
  %s = select i1 %c, i16 %x, i16 -43
  %r = add i16 %s, 42
  ret i16 %r
}

define i16 @unsigned_sat_constant_i16_using_cmp_sum(i16 %x) {
; ANY-LABEL: unsigned_sat_constant_i16_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addw $42, %di
; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %a = add i16 %x, 42
  %c = icmp ugt i16 %x, %a
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i16 @unsigned_sat_constant_i16_using_cmp_notval(i16 %x) {
; ANY-LABEL: unsigned_sat_constant_i16_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addw $42, %di
; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %a = add i16 %x, 42
  %c = icmp ugt i16 %x, -43
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i32 @unsigned_sat_constant_i32_using_min(i32 %x) {
; ANY-LABEL: unsigned_sat_constant_i32_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    cmpl $-43, %edi
; ANY-NEXT:    movl $-43, %eax
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addl $42, %eax
; ANY-NEXT:    retq
  %c = icmp ult i32 %x, -43
  %s = select i1 %c, i32 %x, i32 -43
  %r = add i32 %s, 42
  ret i32 %r
}

define i32 @unsigned_sat_constant_i32_using_cmp_sum(i32 %x) {
; ANY-LABEL: unsigned_sat_constant_i32_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addl $42, %edi
; ANY-NEXT:    movl $-1, %eax
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    retq
  %a = add i32 %x, 42
  %c = icmp ugt i32 %x, %a
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i32 @unsigned_sat_constant_i32_using_cmp_notval(i32 %x) {
; ANY-LABEL: unsigned_sat_constant_i32_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addl $42, %edi
; ANY-NEXT:    movl $-1, %eax
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    retq
  %a = add i32 %x, 42
  %c = icmp ugt i32 %x, -43
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i64 @unsigned_sat_constant_i64_using_min(i64 %x) {
; ANY-LABEL: unsigned_sat_constant_i64_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    cmpq $-43, %rdi
; ANY-NEXT:    movq $-43, %rax
; ANY-NEXT:    cmovbq %rdi, %rax
; ANY-NEXT:    addq $42, %rax
; ANY-NEXT:    retq
  %c = icmp ult i64 %x, -43
  %s = select i1 %c, i64 %x, i64 -43
  %r = add i64 %s, 42
  ret i64 %r
}

define i64 @unsigned_sat_constant_i64_using_cmp_sum(i64 %x) {
; ANY-LABEL: unsigned_sat_constant_i64_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addq $42, %rdi
; ANY-NEXT:    movq $-1, %rax
; ANY-NEXT:    cmovaeq %rdi, %rax
; ANY-NEXT:    retq
  %a = add i64 %x, 42
  %c = icmp ugt i64 %x, %a
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

define i64 @unsigned_sat_constant_i64_using_cmp_notval(i64 %x) {
; ANY-LABEL: unsigned_sat_constant_i64_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addq $42, %rdi
; ANY-NEXT:    movq $-1, %rax
; ANY-NEXT:    cmovaeq %rdi, %rax
; ANY-NEXT:    retq
  %a = add i64 %x, 42
  %c = icmp ugt i64 %x, -43
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

define i8 @unsigned_sat_variable_i8_using_min(i8 %x, i8 %y) {
; ANY-LABEL: unsigned_sat_variable_i8_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    movl %esi, %eax
; ANY-NEXT:    notb %al
; ANY-NEXT:    cmpb %al, %dil
; ANY-NEXT:    movzbl %al, %eax
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addb %sil, %al
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %noty = xor i8 %y, -1
  %c = icmp ult i8 %x, %noty
  %s = select i1 %c, i8 %x, i8 %noty
  %r = add i8 %s, %y
  ret i8 %r
}

define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
; ANY-LABEL: unsigned_sat_variable_i8_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addb %sil, %dil
; ANY-NEXT:    movzbl %dil, %ecx
; ANY-NEXT:    movl $255, %eax
; ANY-NEXT:    cmovael %ecx, %eax
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %a = add i8 %x, %y
  %c = icmp ugt i8 %x, %a
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i8 @unsigned_sat_variable_i8_using_cmp_notval(i8 %x, i8 %y) {
; ANY-LABEL: unsigned_sat_variable_i8_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addb %dil, %sil
; ANY-NEXT:    movzbl %sil, %ecx
; ANY-NEXT:    movl $255, %eax
; ANY-NEXT:    cmovael %ecx, %eax
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %noty = xor i8 %y, -1
  %a = add i8 %x, %y
  %c = icmp ugt i8 %x, %noty
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i16 @unsigned_sat_variable_i16_using_min(i16 %x, i16 %y) {
; ANY-LABEL: unsigned_sat_variable_i16_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    movl %esi, %eax
; ANY-NEXT:    notl %eax
; ANY-NEXT:    cmpw %ax, %di
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addl %esi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %noty = xor i16 %y, -1
  %c = icmp ult i16 %x, %noty
  %s = select i1 %c, i16 %x, i16 %noty
  %r = add i16 %s, %y
  ret i16 %r
}

define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
; ANY-LABEL: unsigned_sat_variable_i16_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addw %si, %di
; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %a = add i16 %x, %y
  %c = icmp ugt i16 %x, %a
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i16 @unsigned_sat_variable_i16_using_cmp_notval(i16 %x, i16 %y) {
; ANY-LABEL: unsigned_sat_variable_i16_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addw %di, %si
; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT:    cmovael %esi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %noty = xor i16 %y, -1
  %a = add i16 %x, %y
  %c = icmp ugt i16 %x, %noty
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i32 @unsigned_sat_variable_i32_using_min(i32 %x, i32 %y) {
; ANY-LABEL: unsigned_sat_variable_i32_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    movl %esi, %eax
; ANY-NEXT:    notl %eax
; ANY-NEXT:    cmpl %eax, %edi
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addl %esi, %eax
; ANY-NEXT:    retq
  %noty = xor i32 %y, -1
  %c = icmp ult i32 %x, %noty
  %s = select i1 %c, i32 %x, i32 %noty
  %r = add i32 %s, %y
  ret i32 %r
}

define i32 @unsigned_sat_variable_i32_using_cmp_sum(i32 %x, i32 %y) {
; ANY-LABEL: unsigned_sat_variable_i32_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addl %esi, %edi
; ANY-NEXT:    movl $-1, %eax
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    retq
  %a = add i32 %x, %y
  %c = icmp ugt i32 %x, %a
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i32 @unsigned_sat_variable_i32_using_cmp_notval(i32 %x, i32 %y) {
; ANY-LABEL: unsigned_sat_variable_i32_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addl %esi, %edi
; ANY-NEXT:    movl $-1, %eax
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    retq
  %noty = xor i32 %y, -1
  %a = add i32 %x, %y
  %c = icmp ugt i32 %x, %noty
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i64 @unsigned_sat_variable_i64_using_min(i64 %x, i64 %y) {
; ANY-LABEL: unsigned_sat_variable_i64_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    movq %rsi, %rax
; ANY-NEXT:    notq %rax
; ANY-NEXT:    cmpq %rax, %rdi
; ANY-NEXT:    cmovbq %rdi, %rax
; ANY-NEXT:    addq %rsi, %rax
; ANY-NEXT:    retq
  %noty = xor i64 %y, -1
  %c = icmp ult i64 %x, %noty
  %s = select i1 %c, i64 %x, i64 %noty
  %r = add i64 %s, %y
  ret i64 %r
}

define i64 @unsigned_sat_variable_i64_using_cmp_sum(i64 %x, i64 %y) {
; ANY-LABEL: unsigned_sat_variable_i64_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addq %rsi, %rdi
; ANY-NEXT:    movq $-1, %rax
; ANY-NEXT:    cmovaeq %rdi, %rax
; ANY-NEXT:    retq
  %a = add i64 %x, %y
  %c = icmp ugt i64 %x, %a
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

define i64 @unsigned_sat_variable_i64_using_cmp_notval(i64 %x, i64 %y) {
; ANY-LABEL: unsigned_sat_variable_i64_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addq %rsi, %rdi
; ANY-NEXT:    movq $-1, %rax
; ANY-NEXT:    cmovaeq %rdi, %rax
; ANY-NEXT:    retq
  %noty = xor i64 %y, -1
  %a = add i64 %x, %y
  %c = icmp ugt i64 %x, %noty
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

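; The same three patterns, repeated for 128-bit vector types. Note (per the
; checks below) that i8/i16 elements can use the saturating-add instructions
; directly (paddusb/paddusw), while i32/i64 elements have no such instruction
; and instead lower via unsigned min (pminud, or pminuq on AVX512) or a
; sign-flipped compare.
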
define <16 x i8> @unsigned_sat_constant_v16i8_using_min(<16 x i8> %x) {
; SSE-LABEL: unsigned_sat_constant_v16i8_using_min:
; SSE:       # %bb.0:
; SSE-NEXT:    pminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    paddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v16i8_using_min:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %c = icmp ult <16 x i8> %x, <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
  %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
  %r = add <16 x i8> %s, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_sum(<16 x i8> %x) {
; SSE-LABEL: unsigned_sat_constant_v16i8_using_cmp_sum:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v16i8_using_cmp_sum:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  %c = icmp ugt <16 x i8> %x, %a
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_notval(<16 x i8> %x) {
; SSE-LABEL: unsigned_sat_constant_v16i8_using_cmp_notval:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v16i8_using_cmp_notval:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  %c = icmp ugt <16 x i8> %x, <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <8 x i16> @unsigned_sat_constant_v8i16_using_min(<8 x i16> %x) {
; SSE2-LABEL: unsigned_sat_constant_v8i16_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psubusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    psubw %xmm1, %xmm0
; SSE2-NEXT:    paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v8i16_using_min:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v8i16_using_min:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %c = icmp ult <8 x i16> %x, <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
  %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
  %r = add <8 x i16> %s, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_sum(<8 x i16> %x) {
; SSE-LABEL: unsigned_sat_constant_v8i16_using_cmp_sum:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v8i16_using_cmp_sum:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
  %c = icmp ugt <8 x i16> %x, %a
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_notval(<8 x i16> %x) {
; SSE-LABEL: unsigned_sat_constant_v8i16_using_cmp_notval:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v8i16_using_cmp_notval:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
  %c = icmp ugt <8 x i16> %x, <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <4 x i32> @unsigned_sat_constant_v4i32_using_min(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pandn %xmm0, %xmm2
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v4i32_using_min:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v4i32_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4294967253,4294967253,4294967253,4294967253]
; AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [42,42,42,42]
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v4i32_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %c = icmp ult <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
  %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> <i32 -43, i32 -43, i32 -43, i32 -43>
  %r = add <4 x i32> %s, <i32 42, i32 42, i32 42, i32 42>
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_sum(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [42,42,42,42]
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [42,42,42,42]
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [4294967253,4294967253,4294967253,4294967253]
; AVX2-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
  %c = icmp ugt <4 x i32> %x, %a
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_notval(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [42,42,42,42]
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [42,42,42,42]
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [4294967253,4294967253,4294967253,4294967253]
; AVX2-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
  %c = icmp ugt <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_notval_nonsplat(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval_nonsplat:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [43,44,45,46]
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval_nonsplat:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval_nonsplat:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <4 x i32> %x, <i32 43, i32 44, i32 45, i32 46>
  %c = icmp ugt <4 x i32> %x, <i32 -44, i32 -45, i32 -46, i32 -47>
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <2 x i64> @unsigned_sat_constant_v2i64_using_min(<2 x i64> %x) {
; SSE2-LABEL: unsigned_sat_constant_v2i64_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372034707292117,9223372034707292117]
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_constant_v2i64_using_min:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    movapd {{.*#+}} xmm2 = [18446744073709551573,18446744073709551573]
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [9223372034707292117,9223372034707292117]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
; SSE41-NEXT:    pand %xmm4, %xmm0
; SSE41-NEXT:    por %xmm3, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT:    paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_constant_v2i64_using_min:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm1
; SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    pxor %xmm1, %xmm0
; SSE42-NEXT:    pcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE42-NEXT:    blendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE42-NEXT:    paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE42-NEXT:    movdqa %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v2i64_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vblendvpd %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v2i64_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %c = icmp ult <2 x i64> %x, <i64 -43, i64 -43>
  %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> <i64 -43, i64 -43>
  %r = add <2 x i64> %s, <i64 42, i64 42>
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_sum(<2 x i64> %x) {
; SSE2-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [42,42]
; SSE2-NEXT:    paddq %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovsxbq {{.*#+}} xmm1 = [42,42]
; SSE41-NEXT:    paddq %xmm0, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm2
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE41-NEXT:    pand %xmm4, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    por %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pxor %xmm1, %xmm2
; SSE42-NEXT:    paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE42-NEXT:    pxor %xmm0, %xmm1
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm2
; SSE42-NEXT:    por %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <2 x i64> %x, <i64 42, i64 42>
  %c = icmp ugt <2 x i64> %x, %a
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_notval(<2 x i64> %x) {
; SSE2-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [42,42]
; SSE2-NEXT:    paddq %xmm0, %xmm1
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
; SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovsxbq {{.*#+}} xmm1 = [42,42]
; SSE41-NEXT:    paddq %xmm0, %xmm1
; SSE41-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
; SSE41-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    pand %xmm2, %xmm0
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pxor %xmm1, %xmm2
; SSE42-NEXT:    paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE42-NEXT:    pxor %xmm0, %xmm1
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm2
; SSE42-NEXT:    por %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <2 x i64> %x, <i64 42, i64 42>
  %c = icmp ugt <2 x i64> %x, <i64 -43, i64 -43>
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}

define <16 x i8> @unsigned_sat_variable_v16i8_using_min(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: unsigned_sat_variable_v16i8_using_min:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pxor %xmm1, %xmm2
; SSE-NEXT:    pminub %xmm2, %xmm0
; SSE-NEXT:    paddb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v16i8_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v16i8_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
; AVX512-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %c = icmp ult <16 x i8> %x, %noty
  %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %noty
  %r = add <16 x i8> %s, %y
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_variable_v16i8_using_cmp_sum(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: unsigned_sat_variable_v16i8_using_cmp_sum:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_variable_v16i8_using_cmp_sum:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <16 x i8> %x, %y
  %c = icmp ugt <16 x i8> %x, %a
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_variable_v16i8_using_cmp_notval(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: unsigned_sat_variable_v16i8_using_cmp_notval:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    movdqa %xmm0, %xmm3
; SSE-NEXT:    paddb %xmm1, %xmm3
; SSE-NEXT:    pxor %xmm2, %xmm1
; SSE-NEXT:    pminub %xmm0, %xmm1
; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE-NEXT:    pxor %xmm2, %xmm0
; SSE-NEXT:    por %xmm3, %xmm0
; SSE-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v16i8_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpaddb %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpminub %xmm3, %xmm0, %xmm3
; AVX2-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v16i8_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpaddb %xmm1, %xmm0, %xmm3
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm1
; AVX512-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpternlogq $222, %xmm2, %xmm3, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %a = add <16 x i8> %x, %y
  %c = icmp ugt <16 x i8> %x, %noty
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <8 x i16> @unsigned_sat_variable_v8i16_using_min(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: unsigned_sat_variable_v8i16_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    psubusw %xmm2, %xmm3
; SSE2-NEXT:    psubw %xmm3, %xmm0
; SSE2-NEXT:    paddw %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v8i16_using_min:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    pxor %xmm1, %xmm2
; SSE4-NEXT:    pminuw %xmm2, %xmm0
; SSE4-NEXT:    paddw %xmm1, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v8i16_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpminuw %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v8i16_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
; AVX512-NEXT:    vpminuw %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %c = icmp ult <8 x i16> %x, %noty
  %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %noty
  %r = add <8 x i16> %s, %y
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_variable_v8i16_using_cmp_sum(<8 x i16> %x, <8 x i16> %y) {
; SSE-LABEL: unsigned_sat_variable_v8i16_using_cmp_sum:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_variable_v8i16_using_cmp_sum:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <8 x i16> %x, %y
  %c = icmp ugt <8 x i16> %x, %a
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_variable_v8i16_using_cmp_notval(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    paddw %xmm1, %xmm2
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    movdqa %xmm0, %xmm3
; SSE4-NEXT:    paddw %xmm1, %xmm3
; SSE4-NEXT:    pxor %xmm2, %xmm1
; SSE4-NEXT:    pminuw %xmm0, %xmm1
; SSE4-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE4-NEXT:    pxor %xmm2, %xmm0
; SSE4-NEXT:    por %xmm3, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpaddw %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpminuw %xmm3, %xmm0, %xmm3
; AVX2-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpaddw %xmm1, %xmm0, %xmm3
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpminuw %xmm1, %xmm0, %xmm1
; AVX512-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpternlogq $222, %xmm2, %xmm3, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %a = add <8 x i16> %x, %y
  %c = icmp ugt <8 x i16> %x, %noty
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <4 x i32> @unsigned_sat_variable_v4i32_using_min(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: unsigned_sat_variable_v4i32_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm0, %xmm3
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483647,2147483647,2147483647,2147483647]
; SSE2-NEXT:    pxor %xmm1, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm4
; SSE2-NEXT:    pxor %xmm2, %xmm4
; SSE2-NEXT:    por %xmm4, %xmm0
; SSE2-NEXT:    paddd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v4i32_using_min:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    pxor %xmm1, %xmm2
; SSE4-NEXT:    pminud %xmm2, %xmm0
; SSE4-NEXT:    paddd %xmm1, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v4i32_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v4i32_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
; AVX512-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %c = icmp ult <4 x i32> %x, %noty
  %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %noty
  %r = add <4 x i32> %s, %y
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_variable_v4i32_using_cmp_sum(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; SSE2:       # %bb.0:
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    pxor %xmm1, %xmm2
; SSE4-NEXT:    pminud %xmm2, %xmm0
; SSE4-NEXT:    paddd %xmm1, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
; AVX512-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <4 x i32> %x, %y
  %c = icmp ugt <4 x i32> %x, %a
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_variable_v4i32_using_cmp_notval(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    paddd %xmm1, %xmm2
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    movdqa %xmm0, %xmm3
; SSE4-NEXT:    paddd %xmm1, %xmm3
; SSE4-NEXT:    pxor %xmm2, %xmm1
; SSE4-NEXT:    pminud %xmm0, %xmm1
; SSE4-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE4-NEXT:    pxor %xmm2, %xmm0
; SSE4-NEXT:    por %xmm3, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpminud %xmm3, %xmm0, %xmm3
; AVX2-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm3
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm3
; AVX512-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpcmpleud %xmm3, %xmm0, %k1
; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm2 {%k1}
; AVX512-NEXT:    vmovdqa %xmm2, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = add <4 x i32> %x, %y
  %c = icmp ugt <4 x i32> %x, %noty
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <2 x i64> @unsigned_sat_variable_v2i64_using_min(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: unsigned_sat_variable_v2i64_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    pxor %xmm0, %xmm3
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [9223372034707292159,9223372034707292159]
; SSE2-NEXT:    pxor %xmm1, %xmm4
; SSE2-NEXT:    movdqa %xmm4, %xmm5
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE2-NEXT:    pand %xmm6, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; SSE2-NEXT:    por %xmm3, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm4
; SSE2-NEXT:    pxor %xmm2, %xmm4
; SSE2-NEXT:    por %xmm4, %xmm0
; SSE2-NEXT:    paddq %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_variable_v2i64_using_min:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpeqd %xmm3, %xmm3
; SSE41-NEXT:    pxor %xmm1, %xmm3
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [9223372034707292159,9223372034707292159]
; SSE41-NEXT:    pxor %xmm1, %xmm4
; SSE41-NEXT:    movdqa %xmm4, %xmm5
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm5
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm4
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2]
; SSE41-NEXT:    pand %xmm5, %xmm0
; SSE41-NEXT:    por %xmm4, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm3
; SSE41-NEXT:    paddq %xmm1, %xmm3
; SSE41-NEXT:    movdqa %xmm3, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_variable_v2i64_using_min:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pcmpeqd %xmm3, %xmm3
; SSE42-NEXT:    pxor %xmm1, %xmm3
; SSE42-NEXT:    movdqa {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    pxor %xmm0, %xmm4
; SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [9223372036854775807,9223372036854775807]
; SSE42-NEXT:    pxor %xmm1, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm4, %xmm0
; SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm3
; SSE42-NEXT:    paddq %xmm1, %xmm3
; SSE42-NEXT:    movdqa %xmm3, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v2i64_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
; AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm4
; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
; AVX2-NEXT:    vblendvpd %xmm3, %xmm0, %xmm2, %xmm0
; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v2i64_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
; AVX512-NEXT:    vpminuq %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <2 x i64> %y, <i64 -1, i64 -1>
  %c = icmp ult <2 x i64> %x, %noty
  %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> %noty
  %r = add <2 x i64> %s, %y
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_sum(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; SSE2:       # %bb.0:
; SSE2-NEXT:    paddq %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; SSE41:       # %bb.0:
; SSE41-NEXT:    paddq %xmm0, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm2
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE41-NEXT:    pand %xmm4, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    por %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    movdqa %xmm0, %xmm3
; SSE42-NEXT:    pxor %xmm2, %xmm3
; SSE42-NEXT:    paddq %xmm1, %xmm0
; SSE42-NEXT:    pxor %xmm0, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm3
; SSE42-NEXT:    por %xmm3, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm1
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm3, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
; AVX512-NEXT:    vpminuq %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <2 x i64> %x, %y
  %c = icmp ugt <2 x i64> %x, %a
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_notval(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    paddq %xmm1, %xmm2
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    paddq %xmm1, %xmm2
; SSE41-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm4, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE41-NEXT:    por %xmm2, %xmm0
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    paddq %xmm1, %xmm2
; SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; SSE42-NEXT:    por %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm3
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm3
; AVX512-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpcmpleuq %xmm3, %xmm0, %k1
; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm2 {%k1}
; AVX512-NEXT:    vmovdqa %xmm2, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <2 x i64> %y, <i64 -1, i64 -1>
  %a = add <2 x i64> %x, %y
  %c = icmp ugt <2 x i64> %x, %noty
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}