; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=ANY,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=ANY,SSE,SSE4,SSE41
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s --check-prefixes=ANY,SSE,SSE4,SSE42
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=ANY,AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s --check-prefixes=ANY,AVX,AVX512

; There are at least 3 potential patterns corresponding to an unsigned saturated add: min, cmp with sum, cmp with not.
; Test each of those patterns with i8/i16/i32/i64.
; Test each of those with a constant operand and a variable operand.
; Test each of those with a 128-bit vector type.
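;
; For reference, using the constant pair (42, ~42 = -43) that appears
; throughout this file, the three patterns look like this in IR (a sketch,
; not one of the tests):
;   min:          %s = select (icmp ult %x, -43), %x, -43 ; %r = add %s, 42
;   cmp with sum: %a = add %x, 42 ; %r = select (icmp ugt %x, %a), -1, %a
;   cmp with not: %a = add %x, 42 ; %r = select (icmp ugt %x, -43), -1, %a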

define i8 @unsigned_sat_constant_i8_using_min(i8 %x) {
; ANY-LABEL: unsigned_sat_constant_i8_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    cmpb $-43, %dil
; ANY-NEXT:    movl $213, %eax
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addb $42, %al
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %c = icmp ult i8 %x, -43
  %s = select i1 %c, i8 %x, i8 -43
  %r = add i8 %s, 42
  ret i8 %r
}

define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) {
; ANY-LABEL: unsigned_sat_constant_i8_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addb $42, %dil
; ANY-NEXT:    movzbl %dil, %ecx
; ANY-NEXT:    movl $255, %eax
; ANY-NEXT:    cmovael %ecx, %eax
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %a = add i8 %x, 42
  %c = icmp ugt i8 %x, %a
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i8 @unsigned_sat_constant_i8_using_cmp_notval(i8 %x) {
; ANY-LABEL: unsigned_sat_constant_i8_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addb $42, %dil
; ANY-NEXT:    movzbl %dil, %ecx
; ANY-NEXT:    movl $255, %eax
; ANY-NEXT:    cmovael %ecx, %eax
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %a = add i8 %x, 42
  %c = icmp ugt i8 %x, -43
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i16 @unsigned_sat_constant_i16_using_min(i16 %x) {
; ANY-LABEL: unsigned_sat_constant_i16_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    cmpw $-43, %di
; ANY-NEXT:    movl $65493, %eax # imm = 0xFFD5
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addl $42, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %c = icmp ult i16 %x, -43
  %s = select i1 %c, i16 %x, i16 -43
  %r = add i16 %s, 42
  ret i16 %r
}

define i16 @unsigned_sat_constant_i16_using_cmp_sum(i16 %x) {
; ANY-LABEL: unsigned_sat_constant_i16_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addw $42, %di
; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %a = add i16 %x, 42
  %c = icmp ugt i16 %x, %a
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i16 @unsigned_sat_constant_i16_using_cmp_notval(i16 %x) {
; ANY-LABEL: unsigned_sat_constant_i16_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addw $42, %di
; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %a = add i16 %x, 42
  %c = icmp ugt i16 %x, -43
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i32 @unsigned_sat_constant_i32_using_min(i32 %x) {
; ANY-LABEL: unsigned_sat_constant_i32_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    cmpl $-43, %edi
; ANY-NEXT:    movl $-43, %eax
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addl $42, %eax
; ANY-NEXT:    retq
  %c = icmp ult i32 %x, -43
  %s = select i1 %c, i32 %x, i32 -43
  %r = add i32 %s, 42
  ret i32 %r
}

define i32 @unsigned_sat_constant_i32_using_cmp_sum(i32 %x) {
; ANY-LABEL: unsigned_sat_constant_i32_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addl $42, %edi
; ANY-NEXT:    movl $-1, %eax
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    retq
  %a = add i32 %x, 42
  %c = icmp ugt i32 %x, %a
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i32 @unsigned_sat_constant_i32_using_cmp_notval(i32 %x) {
; ANY-LABEL: unsigned_sat_constant_i32_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addl $42, %edi
; ANY-NEXT:    movl $-1, %eax
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    retq
  %a = add i32 %x, 42
  %c = icmp ugt i32 %x, -43
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i64 @unsigned_sat_constant_i64_using_min(i64 %x) {
; ANY-LABEL: unsigned_sat_constant_i64_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    cmpq $-43, %rdi
; ANY-NEXT:    movq $-43, %rax
; ANY-NEXT:    cmovbq %rdi, %rax
; ANY-NEXT:    addq $42, %rax
; ANY-NEXT:    retq
  %c = icmp ult i64 %x, -43
  %s = select i1 %c, i64 %x, i64 -43
  %r = add i64 %s, 42
  ret i64 %r
}

define i64 @unsigned_sat_constant_i64_using_cmp_sum(i64 %x) {
; ANY-LABEL: unsigned_sat_constant_i64_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addq $42, %rdi
; ANY-NEXT:    movq $-1, %rax
; ANY-NEXT:    cmovaeq %rdi, %rax
; ANY-NEXT:    retq
  %a = add i64 %x, 42
  %c = icmp ugt i64 %x, %a
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

define i64 @unsigned_sat_constant_i64_using_cmp_notval(i64 %x) {
; ANY-LABEL: unsigned_sat_constant_i64_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addq $42, %rdi
; ANY-NEXT:    movq $-1, %rax
; ANY-NEXT:    cmovaeq %rdi, %rax
; ANY-NEXT:    retq
  %a = add i64 %x, 42
  %c = icmp ugt i64 %x, -43
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}
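
; The same three patterns with a variable operand %y: the saturation bound
; becomes ~y, written as xor %y, -1 in the IR below.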

define i8 @unsigned_sat_variable_i8_using_min(i8 %x, i8 %y) {
; ANY-LABEL: unsigned_sat_variable_i8_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    movl %esi, %eax
; ANY-NEXT:    notb %al
; ANY-NEXT:    cmpb %al, %dil
; ANY-NEXT:    movzbl %al, %eax
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addb %sil, %al
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %noty = xor i8 %y, -1
  %c = icmp ult i8 %x, %noty
  %s = select i1 %c, i8 %x, i8 %noty
  %r = add i8 %s, %y
  ret i8 %r
}

define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
; ANY-LABEL: unsigned_sat_variable_i8_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addb %sil, %dil
; ANY-NEXT:    movzbl %dil, %ecx
; ANY-NEXT:    movl $255, %eax
; ANY-NEXT:    cmovael %ecx, %eax
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %a = add i8 %x, %y
  %c = icmp ugt i8 %x, %a
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i8 @unsigned_sat_variable_i8_using_cmp_notval(i8 %x, i8 %y) {
; ANY-LABEL: unsigned_sat_variable_i8_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addb %dil, %sil
; ANY-NEXT:    movzbl %sil, %ecx
; ANY-NEXT:    movl $255, %eax
; ANY-NEXT:    cmovael %ecx, %eax
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %noty = xor i8 %y, -1
  %a = add i8 %x, %y
  %c = icmp ugt i8 %x, %noty
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i16 @unsigned_sat_variable_i16_using_min(i16 %x, i16 %y) {
; ANY-LABEL: unsigned_sat_variable_i16_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    movl %esi, %eax
; ANY-NEXT:    notl %eax
; ANY-NEXT:    cmpw %ax, %di
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addl %esi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %noty = xor i16 %y, -1
  %c = icmp ult i16 %x, %noty
  %s = select i1 %c, i16 %x, i16 %noty
  %r = add i16 %s, %y
  ret i16 %r
}

define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
; ANY-LABEL: unsigned_sat_variable_i16_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addw %si, %di
; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %a = add i16 %x, %y
  %c = icmp ugt i16 %x, %a
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i16 @unsigned_sat_variable_i16_using_cmp_notval(i16 %x, i16 %y) {
; ANY-LABEL: unsigned_sat_variable_i16_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addw %di, %si
; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT:    cmovael %esi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %noty = xor i16 %y, -1
  %a = add i16 %x, %y
  %c = icmp ugt i16 %x, %noty
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i32 @unsigned_sat_variable_i32_using_min(i32 %x, i32 %y) {
; ANY-LABEL: unsigned_sat_variable_i32_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    movl %esi, %eax
; ANY-NEXT:    notl %eax
; ANY-NEXT:    cmpl %eax, %edi
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addl %esi, %eax
; ANY-NEXT:    retq
  %noty = xor i32 %y, -1
  %c = icmp ult i32 %x, %noty
  %s = select i1 %c, i32 %x, i32 %noty
  %r = add i32 %s, %y
  ret i32 %r
}

define i32 @unsigned_sat_variable_i32_using_cmp_sum(i32 %x, i32 %y) {
; ANY-LABEL: unsigned_sat_variable_i32_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addl %esi, %edi
; ANY-NEXT:    movl $-1, %eax
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    retq
  %a = add i32 %x, %y
  %c = icmp ugt i32 %x, %a
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i32 @unsigned_sat_variable_i32_using_cmp_notval(i32 %x, i32 %y) {
; ANY-LABEL: unsigned_sat_variable_i32_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addl %esi, %edi
; ANY-NEXT:    movl $-1, %eax
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    retq
  %noty = xor i32 %y, -1
  %a = add i32 %x, %y
  %c = icmp ugt i32 %x, %noty
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i64 @unsigned_sat_variable_i64_using_min(i64 %x, i64 %y) {
; ANY-LABEL: unsigned_sat_variable_i64_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    movq %rsi, %rax
; ANY-NEXT:    notq %rax
; ANY-NEXT:    cmpq %rax, %rdi
; ANY-NEXT:    cmovbq %rdi, %rax
; ANY-NEXT:    addq %rsi, %rax
; ANY-NEXT:    retq
  %noty = xor i64 %y, -1
  %c = icmp ult i64 %x, %noty
  %s = select i1 %c, i64 %x, i64 %noty
  %r = add i64 %s, %y
  ret i64 %r
}

define i64 @unsigned_sat_variable_i64_using_cmp_sum(i64 %x, i64 %y) {
; ANY-LABEL: unsigned_sat_variable_i64_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addq %rsi, %rdi
; ANY-NEXT:    movq $-1, %rax
; ANY-NEXT:    cmovaeq %rdi, %rax
; ANY-NEXT:    retq
  %a = add i64 %x, %y
  %c = icmp ugt i64 %x, %a
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

define i64 @unsigned_sat_variable_i64_using_cmp_notval(i64 %x, i64 %y) {
; ANY-LABEL: unsigned_sat_variable_i64_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addq %rsi, %rdi
; ANY-NEXT:    movq $-1, %rax
; ANY-NEXT:    cmovaeq %rdi, %rax
; ANY-NEXT:    retq
  %noty = xor i64 %y, -1
  %a = add i64 %x, %y
  %c = icmp ugt i64 %x, %noty
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}
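
; Vector versions of the three patterns. For i8/i16 elements, x86 has
; unsigned saturating adds (paddusb/paddusw), so these can fold to a single
; instruction; i32/i64 elements must be synthesized from min/compare ops.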

define <16 x i8> @unsigned_sat_constant_v16i8_using_min(<16 x i8> %x) {
; SSE-LABEL: unsigned_sat_constant_v16i8_using_min:
; SSE:       # %bb.0:
; SSE-NEXT:    pminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    paddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v16i8_using_min:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %c = icmp ult <16 x i8> %x, <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
  %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
  %r = add <16 x i8> %s, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_sum(<16 x i8> %x) {
; SSE-LABEL: unsigned_sat_constant_v16i8_using_cmp_sum:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v16i8_using_cmp_sum:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  %c = icmp ugt <16 x i8> %x, %a
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_notval(<16 x i8> %x) {
; SSE-LABEL: unsigned_sat_constant_v16i8_using_cmp_notval:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v16i8_using_cmp_notval:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  %c = icmp ugt <16 x i8> %x, <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <8 x i16> @unsigned_sat_constant_v8i16_using_min(<8 x i16> %x) {
; SSE2-LABEL: unsigned_sat_constant_v8i16_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psubusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    psubw %xmm1, %xmm0
; SSE2-NEXT:    paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v8i16_using_min:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v8i16_using_min:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %c = icmp ult <8 x i16> %x, <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
  %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
  %r = add <8 x i16> %s, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_sum(<8 x i16> %x) {
; SSE-LABEL: unsigned_sat_constant_v8i16_using_cmp_sum:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v8i16_using_cmp_sum:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
  %c = icmp ugt <8 x i16> %x, %a
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_notval(<8 x i16> %x) {
; SSE-LABEL: unsigned_sat_constant_v8i16_using_cmp_notval:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v8i16_using_cmp_notval:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
  %c = icmp ugt <8 x i16> %x, <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <4 x i32> @unsigned_sat_constant_v4i32_using_min(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483605,2147483605,2147483605,2147483605]
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v4i32_using_min:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v4i32_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4294967253,4294967253,4294967253,4294967253]
; AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [42,42,42,42]
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v4i32_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %c = icmp ult <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
  %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> <i32 -43, i32 -43, i32 -43, i32 -43>
  %r = add <4 x i32> %s, <i32 42, i32 42, i32 42, i32 42>
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_sum(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [42,42,42,42]
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [42,42,42,42]
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [4294967253,4294967253,4294967253,4294967253]
; AVX2-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
  %c = icmp ugt <4 x i32> %x, %a
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_notval(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [42,42,42,42]
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [42,42,42,42]
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [4294967253,4294967253,4294967253,4294967253]
; AVX2-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
  %c = icmp ugt <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_notval_nonsplat(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval_nonsplat:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [43,44,45,46]
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval_nonsplat:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval_nonsplat:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <4 x i32> %x, <i32 43, i32 44, i32 45, i32 46>
  %c = icmp ugt <4 x i32> %x, <i32 -44, i32 -45, i32 -46, i32 -47>
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}
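
; Pre-AVX512 there is no unsigned i64 vector compare (and no pminuq), so the
; i64 sequences below flip the sign bits with pxor and use signed
; pcmpgtd/pcmpgtq comparisons instead.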

define <2 x i64> @unsigned_sat_constant_v2i64_using_min(<2 x i64> %x) {
; SSE2-LABEL: unsigned_sat_constant_v2i64_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372034707292117,9223372034707292117]
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_constant_v2i64_using_min:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    movapd {{.*#+}} xmm2 = [18446744073709551573,18446744073709551573]
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [9223372034707292117,9223372034707292117]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
; SSE41-NEXT:    pand %xmm4, %xmm0
; SSE41-NEXT:    por %xmm3, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT:    paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_constant_v2i64_using_min:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm1
; SSE42-NEXT:    movapd {{.*#+}} xmm2 = [18446744073709551573,18446744073709551573]
; SSE42-NEXT:    movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    pxor %xmm0, %xmm3
; SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [9223372036854775765,9223372036854775765]
; SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
; SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; SSE42-NEXT:    paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE42-NEXT:    movdqa %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v2i64_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovapd {{.*#+}} xmm1 = [18446744073709551573,18446744073709551573]
; AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854775765,9223372036854775765]
; AVX2-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v2i64_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %c = icmp ult <2 x i64> %x, <i64 -43, i64 -43>
  %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> <i64 -43, i64 -43>
  %r = add <2 x i64> %s, <i64 42, i64 42>
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_sum(<2 x i64> %x) {
; SSE2-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [42,42]
; SSE2-NEXT:    paddq %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [42,42]
; SSE41-NEXT:    paddq %xmm0, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm2
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE41-NEXT:    pand %xmm4, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    por %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pxor %xmm1, %xmm2
; SSE42-NEXT:    paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE42-NEXT:    pxor %xmm0, %xmm1
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm2
; SSE42-NEXT:    por %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <2 x i64> %x, <i64 42, i64 42>
  %c = icmp ugt <2 x i64> %x, %a
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_notval(<2 x i64> %x) {
; SSE2-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [42,42]
; SSE2-NEXT:    paddq %xmm0, %xmm1
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,2,2]
; SSE2-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT:    pand %xmm3, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [42,42]
; SSE41-NEXT:    paddq %xmm0, %xmm1
; SSE41-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,2,2]
; SSE41-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE41-NEXT:    pand %xmm3, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    por %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pxor %xmm1, %xmm2
; SSE42-NEXT:    paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE42-NEXT:    pxor %xmm0, %xmm1
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm2
; SSE42-NEXT:    por %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <2 x i64> %x, <i64 42, i64 42>
  %c = icmp ugt <2 x i64> %x, <i64 -43, i64 -43>
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}
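
; Vector tests with a variable operand %y. As in the scalar cases, the min
; pattern needs the inverted operand ~y (pcmpeqd/pxor, or vpternlogq $15 with
; AVX512).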

define <16 x i8> @unsigned_sat_variable_v16i8_using_min(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: unsigned_sat_variable_v16i8_using_min:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pxor %xmm1, %xmm2
; SSE-NEXT:    pminub %xmm2, %xmm0
; SSE-NEXT:    paddb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v16i8_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v16i8_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
; AVX512-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %c = icmp ult <16 x i8> %x, %noty
  %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %noty
  %r = add <16 x i8> %s, %y
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_variable_v16i8_using_cmp_sum(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: unsigned_sat_variable_v16i8_using_cmp_sum:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_variable_v16i8_using_cmp_sum:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <16 x i8> %x, %y
  %c = icmp ugt <16 x i8> %x, %a
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_variable_v16i8_using_cmp_notval(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: unsigned_sat_variable_v16i8_using_cmp_notval:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    movdqa %xmm0, %xmm3
; SSE-NEXT:    paddb %xmm1, %xmm3
; SSE-NEXT:    pxor %xmm2, %xmm1
; SSE-NEXT:    pminub %xmm0, %xmm1
; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE-NEXT:    pxor %xmm2, %xmm0
; SSE-NEXT:    por %xmm3, %xmm0
; SSE-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v16i8_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpaddb %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpminub %xmm3, %xmm0, %xmm3
; AVX2-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v16i8_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpaddb %xmm1, %xmm0, %xmm3
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm1
; AVX512-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpternlogq $222, %xmm2, %xmm3, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %a = add <16 x i8> %x, %y
  %c = icmp ugt <16 x i8> %x, %noty
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <8 x i16> @unsigned_sat_variable_v8i16_using_min(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: unsigned_sat_variable_v8i16_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    psubusw %xmm2, %xmm3
; SSE2-NEXT:    psubw %xmm3, %xmm0
; SSE2-NEXT:    paddw %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v8i16_using_min:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    pxor %xmm1, %xmm2
; SSE4-NEXT:    pminuw %xmm2, %xmm0
; SSE4-NEXT:    paddw %xmm1, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v8i16_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpminuw %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v8i16_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
; AVX512-NEXT:    vpminuw %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %c = icmp ult <8 x i16> %x, %noty
  %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %noty
  %r = add <8 x i16> %s, %y
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_variable_v8i16_using_cmp_sum(<8 x i16> %x, <8 x i16> %y) {
; SSE-LABEL: unsigned_sat_variable_v8i16_using_cmp_sum:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_variable_v8i16_using_cmp_sum:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <8 x i16> %x, %y
  %c = icmp ugt <8 x i16> %x, %a
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_variable_v8i16_using_cmp_notval(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    paddw %xmm1, %xmm2
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    movdqa %xmm0, %xmm3
; SSE4-NEXT:    paddw %xmm1, %xmm3
; SSE4-NEXT:    pxor %xmm2, %xmm1
; SSE4-NEXT:    pminuw %xmm0, %xmm1
; SSE4-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE4-NEXT:    pxor %xmm2, %xmm0
; SSE4-NEXT:    por %xmm3, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpaddw %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpminuw %xmm3, %xmm0, %xmm3
; AVX2-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpaddw %xmm1, %xmm0, %xmm3
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpminuw %xmm1, %xmm0, %xmm1
; AVX512-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpternlogq $222, %xmm2, %xmm3, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %a = add <8 x i16> %x, %y
  %c = icmp ugt <8 x i16> %x, %noty
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <4 x i32> @unsigned_sat_variable_v4i32_using_min(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: unsigned_sat_variable_v4i32_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm0, %xmm3
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483647,2147483647,2147483647,2147483647]
; SSE2-NEXT:    pxor %xmm1, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm4
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pandn %xmm4, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    paddd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v4i32_using_min:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    pxor %xmm1, %xmm2
; SSE4-NEXT:    pminud %xmm2, %xmm0
; SSE4-NEXT:    paddd %xmm1, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v4i32_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v4i32_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
; AVX512-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %c = icmp ult <4 x i32> %x, %noty
  %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %noty
  %r = add <4 x i32> %s, %y
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_variable_v4i32_using_cmp_sum(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; SSE2:       # %bb.0:
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    pxor %xmm1, %xmm2
; SSE4-NEXT:    pminud %xmm2, %xmm0
; SSE4-NEXT:    paddd %xmm1, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
; AVX512-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <4 x i32> %x, %y
  %c = icmp ugt <4 x i32> %x, %a
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_variable_v4i32_using_cmp_notval(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    paddd %xmm1, %xmm2
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    movdqa %xmm0, %xmm3
; SSE4-NEXT:    paddd %xmm1, %xmm3
; SSE4-NEXT:    pxor %xmm2, %xmm1
; SSE4-NEXT:    pminud %xmm0, %xmm1
; SSE4-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE4-NEXT:    pxor %xmm2, %xmm0
; SSE4-NEXT:    por %xmm3, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpminud %xmm3, %xmm0, %xmm3
; AVX2-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpnleud %xmm1, %xmm0, %k1
; AVX512-NEXT:    vmovdqa32 %xmm3, %xmm2 {%k1}
; AVX512-NEXT:    vmovdqa %xmm2, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = add <4 x i32> %x, %y
  %c = icmp ugt <4 x i32> %x, %noty
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <2 x i64> @unsigned_sat_variable_v2i64_using_min(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: unsigned_sat_variable_v2i64_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    pxor %xmm0, %xmm3
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [9223372034707292159,9223372034707292159]
; SSE2-NEXT:    pxor %xmm1, %xmm4
; SSE2-NEXT:    movdqa %xmm4, %xmm5
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE2-NEXT:    pand %xmm6, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; SSE2-NEXT:    por %xmm3, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm4
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pandn %xmm4, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    paddq %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_variable_v2i64_using_min:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpeqd %xmm3, %xmm3
; SSE41-NEXT:    pxor %xmm1, %xmm3
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [9223372034707292159,9223372034707292159]
; SSE41-NEXT:    pxor %xmm1, %xmm4
; SSE41-NEXT:    movdqa %xmm4, %xmm5
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm5
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm4
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2]
; SSE41-NEXT:    pand %xmm5, %xmm0
; SSE41-NEXT:    por %xmm4, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm3
; SSE41-NEXT:    paddq %xmm1, %xmm3
; SSE41-NEXT:    movdqa %xmm3, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_variable_v2i64_using_min:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pcmpeqd %xmm3, %xmm3
; SSE42-NEXT:    pxor %xmm1, %xmm3
; SSE42-NEXT:    movdqa {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    pxor %xmm0, %xmm4
; SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [9223372036854775807,9223372036854775807]
; SSE42-NEXT:    pxor %xmm1, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm4, %xmm0
; SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm3
; SSE42-NEXT:    paddq %xmm1, %xmm3
; SSE42-NEXT:    movdqa %xmm3, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v2i64_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
; AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm4
; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
; AVX2-NEXT:    vblendvpd %xmm3, %xmm0, %xmm2, %xmm0
; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v2i64_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
; AVX512-NEXT:    vpminuq %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <2 x i64> %y, <i64 -1, i64 -1>
  %c = icmp ult <2 x i64> %x, %noty
  %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> %noty
  %r = add <2 x i64> %s, %y
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_sum(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; SSE2:       # %bb.0:
; SSE2-NEXT:    paddq %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; SSE41:       # %bb.0:
; SSE41-NEXT:    paddq %xmm0, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm2
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE41-NEXT:    pand %xmm4, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    por %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    movdqa %xmm0, %xmm3
; SSE42-NEXT:    pxor %xmm2, %xmm3
; SSE42-NEXT:    paddq %xmm1, %xmm0
; SSE42-NEXT:    pxor %xmm0, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm3
; SSE42-NEXT:    por %xmm3, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm1
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm3, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
; AVX512-NEXT:    vpminuq %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <2 x i64> %x, %y
  %c = icmp ugt <2 x i64> %x, %a
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_notval(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    paddq %xmm1, %xmm2
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    paddq %xmm1, %xmm2
; SSE41-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm4, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE41-NEXT:    por %xmm2, %xmm0
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    paddq %xmm1, %xmm2
; SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; SSE42-NEXT:    por %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm2
; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k1
; AVX512-NEXT:    vmovdqa64 %xmm3, %xmm2 {%k1}
; AVX512-NEXT:    vmovdqa %xmm2, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <2 x i64> %y, <i64 -1, i64 -1>
  %a = add <2 x i64> %x, %y
  %c = icmp ugt <2 x i64> %x, %noty
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}