; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=ANY,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=ANY,SSE,SSE41

; There are at least 3 potential patterns corresponding to an unsigned saturated add: min, cmp with sum, cmp with not.
; Test each of those patterns with i8/i16/i32/i64.
; Test each of those with a constant operand and a variable operand.
; Test each of those with a 128-bit vector type.
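; For reference, an illustrative sketch of those three shapes (comments only,
; not checked by FileCheck), written for a constant C = 42 and using the
; identity -43 == ~42:
;   min:    %s = select (icmp ult %x, ~C), %x, ~C   ; clamp to ~C first
;           %r = add %s, C
;   sum:    %a = add %x, C
;           %r = select (icmp ugt %x, %a), -1, %a   ; wrapped sum -> all-ones
;   notval: %a = add %x, C
;           %r = select (icmp ugt %x, ~C), -1, %a   ; compare x against ~C
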
define i8 @unsigned_sat_constant_i8_using_min(i8 %x) {
; ANY-LABEL: unsigned_sat_constant_i8_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    movl %edi, %eax
; ANY-NEXT:    cmpb $-43, %al
; ANY-NEXT:    jb .LBB0_2
; ANY-NEXT:  # %bb.1:
; ANY-NEXT:    movb $-43, %al
; ANY-NEXT:  .LBB0_2:
; ANY-NEXT:    addb $42, %al
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %c = icmp ult i8 %x, -43
  %s = select i1 %c, i8 %x, i8 -43
  %r = add i8 %s, 42
  ret i8 %r
}

define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) {
; ANY-LABEL: unsigned_sat_constant_i8_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addb $42, %dil
; ANY-NEXT:    movb $-1, %al
; ANY-NEXT:    jb .LBB1_2
; ANY-NEXT:  # %bb.1:
; ANY-NEXT:    movl %edi, %eax
; ANY-NEXT:  .LBB1_2:
; ANY-NEXT:    retq
  %a = add i8 %x, 42
  %c = icmp ugt i8 %x, %a
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i8 @unsigned_sat_constant_i8_using_cmp_notval(i8 %x) {
; ANY-LABEL: unsigned_sat_constant_i8_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addb $42, %dil
; ANY-NEXT:    movb $-1, %al
; ANY-NEXT:    jb .LBB2_2
; ANY-NEXT:  # %bb.1:
; ANY-NEXT:    movl %edi, %eax
; ANY-NEXT:  .LBB2_2:
; ANY-NEXT:    retq
  %a = add i8 %x, 42
  %c = icmp ugt i8 %x, -43
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i16 @unsigned_sat_constant_i16_using_min(i16 %x) {
; ANY-LABEL: unsigned_sat_constant_i16_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    cmpw $-43, %di
; ANY-NEXT:    movl $65493, %eax # imm = 0xFFD5
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addl $42, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %c = icmp ult i16 %x, -43
  %s = select i1 %c, i16 %x, i16 -43
  %r = add i16 %s, 42
  ret i16 %r
}

define i16 @unsigned_sat_constant_i16_using_cmp_sum(i16 %x) {
; ANY-LABEL: unsigned_sat_constant_i16_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addw $42, %di
; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %a = add i16 %x, 42
  %c = icmp ugt i16 %x, %a
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i16 @unsigned_sat_constant_i16_using_cmp_notval(i16 %x) {
; ANY-LABEL: unsigned_sat_constant_i16_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addw $42, %di
; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %a = add i16 %x, 42
  %c = icmp ugt i16 %x, -43
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i32 @unsigned_sat_constant_i32_using_min(i32 %x) {
; ANY-LABEL: unsigned_sat_constant_i32_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    cmpl $-43, %edi
; ANY-NEXT:    movl $-43, %eax
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addl $42, %eax
; ANY-NEXT:    retq
  %c = icmp ult i32 %x, -43
  %s = select i1 %c, i32 %x, i32 -43
  %r = add i32 %s, 42
  ret i32 %r
}

define i32 @unsigned_sat_constant_i32_using_cmp_sum(i32 %x) {
; ANY-LABEL: unsigned_sat_constant_i32_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addl $42, %edi
; ANY-NEXT:    movl $-1, %eax
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    retq
  %a = add i32 %x, 42
  %c = icmp ugt i32 %x, %a
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i32 @unsigned_sat_constant_i32_using_cmp_notval(i32 %x) {
; ANY-LABEL: unsigned_sat_constant_i32_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addl $42, %edi
; ANY-NEXT:    movl $-1, %eax
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    retq
  %a = add i32 %x, 42
  %c = icmp ugt i32 %x, -43
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i64 @unsigned_sat_constant_i64_using_min(i64 %x) {
; ANY-LABEL: unsigned_sat_constant_i64_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    cmpq $-43, %rdi
; ANY-NEXT:    movq $-43, %rax
; ANY-NEXT:    cmovbq %rdi, %rax
; ANY-NEXT:    addq $42, %rax
; ANY-NEXT:    retq
  %c = icmp ult i64 %x, -43
  %s = select i1 %c, i64 %x, i64 -43
  %r = add i64 %s, 42
  ret i64 %r
}

define i64 @unsigned_sat_constant_i64_using_cmp_sum(i64 %x) {
; ANY-LABEL: unsigned_sat_constant_i64_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addq $42, %rdi
; ANY-NEXT:    movq $-1, %rax
; ANY-NEXT:    cmovaeq %rdi, %rax
; ANY-NEXT:    retq
  %a = add i64 %x, 42
  %c = icmp ugt i64 %x, %a
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

define i64 @unsigned_sat_constant_i64_using_cmp_notval(i64 %x) {
; ANY-LABEL: unsigned_sat_constant_i64_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addq $42, %rdi
; ANY-NEXT:    movq $-1, %rax
; ANY-NEXT:    cmovaeq %rdi, %rax
; ANY-NEXT:    retq
  %a = add i64 %x, 42
  %c = icmp ugt i64 %x, -43
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

define i8 @unsigned_sat_variable_i8_using_min(i8 %x, i8 %y) {
; ANY-LABEL: unsigned_sat_variable_i8_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    movl %edi, %eax
; ANY-NEXT:    movl %esi, %ecx
; ANY-NEXT:    notb %cl
; ANY-NEXT:    cmpb %cl, %al
; ANY-NEXT:    jb .LBB12_2
; ANY-NEXT:  # %bb.1:
; ANY-NEXT:    movl %ecx, %eax
; ANY-NEXT:  .LBB12_2:
; ANY-NEXT:    addb %sil, %al
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %noty = xor i8 %y, -1
  %c = icmp ult i8 %x, %noty
  %s = select i1 %c, i8 %x, i8 %noty
  %r = add i8 %s, %y
  ret i8 %r
}

define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
; ANY-LABEL: unsigned_sat_variable_i8_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addb %sil, %dil
; ANY-NEXT:    movb $-1, %al
; ANY-NEXT:    jb .LBB13_2
; ANY-NEXT:  # %bb.1:
; ANY-NEXT:    movl %edi, %eax
; ANY-NEXT:  .LBB13_2:
; ANY-NEXT:    retq
  %a = add i8 %x, %y
  %c = icmp ugt i8 %x, %a
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i8 @unsigned_sat_variable_i8_using_cmp_notval(i8 %x, i8 %y) {
; ANY-LABEL: unsigned_sat_variable_i8_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    movl %esi, %eax
; ANY-NEXT:    notb %al
; ANY-NEXT:    cmpb %al, %dil
; ANY-NEXT:    movb $-1, %al
; ANY-NEXT:    ja .LBB14_2
; ANY-NEXT:  # %bb.1:
; ANY-NEXT:    addb %sil, %dil
; ANY-NEXT:    movl %edi, %eax
; ANY-NEXT:  .LBB14_2:
; ANY-NEXT:    retq
  %noty = xor i8 %y, -1
  %a = add i8 %x, %y
  %c = icmp ugt i8 %x, %noty
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i16 @unsigned_sat_variable_i16_using_min(i16 %x, i16 %y) {
; ANY-LABEL: unsigned_sat_variable_i16_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    # kill: def $esi killed $esi def $rsi
; ANY-NEXT:    movl %esi, %eax
; ANY-NEXT:    notl %eax
; ANY-NEXT:    cmpw %ax, %di
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    leal (%rax,%rsi), %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %noty = xor i16 %y, -1
  %c = icmp ult i16 %x, %noty
  %s = select i1 %c, i16 %x, i16 %noty
  %r = add i16 %s, %y
  ret i16 %r
}

define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
; ANY-LABEL: unsigned_sat_variable_i16_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addw %si, %di
; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %a = add i16 %x, %y
  %c = icmp ugt i16 %x, %a
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i16 @unsigned_sat_variable_i16_using_cmp_notval(i16 %x, i16 %y) {
; ANY-LABEL: unsigned_sat_variable_i16_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    # kill: def $esi killed $esi def $rsi
; ANY-NEXT:    # kill: def $edi killed $edi def $rdi
; ANY-NEXT:    leal (%rdi,%rsi), %ecx
; ANY-NEXT:    notl %esi
; ANY-NEXT:    cmpw %si, %di
; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT:    cmovbel %ecx, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %noty = xor i16 %y, -1
  %a = add i16 %x, %y
  %c = icmp ugt i16 %x, %noty
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i32 @unsigned_sat_variable_i32_using_min(i32 %x, i32 %y) {
; ANY-LABEL: unsigned_sat_variable_i32_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    # kill: def $esi killed $esi def $rsi
; ANY-NEXT:    movl %esi, %eax
; ANY-NEXT:    notl %eax
; ANY-NEXT:    cmpl %eax, %edi
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    leal (%rax,%rsi), %eax
; ANY-NEXT:    retq
  %noty = xor i32 %y, -1
  %c = icmp ult i32 %x, %noty
  %s = select i1 %c, i32 %x, i32 %noty
  %r = add i32 %s, %y
  ret i32 %r
}

define i32 @unsigned_sat_variable_i32_using_cmp_sum(i32 %x, i32 %y) {
; ANY-LABEL: unsigned_sat_variable_i32_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addl %esi, %edi
; ANY-NEXT:    movl $-1, %eax
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    retq
  %a = add i32 %x, %y
  %c = icmp ugt i32 %x, %a
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i32 @unsigned_sat_variable_i32_using_cmp_notval(i32 %x, i32 %y) {
; ANY-LABEL: unsigned_sat_variable_i32_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    # kill: def $esi killed $esi def $rsi
; ANY-NEXT:    # kill: def $edi killed $edi def $rdi
; ANY-NEXT:    leal (%rdi,%rsi), %ecx
; ANY-NEXT:    notl %esi
; ANY-NEXT:    cmpl %esi, %edi
; ANY-NEXT:    movl $-1, %eax
; ANY-NEXT:    cmovbel %ecx, %eax
; ANY-NEXT:    retq
  %noty = xor i32 %y, -1
  %a = add i32 %x, %y
  %c = icmp ugt i32 %x, %noty
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i64 @unsigned_sat_variable_i64_using_min(i64 %x, i64 %y) {
; ANY-LABEL: unsigned_sat_variable_i64_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    movq %rsi, %rax
; ANY-NEXT:    notq %rax
; ANY-NEXT:    cmpq %rax, %rdi
; ANY-NEXT:    cmovbq %rdi, %rax
; ANY-NEXT:    leaq (%rax,%rsi), %rax
; ANY-NEXT:    retq
  %noty = xor i64 %y, -1
  %c = icmp ult i64 %x, %noty
  %s = select i1 %c, i64 %x, i64 %noty
  %r = add i64 %s, %y
  ret i64 %r
}

define i64 @unsigned_sat_variable_i64_using_cmp_sum(i64 %x, i64 %y) {
; ANY-LABEL: unsigned_sat_variable_i64_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addq %rsi, %rdi
; ANY-NEXT:    movq $-1, %rax
; ANY-NEXT:    cmovaeq %rdi, %rax
; ANY-NEXT:    retq
  %a = add i64 %x, %y
  %c = icmp ugt i64 %x, %a
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

define i64 @unsigned_sat_variable_i64_using_cmp_notval(i64 %x, i64 %y) {
; ANY-LABEL: unsigned_sat_variable_i64_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    leaq (%rdi,%rsi), %rcx
; ANY-NEXT:    notq %rsi
; ANY-NEXT:    cmpq %rsi, %rdi
; ANY-NEXT:    movq $-1, %rax
; ANY-NEXT:    cmovbeq %rcx, %rax
; ANY-NEXT:    retq
  %noty = xor i64 %y, -1
  %a = add i64 %x, %y
  %c = icmp ugt i64 %x, %noty
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

define <16 x i8> @unsigned_sat_constant_v16i8_using_min(<16 x i8> %x) {
; ANY-LABEL: unsigned_sat_constant_v16i8_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    pminub {{.*}}(%rip), %xmm0
; ANY-NEXT:    paddb {{.*}}(%rip), %xmm0
; ANY-NEXT:    retq
  %c = icmp ult <16 x i8> %x, <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
  %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
  %r = add <16 x i8> %s, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_sum(<16 x i8> %x) {
; ANY-LABEL: unsigned_sat_constant_v16i8_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    paddusb {{.*}}(%rip), %xmm0
; ANY-NEXT:    retq
  %a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  %c = icmp ugt <16 x i8> %x, %a
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_notval(<16 x i8> %x) {
; ANY-LABEL: unsigned_sat_constant_v16i8_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    paddusb {{.*}}(%rip), %xmm0
; ANY-NEXT:    retq
  %a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  %c = icmp ugt <16 x i8> %x, <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <8 x i16> @unsigned_sat_constant_v8i16_using_min(<8 x i16> %x) {
; SSE2-LABEL: unsigned_sat_constant_v8i16_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    pminsw {{.*}}(%rip), %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    paddw {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_constant_v8i16_using_min:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pminuw {{.*}}(%rip), %xmm0
; SSE41-NEXT:    paddw {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
  %c = icmp ult <8 x i16> %x, <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
  %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
  %r = add <8 x i16> %s, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_sum(<8 x i16> %x) {
; ANY-LABEL: unsigned_sat_constant_v8i16_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    paddusw {{.*}}(%rip), %xmm0
; ANY-NEXT:    retq
  %a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
  %c = icmp ugt <8 x i16> %x, %a
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_notval(<8 x i16> %x) {
; ANY-LABEL: unsigned_sat_constant_v8i16_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    paddusw {{.*}}(%rip), %xmm0
; ANY-NEXT:    retq
  %a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
  %c = icmp ugt <8 x i16> %x, <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <4 x i32> @unsigned_sat_constant_v4i32_using_min(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483605,2147483605,2147483605,2147483605]
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pandn {{.*}}(%rip), %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    paddd {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_constant_v4i32_using_min:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pminud {{.*}}(%rip), %xmm0
; SSE41-NEXT:    paddd {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
  %c = icmp ult <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
  %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> <i32 -43, i32 -43, i32 -43, i32 -43>
  %r = add <4 x i32> %s, <i32 42, i32 42, i32 42, i32 42>
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_sum(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [42,42,42,42]
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [42,42,42,42]
; SSE41-NEXT:    paddd %xmm0, %xmm2
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    pminud %xmm2, %xmm1
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE41-NEXT:    pxor %xmm0, %xmm1
; SSE41-NEXT:    por %xmm2, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
  %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
  %c = icmp ugt <4 x i32> %x, %a
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_notval(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [42,42,42,42]
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [42,42,42,42]
; SSE41-NEXT:    paddd %xmm0, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [4294967254,4294967254,4294967254,4294967254]
; SSE41-NEXT:    pmaxud %xmm0, %xmm2
; SSE41-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
  %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
  %c = icmp ugt <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_notval_nonsplat(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval_nonsplat:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [43,44,45,46]
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval_nonsplat:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [43,44,45,46]
; SSE41-NEXT:    paddd %xmm0, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [4294967253,4294967252,4294967251,4294967250]
; SSE41-NEXT:    pmaxud %xmm0, %xmm2
; SSE41-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
  %a = add <4 x i32> %x, <i32 43, i32 44, i32 45, i32 46>
  %c = icmp ugt <4 x i32> %x, <i32 -44, i32 -45, i32 -46, i32 -47>
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <2 x i64> @unsigned_sat_constant_v2i64_using_min(<2 x i64> %x) {
; SSE2-LABEL: unsigned_sat_constant_v2i64_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372034707292117,9223372034707292117]
; SSE2-NEXT:    movdqa %xmm2, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pandn {{.*}}(%rip), %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    paddq {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_constant_v2i64_using_min:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    movapd {{.*#+}} xmm2 = [18446744073709551573,18446744073709551573]
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [9223372034707292117,9223372034707292117]
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm4
; SSE41-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm3, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm5, %xmm0
; SSE41-NEXT:    por %xmm4, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT:    paddq {{.*}}(%rip), %xmm2
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
  %c = icmp ult <2 x i64> %x, <i64 -43, i64 -43>
  %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> <i64 -43, i64 -43>
  %r = add <2 x i64> %s, <i64 42, i64 42>
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_sum(<2 x i64> %x) {
; ANY-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    movdqa {{.*#+}} xmm1 = [42,42]
; ANY-NEXT:    paddq %xmm0, %xmm1
; ANY-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; ANY-NEXT:    pxor %xmm2, %xmm0
; ANY-NEXT:    pxor %xmm1, %xmm2
; ANY-NEXT:    movdqa %xmm0, %xmm3
; ANY-NEXT:    pcmpgtd %xmm2, %xmm3
; ANY-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; ANY-NEXT:    pcmpeqd %xmm0, %xmm2
; ANY-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; ANY-NEXT:    pand %xmm4, %xmm2
; ANY-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; ANY-NEXT:    por %xmm1, %xmm0
; ANY-NEXT:    por %xmm2, %xmm0
; ANY-NEXT:    retq
  %a = add <2 x i64> %x, <i64 42, i64 42>
  %c = icmp ugt <2 x i64> %x, %a
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_notval(<2 x i64> %x) {
; ANY-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    movdqa {{.*#+}} xmm1 = [42,42]
; ANY-NEXT:    paddq %xmm0, %xmm1
; ANY-NEXT:    pxor {{.*}}(%rip), %xmm0
; ANY-NEXT:    movdqa {{.*#+}} xmm2 = [9223372034707292117,9223372034707292117]
; ANY-NEXT:    movdqa %xmm0, %xmm3
; ANY-NEXT:    pcmpgtd %xmm2, %xmm3
; ANY-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; ANY-NEXT:    pcmpeqd %xmm2, %xmm0
; ANY-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; ANY-NEXT:    pand %xmm4, %xmm2
; ANY-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; ANY-NEXT:    por %xmm1, %xmm0
; ANY-NEXT:    por %xmm2, %xmm0
; ANY-NEXT:    retq
  %a = add <2 x i64> %x, <i64 42, i64 42>
  %c = icmp ugt <2 x i64> %x, <i64 -43, i64 -43>
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}

define <16 x i8> @unsigned_sat_variable_v16i8_using_min(<16 x i8> %x, <16 x i8> %y) {
; ANY-LABEL: unsigned_sat_variable_v16i8_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    pcmpeqd %xmm2, %xmm2
; ANY-NEXT:    pxor %xmm1, %xmm2
; ANY-NEXT:    pminub %xmm2, %xmm0
; ANY-NEXT:    paddb %xmm1, %xmm0
; ANY-NEXT:    retq
  %noty = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %c = icmp ult <16 x i8> %x, %noty
  %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %noty
  %r = add <16 x i8> %s, %y
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_variable_v16i8_using_cmp_sum(<16 x i8> %x, <16 x i8> %y) {
; ANY-LABEL: unsigned_sat_variable_v16i8_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    paddusb %xmm1, %xmm0
; ANY-NEXT:    retq
  %a = add <16 x i8> %x, %y
  %c = icmp ugt <16 x i8> %x, %a
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_variable_v16i8_using_cmp_notval(<16 x i8> %x, <16 x i8> %y) {
; ANY-LABEL: unsigned_sat_variable_v16i8_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    pcmpeqd %xmm2, %xmm2
; ANY-NEXT:    movdqa %xmm0, %xmm3
; ANY-NEXT:    paddb %xmm1, %xmm3
; ANY-NEXT:    pxor %xmm2, %xmm1
; ANY-NEXT:    pminub %xmm0, %xmm1
; ANY-NEXT:    pcmpeqb %xmm1, %xmm0
; ANY-NEXT:    pxor %xmm2, %xmm0
; ANY-NEXT:    por %xmm3, %xmm0
; ANY-NEXT:    retq
  %noty = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %a = add <16 x i8> %x, %y
  %c = icmp ugt <16 x i8> %x, %noty
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <8 x i16> @unsigned_sat_variable_v8i16_using_min(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: unsigned_sat_variable_v8i16_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [32767,32767,32767,32767,32767,32767,32767,32767]
; SSE2-NEXT:    pxor %xmm1, %xmm3
; SSE2-NEXT:    pminsw %xmm3, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    paddw %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_variable_v8i16_using_min:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE41-NEXT:    pxor %xmm1, %xmm2
; SSE41-NEXT:    pminuw %xmm2, %xmm0
; SSE41-NEXT:    paddw %xmm1, %xmm0
; SSE41-NEXT:    retq
  %noty = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %c = icmp ult <8 x i16> %x, %noty
  %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %noty
  %r = add <8 x i16> %s, %y
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_variable_v8i16_using_cmp_sum(<8 x i16> %x, <8 x i16> %y) {
; ANY-LABEL: unsigned_sat_variable_v8i16_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    paddusw %xmm1, %xmm0
; ANY-NEXT:    retq
  %a = add <8 x i16> %x, %y
  %c = icmp ugt <8 x i16> %x, %a
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_variable_v8i16_using_cmp_notval(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    paddw %xmm1, %xmm2
; SSE2-NEXT:    pxor {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    paddw %xmm1, %xmm3
; SSE41-NEXT:    pxor %xmm2, %xmm1
; SSE41-NEXT:    pminuw %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    por %xmm3, %xmm0
; SSE41-NEXT:    retq
  %noty = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %a = add <8 x i16> %x, %y
  %c = icmp ugt <8 x i16> %x, %noty
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <4 x i32> @unsigned_sat_variable_v4i32_using_min(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: unsigned_sat_variable_v4i32_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm0, %xmm3
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483647,2147483647,2147483647,2147483647]
; SSE2-NEXT:    pxor %xmm1, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm4
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pandn %xmm4, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    paddd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_variable_v4i32_using_min:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE41-NEXT:    pxor %xmm1, %xmm2
; SSE41-NEXT:    pminud %xmm2, %xmm0
; SSE41-NEXT:    paddd %xmm1, %xmm0
; SSE41-NEXT:    retq
  %noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %c = icmp ult <4 x i32> %x, %noty
  %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %noty
  %r = add <4 x i32> %s, %y
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_variable_v4i32_using_cmp_sum(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; SSE2:       # %bb.0:
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; SSE41:       # %bb.0:
; SSE41-NEXT:    paddd %xmm0, %xmm1
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pminud %xmm1, %xmm2
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm2
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE41-NEXT:    pxor %xmm0, %xmm2
; SSE41-NEXT:    por %xmm1, %xmm2
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
  %a = add <4 x i32> %x, %y
  %c = icmp ugt <4 x i32> %x, %a
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_variable_v4i32_using_cmp_notval(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    paddd %xmm1, %xmm2
; SSE2-NEXT:    pxor {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    paddd %xmm1, %xmm3
; SSE41-NEXT:    pxor %xmm2, %xmm1
; SSE41-NEXT:    pminud %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    por %xmm3, %xmm0
; SSE41-NEXT:    retq
  %noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = add <4 x i32> %x, %y
  %c = icmp ugt <4 x i32> %x, %noty
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <2 x i64> @unsigned_sat_variable_v2i64_using_min(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: unsigned_sat_variable_v2i64_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    pxor %xmm0, %xmm3
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [9223372034707292159,9223372034707292159]
; SSE2-NEXT:    pxor %xmm1, %xmm4
; SSE2-NEXT:    movdqa %xmm4, %xmm5
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE2-NEXT:    pand %xmm6, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; SSE2-NEXT:    por %xmm3, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm4
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pandn %xmm4, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    paddq %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_variable_v2i64_using_min:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpeqd %xmm3, %xmm3
; SSE41-NEXT:    pxor %xmm1, %xmm3
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [9223372034707292159,9223372034707292159]
; SSE41-NEXT:    pxor %xmm1, %xmm4
; SSE41-NEXT:    movdqa %xmm4, %xmm5
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm5
; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
; SSE41-NEXT:    pand %xmm6, %xmm0
; SSE41-NEXT:    por %xmm5, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm3
; SSE41-NEXT:    paddq %xmm1, %xmm3
; SSE41-NEXT:    movdqa %xmm3, %xmm0
; SSE41-NEXT:    retq
  %noty = xor <2 x i64> %y, <i64 -1, i64 -1>
  %c = icmp ult <2 x i64> %x, %noty
  %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> %noty
  %r = add <2 x i64> %s, %y
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_sum(<2 x i64> %x, <2 x i64> %y) {
; ANY-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    paddq %xmm0, %xmm1
; ANY-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; ANY-NEXT:    pxor %xmm2, %xmm0
; ANY-NEXT:    pxor %xmm1, %xmm2
; ANY-NEXT:    movdqa %xmm0, %xmm3
; ANY-NEXT:    pcmpgtd %xmm2, %xmm3
; ANY-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; ANY-NEXT:    pcmpeqd %xmm0, %xmm2
; ANY-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; ANY-NEXT:    pand %xmm4, %xmm2
; ANY-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; ANY-NEXT:    por %xmm1, %xmm0
; ANY-NEXT:    por %xmm2, %xmm0
; ANY-NEXT:    retq
  %a = add <2 x i64> %x, %y
  %c = icmp ugt <2 x i64> %x, %a
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_notval(<2 x i64> %x, <2 x i64> %y) {
; ANY-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    movdqa %xmm0, %xmm2
; ANY-NEXT:    paddq %xmm1, %xmm2
; ANY-NEXT:    pxor {{.*}}(%rip), %xmm1
; ANY-NEXT:    pxor {{.*}}(%rip), %xmm0
; ANY-NEXT:    movdqa %xmm0, %xmm3
; ANY-NEXT:    pcmpgtd %xmm1, %xmm3
; ANY-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; ANY-NEXT:    pcmpeqd %xmm1, %xmm0
; ANY-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; ANY-NEXT:    pand %xmm4, %xmm1
; ANY-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; ANY-NEXT:    por %xmm2, %xmm0
; ANY-NEXT:    por %xmm1, %xmm0
; ANY-NEXT:    retq
  %noty = xor <2 x i64> %y, <i64 -1, i64 -1>
  %a = add <2 x i64> %x, %y
  %c = icmp ugt <2 x i64> %x, %noty
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}