1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
3 ; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
; scmp.8.8: @llvm.scmp producing an i8 result from two i8 operands.
; The check lines that follow are autogenerated (see the NOTE on the first line
; of this file); regenerate them with the script rather than editing by hand.
5 define i8 @scmp.8.8(i8 %x, i8 %y) nounwind {
8 ; X64-NEXT: cmpb %sil, %dil
11 ; X64-NEXT: subb %cl, %al
14 ; X86-LABEL: scmp.8.8:
16 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
17 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
20 ; X86-NEXT: subb %cl, %al
22 %1 = call i8 @llvm.scmp(i8 %x, i8 %y)
; scmp.8.16: i8 result from i16 operands; both targets are expected to use a
; 16-bit compare (cmpw).
26 define i8 @scmp.8.16(i16 %x, i16 %y) nounwind {
27 ; X64-LABEL: scmp.8.16:
29 ; X64-NEXT: cmpw %si, %di
32 ; X64-NEXT: subb %cl, %al
35 ; X86-LABEL: scmp.8.16:
37 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
38 ; X86-NEXT: cmpw {{[0-9]+}}(%esp), %ax
41 ; X86-NEXT: subb %cl, %al
43 %1 = call i8 @llvm.scmp(i16 %x, i16 %y)
; scmp.8.32: i8 result from i32 operands; both targets are expected to use a
; single 32-bit compare (cmpl).
47 define i8 @scmp.8.32(i32 %x, i32 %y) nounwind {
48 ; X64-LABEL: scmp.8.32:
50 ; X64-NEXT: cmpl %esi, %edi
53 ; X64-NEXT: subb %cl, %al
56 ; X86-LABEL: scmp.8.32:
58 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
59 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
62 ; X86-NEXT: subb %cl, %al
64 %1 = call i8 @llvm.scmp(i32 %x, i32 %y)
; scmp.8.64: i8 result from i64 operands. The x86-64 run expects one cmpq; the
; i686 run expects two cmpl/sbbl sequences, the second with the operands
; swapped.
68 define i8 @scmp.8.64(i64 %x, i64 %y) nounwind {
69 ; X64-LABEL: scmp.8.64:
71 ; X64-NEXT: cmpq %rsi, %rdi
74 ; X64-NEXT: subb %cl, %al
77 ; X86-LABEL: scmp.8.64:
79 ; X86-NEXT: pushl %ebx
80 ; X86-NEXT: pushl %edi
81 ; X86-NEXT: pushl %esi
82 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
83 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
84 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
85 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
86 ; X86-NEXT: cmpl %eax, %edx
87 ; X86-NEXT: movl %esi, %edi
88 ; X86-NEXT: sbbl %ecx, %edi
90 ; X86-NEXT: cmpl %edx, %eax
91 ; X86-NEXT: sbbl %esi, %ecx
93 ; X86-NEXT: subb %bl, %al
98 %1 = call i8 @llvm.scmp(i64 %x, i64 %y)
; scmp.8.128: i8 result from i128 operands. The x86-64 run expects a cmpq/sbbq
; pair in each operand order; the i686 run expects a cmpl followed by three
; sbbl in each operand order.
102 define i8 @scmp.8.128(i128 %x, i128 %y) nounwind {
103 ; X64-LABEL: scmp.8.128:
105 ; X64-NEXT: cmpq %rdx, %rdi
106 ; X64-NEXT: movq %rsi, %rax
107 ; X64-NEXT: sbbq %rcx, %rax
108 ; X64-NEXT: setl %r8b
109 ; X64-NEXT: cmpq %rdi, %rdx
110 ; X64-NEXT: sbbq %rsi, %rcx
112 ; X64-NEXT: subb %r8b, %al
115 ; X86-LABEL: scmp.8.128:
117 ; X86-NEXT: pushl %ebp
118 ; X86-NEXT: pushl %ebx
119 ; X86-NEXT: pushl %edi
120 ; X86-NEXT: pushl %esi
121 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
122 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
123 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
124 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
125 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
126 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi
127 ; X86-NEXT: movl %ebx, %ebp
128 ; X86-NEXT: sbbl %edx, %ebp
129 ; X86-NEXT: movl %ecx, %ebp
130 ; X86-NEXT: sbbl %eax, %ebp
131 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
132 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
133 ; X86-NEXT: movl %esi, %ecx
134 ; X86-NEXT: sbbl %ebp, %ecx
136 ; X86-NEXT: cmpl %edi, {{[0-9]+}}(%esp)
137 ; X86-NEXT: sbbl %ebx, %edx
138 ; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax
139 ; X86-NEXT: sbbl %esi, %ebp
141 ; X86-NEXT: subb %cl, %al
142 ; X86-NEXT: popl %esi
143 ; X86-NEXT: popl %edi
144 ; X86-NEXT: popl %ebx
145 ; X86-NEXT: popl %ebp
147 %1 = call i8 @llvm.scmp(i128 %x, i128 %y)
; scmp.32.32: i32 result from i32 operands; the byte-sized subb result is
; expected to be widened with movsbl on both targets.
151 define i32 @scmp.32.32(i32 %x, i32 %y) nounwind {
152 ; X64-LABEL: scmp.32.32:
154 ; X64-NEXT: cmpl %esi, %edi
157 ; X64-NEXT: subb %al, %cl
158 ; X64-NEXT: movsbl %cl, %eax
161 ; X86-LABEL: scmp.32.32:
163 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
164 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
167 ; X86-NEXT: subb %al, %cl
168 ; X86-NEXT: movsbl %cl, %eax
170 %1 = call i32 @llvm.scmp(i32 %x, i32 %y)
; scmp.32.64: i32 result from i64 operands; the i686 run expects two
; cmpl/sbbl sequences (operands swapped in the second) and a final movsbl.
174 define i32 @scmp.32.64(i64 %x, i64 %y) nounwind {
175 ; X64-LABEL: scmp.32.64:
177 ; X64-NEXT: cmpq %rsi, %rdi
180 ; X64-NEXT: subb %al, %cl
181 ; X64-NEXT: movsbl %cl, %eax
184 ; X86-LABEL: scmp.32.64:
186 ; X86-NEXT: pushl %ebx
187 ; X86-NEXT: pushl %edi
188 ; X86-NEXT: pushl %esi
189 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
190 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
191 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
192 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
193 ; X86-NEXT: cmpl %eax, %edx
194 ; X86-NEXT: movl %esi, %edi
195 ; X86-NEXT: sbbl %ecx, %edi
197 ; X86-NEXT: cmpl %edx, %eax
198 ; X86-NEXT: sbbl %esi, %ecx
200 ; X86-NEXT: subb %bl, %al
201 ; X86-NEXT: movsbl %al, %eax
202 ; X86-NEXT: popl %esi
203 ; X86-NEXT: popl %edi
204 ; X86-NEXT: popl %ebx
206 %1 = call i32 @llvm.scmp(i64 %x, i64 %y)
; scmp.64.64: i64 result from i64 operands. The x86-64 run expects movsbq to
; widen the result; the i686 run expects movsbl into %eax followed by a copy
; to %edx and sarl $31.
210 define i64 @scmp.64.64(i64 %x, i64 %y) nounwind {
211 ; X64-LABEL: scmp.64.64:
213 ; X64-NEXT: cmpq %rsi, %rdi
216 ; X64-NEXT: subb %al, %cl
217 ; X64-NEXT: movsbq %cl, %rax
220 ; X86-LABEL: scmp.64.64:
222 ; X86-NEXT: pushl %ebx
223 ; X86-NEXT: pushl %edi
224 ; X86-NEXT: pushl %esi
225 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
226 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
227 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
228 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
229 ; X86-NEXT: cmpl %eax, %edx
230 ; X86-NEXT: movl %esi, %edi
231 ; X86-NEXT: sbbl %ecx, %edi
233 ; X86-NEXT: cmpl %edx, %eax
234 ; X86-NEXT: sbbl %esi, %ecx
236 ; X86-NEXT: subb %bl, %al
237 ; X86-NEXT: movsbl %al, %eax
238 ; X86-NEXT: movl %eax, %edx
239 ; X86-NEXT: sarl $31, %edx
240 ; X86-NEXT: popl %esi
241 ; X86-NEXT: popl %edi
242 ; X86-NEXT: popl %ebx
244 %1 = call i64 @llvm.scmp(i64 %x, i64 %y)
; scmp_narrow_result: result type (i4) narrower than a byte, from i32
; operands.
248 define i4 @scmp_narrow_result(i32 %x, i32 %y) nounwind {
249 ; X64-LABEL: scmp_narrow_result:
251 ; X64-NEXT: cmpl %esi, %edi
254 ; X64-NEXT: subb %cl, %al
257 ; X86-LABEL: scmp_narrow_result:
259 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
260 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
263 ; X86-NEXT: subb %cl, %al
265 %1 = call i4 @llvm.scmp(i32 %x, i32 %y)
; scmp_narrow_op: i8 result from i62 operands. The expectations apply a
; shift-left/arithmetic-shift-right pair by 2 (64 - 62) to each operand
; (shlq/sarq on x86-64, shll/sarl on i686) before comparing.
269 define i8 @scmp_narrow_op(i62 %x, i62 %y) nounwind {
270 ; X64-LABEL: scmp_narrow_op:
272 ; X64-NEXT: shlq $2, %rsi
273 ; X64-NEXT: sarq $2, %rsi
274 ; X64-NEXT: shlq $2, %rdi
275 ; X64-NEXT: sarq $2, %rdi
276 ; X64-NEXT: cmpq %rsi, %rdi
279 ; X64-NEXT: subb %cl, %al
282 ; X86-LABEL: scmp_narrow_op:
284 ; X86-NEXT: pushl %ebx
285 ; X86-NEXT: pushl %edi
286 ; X86-NEXT: pushl %esi
287 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
288 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
289 ; X86-NEXT: shll $2, %eax
290 ; X86-NEXT: sarl $2, %eax
291 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
292 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
293 ; X86-NEXT: shll $2, %esi
294 ; X86-NEXT: sarl $2, %esi
295 ; X86-NEXT: cmpl %ecx, %edx
296 ; X86-NEXT: movl %esi, %edi
297 ; X86-NEXT: sbbl %eax, %edi
299 ; X86-NEXT: cmpl %edx, %ecx
300 ; X86-NEXT: sbbl %esi, %eax
302 ; X86-NEXT: subb %bl, %al
303 ; X86-NEXT: popl %esi
304 ; X86-NEXT: popl %edi
305 ; X86-NEXT: popl %ebx
307 %1 = call i8 @llvm.scmp(i62 %x, i62 %y)
; scmp_wide_result: i141 result from i32 operands. Both runs expect the
; top piece of the result to be masked with 8191 (0x1FFF, i.e. 141 - 128 = 13
; bits); the i686 run writes the result through the pointer held in %eax.
311 define i141 @scmp_wide_result(i32 %x, i32 %y) nounwind {
312 ; X64-LABEL: scmp_wide_result:
314 ; X64-NEXT: cmpl %esi, %edi
317 ; X64-NEXT: subb %al, %cl
318 ; X64-NEXT: movsbq %cl, %rax
319 ; X64-NEXT: movq %rax, %rdx
320 ; X64-NEXT: sarq $63, %rdx
321 ; X64-NEXT: movl %edx, %ecx
322 ; X64-NEXT: andl $8191, %ecx # imm = 0x1FFF
325 ; X86-LABEL: scmp_wide_result:
327 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
328 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
329 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
332 ; X86-NEXT: subb %cl, %dl
333 ; X86-NEXT: movsbl %dl, %ecx
334 ; X86-NEXT: movl %ecx, (%eax)
335 ; X86-NEXT: sarl $31, %ecx
336 ; X86-NEXT: movl %ecx, 12(%eax)
337 ; X86-NEXT: movl %ecx, 8(%eax)
338 ; X86-NEXT: movl %ecx, 4(%eax)
339 ; X86-NEXT: andl $8191, %ecx # imm = 0x1FFF
340 ; X86-NEXT: movw %cx, 16(%eax)
342 %1 = call i141 @llvm.scmp(i32 %x, i32 %y)
; scmp_wide_op: i8 result from i109 operands. The expectations apply a
; shift-left/arithmetic-shift-right pair by 19 (128 - 109) to one register of
; each operand, then compare with carry-chained sbb in both operand orders.
346 define i8 @scmp_wide_op(i109 %x, i109 %y) nounwind {
347 ; X64-LABEL: scmp_wide_op:
349 ; X64-NEXT: shlq $19, %rcx
350 ; X64-NEXT: sarq $19, %rcx
351 ; X64-NEXT: shlq $19, %rsi
352 ; X64-NEXT: sarq $19, %rsi
353 ; X64-NEXT: cmpq %rdx, %rdi
354 ; X64-NEXT: movq %rsi, %rax
355 ; X64-NEXT: sbbq %rcx, %rax
356 ; X64-NEXT: setl %r8b
357 ; X64-NEXT: cmpq %rdi, %rdx
358 ; X64-NEXT: sbbq %rsi, %rcx
360 ; X64-NEXT: subb %r8b, %al
363 ; X86-LABEL: scmp_wide_op:
365 ; X86-NEXT: pushl %ebp
366 ; X86-NEXT: pushl %ebx
367 ; X86-NEXT: pushl %edi
368 ; X86-NEXT: pushl %esi
369 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
370 ; X86-NEXT: shll $19, %eax
371 ; X86-NEXT: sarl $19, %eax
372 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
373 ; X86-NEXT: shll $19, %ecx
374 ; X86-NEXT: sarl $19, %ecx
375 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
376 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
377 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
378 ; X86-NEXT: cmpl %esi, {{[0-9]+}}(%esp)
379 ; X86-NEXT: sbbl %edx, %ebp
380 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
381 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
382 ; X86-NEXT: movl %edi, %esi
383 ; X86-NEXT: sbbl %ebp, %esi
384 ; X86-NEXT: movl %ecx, %esi
385 ; X86-NEXT: sbbl %eax, %esi
387 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
388 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi
389 ; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
390 ; X86-NEXT: sbbl %edi, %ebp
391 ; X86-NEXT: sbbl %ecx, %eax
393 ; X86-NEXT: subb %bl, %al
394 ; X86-NEXT: popl %esi
395 ; X86-NEXT: popl %edi
396 ; X86-NEXT: popl %ebx
397 ; X86-NEXT: popl %ebp
399 %1 = call i8 @llvm.scmp(i109 %x, i109 %y)
; scmp_uncommon_types: i41 result from i7 operands. The x86-64 run expects an
; addb/sarb pair on each operand before the byte compare and movsbq to widen
; the result; the i686 run expects movsbl plus sarl $31 into %edx.
403 define i41 @scmp_uncommon_types(i7 %x, i7 %y) nounwind {
404 ; X64-LABEL: scmp_uncommon_types:
406 ; X64-NEXT: addb %sil, %sil
407 ; X64-NEXT: sarb %sil
408 ; X64-NEXT: addb %dil, %dil
409 ; X64-NEXT: sarb %dil
410 ; X64-NEXT: cmpb %sil, %dil
413 ; X64-NEXT: subb %al, %cl
414 ; X64-NEXT: movsbq %cl, %rax
417 ; X86-LABEL: scmp_uncommon_types:
419 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
420 ; X86-NEXT: addb %al, %al
422 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
423 ; X86-NEXT: addb %cl, %cl
425 ; X86-NEXT: cmpb %al, %cl
428 ; X86-NEXT: subb %al, %cl
429 ; X86-NEXT: movsbl %cl, %eax
430 ; X86-NEXT: movl %eax, %edx
431 ; X86-NEXT: sarl $31, %edx
433 %1 = call i41 @llvm.scmp(i7 %x, i7 %y)
; scmp_normal_vectors: <4 x i32> result from <4 x i32> operands. The
; expectations show four scalar cmpl sequences; x86-64 extracts lanes with
; pshufd/movd and rebuilds the vector with punpckldq/punpcklqdq, while i686
; stores the four results through the pointer in %eax.
437 define <4 x i32> @scmp_normal_vectors(<4 x i32> %x, <4 x i32> %y) nounwind {
438 ; X64-LABEL: scmp_normal_vectors:
440 ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3]
441 ; X64-NEXT: movd %xmm2, %eax
442 ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3]
443 ; X64-NEXT: movd %xmm2, %ecx
444 ; X64-NEXT: cmpl %eax, %ecx
447 ; X64-NEXT: subb %al, %cl
448 ; X64-NEXT: movsbl %cl, %eax
449 ; X64-NEXT: movd %eax, %xmm2
450 ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
451 ; X64-NEXT: movd %xmm3, %eax
452 ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
453 ; X64-NEXT: movd %xmm3, %ecx
454 ; X64-NEXT: cmpl %eax, %ecx
457 ; X64-NEXT: subb %al, %cl
458 ; X64-NEXT: movsbl %cl, %eax
459 ; X64-NEXT: movd %eax, %xmm3
460 ; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
461 ; X64-NEXT: movd %xmm1, %eax
462 ; X64-NEXT: movd %xmm0, %ecx
463 ; X64-NEXT: cmpl %eax, %ecx
466 ; X64-NEXT: subb %al, %cl
467 ; X64-NEXT: movsbl %cl, %eax
468 ; X64-NEXT: movd %eax, %xmm2
469 ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
470 ; X64-NEXT: movd %xmm1, %eax
471 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
472 ; X64-NEXT: movd %xmm0, %ecx
473 ; X64-NEXT: cmpl %eax, %ecx
476 ; X64-NEXT: subb %al, %cl
477 ; X64-NEXT: movsbl %cl, %eax
478 ; X64-NEXT: movd %eax, %xmm0
479 ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
480 ; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
481 ; X64-NEXT: movdqa %xmm2, %xmm0
484 ; X86-LABEL: scmp_normal_vectors:
486 ; X86-NEXT: pushl %ebx
487 ; X86-NEXT: pushl %edi
488 ; X86-NEXT: pushl %esi
489 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
490 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
491 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
492 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
493 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
494 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edx
497 ; X86-NEXT: subb %dl, %dh
498 ; X86-NEXT: movsbl %dh, %edx
499 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi
502 ; X86-NEXT: subb %bl, %bh
503 ; X86-NEXT: movsbl %bh, %edi
504 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi
507 ; X86-NEXT: subb %bl, %bh
508 ; X86-NEXT: movsbl %bh, %esi
509 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
512 ; X86-NEXT: subb %cl, %ch
513 ; X86-NEXT: movsbl %ch, %ecx
514 ; X86-NEXT: movl %ecx, 12(%eax)
515 ; X86-NEXT: movl %esi, 8(%eax)
516 ; X86-NEXT: movl %edi, 4(%eax)
517 ; X86-NEXT: movl %edx, (%eax)
518 ; X86-NEXT: popl %esi
519 ; X86-NEXT: popl %edi
520 ; X86-NEXT: popl %ebx
522 %1 = call <4 x i32> @llvm.scmp(<4 x i32> %x, <4 x i32> %y)
; scmp_narrow_vec_result: <4 x i8> result from <4 x i32> operands. The x86-64
; run expects the four byte results to be combined into one 32-bit value with
; movzbl/shll/orl and moved into %xmm0; the i686 run stores four bytes through
; the pointer in %eax.
526 define <4 x i8> @scmp_narrow_vec_result(<4 x i32> %x, <4 x i32> %y) nounwind {
527 ; X64-LABEL: scmp_narrow_vec_result:
529 ; X64-NEXT: movd %xmm1, %eax
530 ; X64-NEXT: movd %xmm0, %ecx
531 ; X64-NEXT: cmpl %eax, %ecx
534 ; X64-NEXT: subb %al, %cl
535 ; X64-NEXT: movzbl %cl, %eax
536 ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
537 ; X64-NEXT: movd %xmm2, %ecx
538 ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
539 ; X64-NEXT: movd %xmm2, %edx
540 ; X64-NEXT: cmpl %ecx, %edx
543 ; X64-NEXT: subb %cl, %dl
544 ; X64-NEXT: movzbl %dl, %ecx
545 ; X64-NEXT: shll $8, %ecx
546 ; X64-NEXT: orl %eax, %ecx
547 ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
548 ; X64-NEXT: movd %xmm2, %eax
549 ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
550 ; X64-NEXT: movd %xmm2, %edx
551 ; X64-NEXT: cmpl %eax, %edx
554 ; X64-NEXT: subb %al, %dl
555 ; X64-NEXT: movzbl %dl, %eax
556 ; X64-NEXT: shll $16, %eax
557 ; X64-NEXT: orl %ecx, %eax
558 ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3]
559 ; X64-NEXT: movd %xmm1, %ecx
560 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
561 ; X64-NEXT: movd %xmm0, %edx
562 ; X64-NEXT: cmpl %ecx, %edx
565 ; X64-NEXT: subb %cl, %dl
566 ; X64-NEXT: movzbl %dl, %ecx
567 ; X64-NEXT: shll $24, %ecx
568 ; X64-NEXT: orl %eax, %ecx
569 ; X64-NEXT: movd %ecx, %xmm0
572 ; X86-LABEL: scmp_narrow_vec_result:
574 ; X86-NEXT: pushl %ebx
575 ; X86-NEXT: pushl %edi
576 ; X86-NEXT: pushl %esi
577 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
578 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
579 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
580 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
581 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
582 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
585 ; X86-NEXT: subb %ch, %cl
586 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi
589 ; X86-NEXT: subb %ch, %bl
590 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi
593 ; X86-NEXT: subb %ch, %bh
594 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edx
597 ; X86-NEXT: subb %dl, %ch
598 ; X86-NEXT: movb %ch, 3(%eax)
599 ; X86-NEXT: movb %bh, 2(%eax)
600 ; X86-NEXT: movb %bl, 1(%eax)
601 ; X86-NEXT: movb %cl, (%eax)
602 ; X86-NEXT: popl %esi
603 ; X86-NEXT: popl %edi
604 ; X86-NEXT: popl %ebx
606 %1 = call <4 x i8> @llvm.scmp(<4 x i32> %x, <4 x i32> %y)
; scmp_narrow_vec_op: <4 x i32> result from <4 x i8> operands. The x86-64 run
; expects each operand to be widened with punpcklbw/punpcklwd/psrad $24 before
; four scalar cmpl sequences; the i686 run expects byte compares (cmpb) on
; values loaded from the stack.
610 define <4 x i32> @scmp_narrow_vec_op(<4 x i8> %x, <4 x i8> %y) nounwind {
611 ; X64-LABEL: scmp_narrow_vec_op:
613 ; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
614 ; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
615 ; X64-NEXT: psrad $24, %xmm1
616 ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3]
617 ; X64-NEXT: movd %xmm2, %eax
618 ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
619 ; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
620 ; X64-NEXT: psrad $24, %xmm2
621 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm2[3,3,3,3]
622 ; X64-NEXT: movd %xmm0, %ecx
623 ; X64-NEXT: cmpl %eax, %ecx
626 ; X64-NEXT: subb %al, %cl
627 ; X64-NEXT: movsbl %cl, %eax
628 ; X64-NEXT: movd %eax, %xmm0
629 ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
630 ; X64-NEXT: movd %xmm3, %eax
631 ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,2,3]
632 ; X64-NEXT: movd %xmm3, %ecx
633 ; X64-NEXT: cmpl %eax, %ecx
636 ; X64-NEXT: subb %al, %cl
637 ; X64-NEXT: movsbl %cl, %eax
638 ; X64-NEXT: movd %eax, %xmm3
639 ; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
640 ; X64-NEXT: movd %xmm1, %eax
641 ; X64-NEXT: movd %xmm2, %ecx
642 ; X64-NEXT: cmpl %eax, %ecx
645 ; X64-NEXT: subb %al, %cl
646 ; X64-NEXT: movsbl %cl, %eax
647 ; X64-NEXT: movd %eax, %xmm0
648 ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
649 ; X64-NEXT: movd %xmm1, %eax
650 ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,1,1]
651 ; X64-NEXT: movd %xmm1, %ecx
652 ; X64-NEXT: cmpl %eax, %ecx
655 ; X64-NEXT: subb %al, %cl
656 ; X64-NEXT: movsbl %cl, %eax
657 ; X64-NEXT: movd %eax, %xmm1
658 ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
659 ; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
662 ; X86-LABEL: scmp_narrow_vec_op:
664 ; X86-NEXT: pushl %ebx
665 ; X86-NEXT: pushl %edi
666 ; X86-NEXT: pushl %esi
667 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
668 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
669 ; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
670 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
671 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
672 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %dl
675 ; X86-NEXT: subb %dl, %dh
676 ; X86-NEXT: movsbl %dh, %edx
677 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %bl
680 ; X86-NEXT: subb %bl, %bh
681 ; X86-NEXT: movsbl %bh, %esi
682 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %ch
685 ; X86-NEXT: subb %ch, %bl
686 ; X86-NEXT: movsbl %bl, %edi
687 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %cl
690 ; X86-NEXT: subb %cl, %ch
691 ; X86-NEXT: movsbl %ch, %ecx
692 ; X86-NEXT: movl %ecx, 12(%eax)
693 ; X86-NEXT: movl %edi, 8(%eax)
694 ; X86-NEXT: movl %esi, 4(%eax)
695 ; X86-NEXT: movl %edx, (%eax)
696 ; X86-NEXT: popl %esi
697 ; X86-NEXT: popl %edi
698 ; X86-NEXT: popl %ebx
700 %1 = call <4 x i32> @llvm.scmp(<4 x i8> %x, <4 x i8> %y)
; scmp_wide_vec_result: <16 x i32> result from <16 x i8> operands. The x86-64
; run expects the bytes to be widened in groups of four (punpck*bw, punpck*wd,
; psrad $24) and compared lane by lane with cmpl, filling xmm0..xmm3; the i686
; run expects sixteen cmpb sequences, a 16-byte stack area for spills, and
; sixteen 32-bit stores through the pointer in %eax.
704 define <16 x i32> @scmp_wide_vec_result(<16 x i8> %x, <16 x i8> %y) nounwind {
705 ; X64-LABEL: scmp_wide_vec_result:
707 ; X64-NEXT: movdqa %xmm1, %xmm3
708 ; X64-NEXT: movdqa %xmm0, %xmm2
709 ; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
710 ; X64-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3]
711 ; X64-NEXT: psrad $24, %xmm5
712 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm5[3,3,3,3]
713 ; X64-NEXT: movd %xmm0, %eax
714 ; X64-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
715 ; X64-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3]
716 ; X64-NEXT: psrad $24, %xmm6
717 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm6[3,3,3,3]
718 ; X64-NEXT: movd %xmm0, %ecx
719 ; X64-NEXT: cmpl %eax, %ecx
722 ; X64-NEXT: subb %al, %cl
723 ; X64-NEXT: movsbl %cl, %eax
724 ; X64-NEXT: movd %eax, %xmm0
725 ; X64-NEXT: pshufd {{.*#+}} xmm7 = xmm5[2,3,2,3]
726 ; X64-NEXT: movd %xmm7, %eax
727 ; X64-NEXT: pshufd {{.*#+}} xmm7 = xmm6[2,3,2,3]
728 ; X64-NEXT: movd %xmm7, %ecx
729 ; X64-NEXT: cmpl %eax, %ecx
732 ; X64-NEXT: subb %al, %cl
733 ; X64-NEXT: movsbl %cl, %eax
734 ; X64-NEXT: movd %eax, %xmm7
735 ; X64-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm0[0],xmm7[1],xmm0[1]
736 ; X64-NEXT: movd %xmm5, %eax
737 ; X64-NEXT: movd %xmm6, %ecx
738 ; X64-NEXT: cmpl %eax, %ecx
741 ; X64-NEXT: subb %al, %cl
742 ; X64-NEXT: movsbl %cl, %eax
743 ; X64-NEXT: movd %eax, %xmm0
744 ; X64-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,1,1]
745 ; X64-NEXT: movd %xmm5, %eax
746 ; X64-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,1,1]
747 ; X64-NEXT: movd %xmm5, %ecx
748 ; X64-NEXT: cmpl %eax, %ecx
751 ; X64-NEXT: subb %al, %cl
752 ; X64-NEXT: movsbl %cl, %eax
753 ; X64-NEXT: movd %eax, %xmm5
754 ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
755 ; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm7[0]
756 ; X64-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
757 ; X64-NEXT: psrad $24, %xmm5
758 ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm5[3,3,3,3]
759 ; X64-NEXT: movd %xmm1, %eax
760 ; X64-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4,4,5,5,6,6,7,7]
761 ; X64-NEXT: psrad $24, %xmm4
762 ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm4[3,3,3,3]
763 ; X64-NEXT: movd %xmm1, %ecx
764 ; X64-NEXT: cmpl %eax, %ecx
767 ; X64-NEXT: subb %al, %cl
768 ; X64-NEXT: movsbl %cl, %eax
769 ; X64-NEXT: movd %eax, %xmm1
770 ; X64-NEXT: pshufd {{.*#+}} xmm6 = xmm5[2,3,2,3]
771 ; X64-NEXT: movd %xmm6, %eax
772 ; X64-NEXT: pshufd {{.*#+}} xmm6 = xmm4[2,3,2,3]
773 ; X64-NEXT: movd %xmm6, %ecx
774 ; X64-NEXT: cmpl %eax, %ecx
777 ; X64-NEXT: subb %al, %cl
778 ; X64-NEXT: movsbl %cl, %eax
779 ; X64-NEXT: movd %eax, %xmm6
780 ; X64-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm1[0],xmm6[1],xmm1[1]
781 ; X64-NEXT: movd %xmm5, %eax
782 ; X64-NEXT: movd %xmm4, %ecx
783 ; X64-NEXT: cmpl %eax, %ecx
786 ; X64-NEXT: subb %al, %cl
787 ; X64-NEXT: movsbl %cl, %eax
788 ; X64-NEXT: movd %eax, %xmm1
789 ; X64-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,1,1]
790 ; X64-NEXT: movd %xmm5, %eax
791 ; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,1,1]
792 ; X64-NEXT: movd %xmm4, %ecx
793 ; X64-NEXT: cmpl %eax, %ecx
796 ; X64-NEXT: subb %al, %cl
797 ; X64-NEXT: movsbl %cl, %eax
798 ; X64-NEXT: movd %eax, %xmm4
799 ; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
800 ; X64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm6[0]
801 ; X64-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
802 ; X64-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3]
803 ; X64-NEXT: psrad $24, %xmm5
804 ; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm5[3,3,3,3]
805 ; X64-NEXT: movd %xmm4, %eax
806 ; X64-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm2[8],xmm4[9],xmm2[9],xmm4[10],xmm2[10],xmm4[11],xmm2[11],xmm4[12],xmm2[12],xmm4[13],xmm2[13],xmm4[14],xmm2[14],xmm4[15],xmm2[15]
807 ; X64-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3]
808 ; X64-NEXT: psrad $24, %xmm6
809 ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm6[3,3,3,3]
810 ; X64-NEXT: movd %xmm2, %ecx
811 ; X64-NEXT: cmpl %eax, %ecx
814 ; X64-NEXT: subb %al, %cl
815 ; X64-NEXT: movsbl %cl, %eax
816 ; X64-NEXT: movd %eax, %xmm2
817 ; X64-NEXT: pshufd {{.*#+}} xmm7 = xmm5[2,3,2,3]
818 ; X64-NEXT: movd %xmm7, %eax
819 ; X64-NEXT: pshufd {{.*#+}} xmm7 = xmm6[2,3,2,3]
820 ; X64-NEXT: movd %xmm7, %ecx
821 ; X64-NEXT: cmpl %eax, %ecx
824 ; X64-NEXT: subb %al, %cl
825 ; X64-NEXT: movsbl %cl, %eax
826 ; X64-NEXT: movd %eax, %xmm7
827 ; X64-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm2[0],xmm7[1],xmm2[1]
828 ; X64-NEXT: movd %xmm5, %eax
829 ; X64-NEXT: movd %xmm6, %ecx
830 ; X64-NEXT: cmpl %eax, %ecx
833 ; X64-NEXT: subb %al, %cl
834 ; X64-NEXT: movsbl %cl, %eax
835 ; X64-NEXT: movd %eax, %xmm2
836 ; X64-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,1,1]
837 ; X64-NEXT: movd %xmm5, %eax
838 ; X64-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,1,1]
839 ; X64-NEXT: movd %xmm5, %ecx
840 ; X64-NEXT: cmpl %eax, %ecx
843 ; X64-NEXT: subb %al, %cl
844 ; X64-NEXT: movsbl %cl, %eax
845 ; X64-NEXT: movd %eax, %xmm5
846 ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
847 ; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm7[0]
848 ; X64-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm3[4],xmm5[5],xmm3[5],xmm5[6],xmm3[6],xmm5[7],xmm3[7]
849 ; X64-NEXT: psrad $24, %xmm5
850 ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm5[3,3,3,3]
851 ; X64-NEXT: movd %xmm3, %eax
852 ; X64-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4,4,5,5,6,6,7,7]
853 ; X64-NEXT: psrad $24, %xmm4
854 ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm4[3,3,3,3]
855 ; X64-NEXT: movd %xmm3, %ecx
856 ; X64-NEXT: cmpl %eax, %ecx
859 ; X64-NEXT: subb %al, %cl
860 ; X64-NEXT: movsbl %cl, %eax
861 ; X64-NEXT: movd %eax, %xmm3
862 ; X64-NEXT: pshufd {{.*#+}} xmm6 = xmm5[2,3,2,3]
863 ; X64-NEXT: movd %xmm6, %eax
864 ; X64-NEXT: pshufd {{.*#+}} xmm6 = xmm4[2,3,2,3]
865 ; X64-NEXT: movd %xmm6, %ecx
866 ; X64-NEXT: cmpl %eax, %ecx
869 ; X64-NEXT: subb %al, %cl
870 ; X64-NEXT: movsbl %cl, %eax
871 ; X64-NEXT: movd %eax, %xmm6
872 ; X64-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1]
873 ; X64-NEXT: movd %xmm5, %eax
874 ; X64-NEXT: movd %xmm4, %ecx
875 ; X64-NEXT: cmpl %eax, %ecx
878 ; X64-NEXT: subb %al, %cl
879 ; X64-NEXT: movsbl %cl, %eax
880 ; X64-NEXT: movd %eax, %xmm3
881 ; X64-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,1,1]
882 ; X64-NEXT: movd %xmm5, %eax
883 ; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,1,1]
884 ; X64-NEXT: movd %xmm4, %ecx
885 ; X64-NEXT: cmpl %eax, %ecx
888 ; X64-NEXT: subb %al, %cl
889 ; X64-NEXT: movsbl %cl, %eax
890 ; X64-NEXT: movd %eax, %xmm4
891 ; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
892 ; X64-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm6[0]
895 ; X86-LABEL: scmp_wide_vec_result:
897 ; X86-NEXT: pushl %ebp
898 ; X86-NEXT: pushl %ebx
899 ; X86-NEXT: pushl %edi
900 ; X86-NEXT: pushl %esi
901 ; X86-NEXT: subl $16, %esp
902 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
903 ; X86-NEXT: movb {{[0-9]+}}(%esp), %ah
904 ; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
905 ; X86-NEXT: movb {{[0-9]+}}(%esp), %dh
906 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
907 ; X86-NEXT: movb {{[0-9]+}}(%esp), %bh
908 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
909 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
912 ; X86-NEXT: subb %al, %cl
913 ; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
914 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %bh
917 ; X86-NEXT: subb %al, %cl
918 ; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
919 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %bl
922 ; X86-NEXT: subb %al, %cl
923 ; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
924 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %dh
927 ; X86-NEXT: subb %al, %cl
928 ; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
929 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %ch
932 ; X86-NEXT: subb %al, %cl
933 ; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
934 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %ah
937 ; X86-NEXT: subb %al, %cl
938 ; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
939 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %dl
942 ; X86-NEXT: subb %al, %cl
943 ; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
944 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
945 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
948 ; X86-NEXT: subb %al, %bh
949 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
950 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
953 ; X86-NEXT: subb %al, %bl
954 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
955 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
958 ; X86-NEXT: subb %al, %dh
959 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
960 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
963 ; X86-NEXT: subb %al, %dl
964 ; X86-NEXT: movsbl %dl, %eax
965 ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
966 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
967 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
970 ; X86-NEXT: subb %al, %dl
971 ; X86-NEXT: movsbl %dl, %eax
972 ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
973 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
974 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
977 ; X86-NEXT: subb %al, %dl
978 ; X86-NEXT: movsbl %dl, %ebp
979 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
980 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
983 ; X86-NEXT: subb %al, %dl
984 ; X86-NEXT: movsbl %dl, %edi
985 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
986 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
989 ; X86-NEXT: subb %al, %ah
990 ; X86-NEXT: movsbl %ah, %esi
991 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
992 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
995 ; X86-NEXT: subb %al, %dl
996 ; X86-NEXT: movsbl %dl, %ecx
997 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
998 ; X86-NEXT: movl %ecx, 60(%eax)
999 ; X86-NEXT: movl %esi, 56(%eax)
1000 ; X86-NEXT: movl %edi, 52(%eax)
1001 ; X86-NEXT: movl %ebp, 48(%eax)
1002 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
1003 ; X86-NEXT: movl %ecx, 44(%eax)
1004 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
1005 ; X86-NEXT: movl %ecx, 40(%eax)
1006 ; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1007 ; X86-NEXT: movsbl %dh, %edx
1008 ; X86-NEXT: movl %edx, 36(%eax)
1009 ; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload
1010 ; X86-NEXT: movsbl %bl, %esi
1011 ; X86-NEXT: movl %esi, 32(%eax)
1012 ; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
1013 ; X86-NEXT: movsbl %bh, %edi
1014 ; X86-NEXT: movl %edi, 28(%eax)
1015 ; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
1016 ; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
1017 ; X86-NEXT: movl %ebx, 24(%eax)
1018 ; X86-NEXT: movl %edi, 20(%eax)
1019 ; X86-NEXT: movl %esi, 16(%eax)
1020 ; X86-NEXT: movl %edx, 12(%eax)
1021 ; X86-NEXT: movl %ecx, 8(%eax)
1022 ; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1023 ; X86-NEXT: movl %ecx, 4(%eax)
1024 ; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1025 ; X86-NEXT: movl %ecx, (%eax)
1026 ; X86-NEXT: addl $16, %esp
1027 ; X86-NEXT: popl %esi
1028 ; X86-NEXT: popl %edi
1029 ; X86-NEXT: popl %ebx
1030 ; X86-NEXT: popl %ebp
1032 %1 = call <16 x i32> @llvm.scmp(<16 x i8> %x, <16 x i8> %y)
1036 define <16 x i8> @scmp_wide_vec_op(<16 x i64> %x, <16 x i64> %y) nounwind {
1037 ; X64-LABEL: scmp_wide_vec_op:
1039 ; X64-NEXT: movq %xmm7, %rax
1040 ; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1041 ; X64-NEXT: setl %al
1042 ; X64-NEXT: setg %cl
1043 ; X64-NEXT: subb %al, %cl
1044 ; X64-NEXT: movzbl %cl, %eax
1045 ; X64-NEXT: movd %eax, %xmm8
1046 ; X64-NEXT: pshufd {{.*#+}} xmm7 = xmm7[2,3,2,3]
1047 ; X64-NEXT: movq %xmm7, %rax
1048 ; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1049 ; X64-NEXT: setl %al
1050 ; X64-NEXT: setg %cl
1051 ; X64-NEXT: subb %al, %cl
1052 ; X64-NEXT: movzbl %cl, %eax
1053 ; X64-NEXT: movd %eax, %xmm7
1054 ; X64-NEXT: punpcklbw {{.*#+}} xmm8 = xmm8[0],xmm7[0],xmm8[1],xmm7[1],xmm8[2],xmm7[2],xmm8[3],xmm7[3],xmm8[4],xmm7[4],xmm8[5],xmm7[5],xmm8[6],xmm7[6],xmm8[7],xmm7[7]
1055 ; X64-NEXT: movq %xmm6, %rax
1056 ; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1057 ; X64-NEXT: setl %al
1058 ; X64-NEXT: setg %cl
1059 ; X64-NEXT: subb %al, %cl
1060 ; X64-NEXT: movzbl %cl, %eax
1061 ; X64-NEXT: movd %eax, %xmm7
1062 ; X64-NEXT: pshufd {{.*#+}} xmm6 = xmm6[2,3,2,3]
1063 ; X64-NEXT: movq %xmm6, %rax
1064 ; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1065 ; X64-NEXT: setl %al
1066 ; X64-NEXT: setg %cl
1067 ; X64-NEXT: subb %al, %cl
1068 ; X64-NEXT: movzbl %cl, %eax
1069 ; X64-NEXT: movd %eax, %xmm6
1070 ; X64-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1],xmm7[2],xmm6[2],xmm7[3],xmm6[3],xmm7[4],xmm6[4],xmm7[5],xmm6[5],xmm7[6],xmm6[6],xmm7[7],xmm6[7]
1071 ; X64-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1],xmm7[2],xmm8[2],xmm7[3],xmm8[3]
1072 ; X64-NEXT: movq %xmm5, %rax
1073 ; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1074 ; X64-NEXT: setl %al
1075 ; X64-NEXT: setg %cl
1076 ; X64-NEXT: subb %al, %cl
1077 ; X64-NEXT: movzbl %cl, %eax
1078 ; X64-NEXT: movd %eax, %xmm6
1079 ; X64-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3]
1080 ; X64-NEXT: movq %xmm5, %rax
1081 ; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1082 ; X64-NEXT: setl %al
1083 ; X64-NEXT: setg %cl
1084 ; X64-NEXT: subb %al, %cl
1085 ; X64-NEXT: movzbl %cl, %eax
1086 ; X64-NEXT: movd %eax, %xmm5
1087 ; X64-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
1088 ; X64-NEXT: movq %xmm4, %rax
1089 ; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1090 ; X64-NEXT: setl %al
1091 ; X64-NEXT: setg %cl
1092 ; X64-NEXT: subb %al, %cl
1093 ; X64-NEXT: movzbl %cl, %eax
1094 ; X64-NEXT: movd %eax, %xmm5
1095 ; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
1096 ; X64-NEXT: movq %xmm4, %rax
1097 ; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1098 ; X64-NEXT: setl %al
1099 ; X64-NEXT: setg %cl
1100 ; X64-NEXT: subb %al, %cl
1101 ; X64-NEXT: movzbl %cl, %eax
1102 ; X64-NEXT: movd %eax, %xmm4
1103 ; X64-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
1104 ; X64-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
1105 ; X64-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1]
1106 ; X64-NEXT: movq %xmm3, %rax
1107 ; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1108 ; X64-NEXT: setl %al
1109 ; X64-NEXT: setg %cl
1110 ; X64-NEXT: subb %al, %cl
1111 ; X64-NEXT: movzbl %cl, %eax
1112 ; X64-NEXT: movd %eax, %xmm4
1113 ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
1114 ; X64-NEXT: movq %xmm3, %rax
1115 ; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1116 ; X64-NEXT: setl %al
1117 ; X64-NEXT: setg %cl
1118 ; X64-NEXT: subb %al, %cl
1119 ; X64-NEXT: movzbl %cl, %eax
1120 ; X64-NEXT: movd %eax, %xmm3
1121 ; X64-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
1122 ; X64-NEXT: movq %xmm2, %rax
1123 ; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1124 ; X64-NEXT: setl %al
1125 ; X64-NEXT: setg %cl
1126 ; X64-NEXT: subb %al, %cl
1127 ; X64-NEXT: movzbl %cl, %eax
1128 ; X64-NEXT: movd %eax, %xmm3
1129 ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
1130 ; X64-NEXT: movq %xmm2, %rax
1131 ; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1132 ; X64-NEXT: setl %al
1133 ; X64-NEXT: setg %cl
1134 ; X64-NEXT: subb %al, %cl
1135 ; X64-NEXT: movzbl %cl, %eax
1136 ; X64-NEXT: movd %eax, %xmm2
1137 ; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
1138 ; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
1139 ; X64-NEXT: movq %xmm1, %rax
1140 ; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1141 ; X64-NEXT: setl %al
1142 ; X64-NEXT: setg %cl
1143 ; X64-NEXT: subb %al, %cl
1144 ; X64-NEXT: movzbl %cl, %eax
1145 ; X64-NEXT: movd %eax, %xmm2
1146 ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1147 ; X64-NEXT: movq %xmm1, %rax
1148 ; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1149 ; X64-NEXT: setl %al
1150 ; X64-NEXT: setg %cl
1151 ; X64-NEXT: subb %al, %cl
1152 ; X64-NEXT: movzbl %cl, %eax
1153 ; X64-NEXT: movd %eax, %xmm1
1154 ; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
1155 ; X64-NEXT: movq %xmm0, %rax
1156 ; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1157 ; X64-NEXT: setl %al
1158 ; X64-NEXT: setg %cl
1159 ; X64-NEXT: subb %al, %cl
1160 ; X64-NEXT: movzbl %cl, %eax
1161 ; X64-NEXT: movd %eax, %xmm1
1162 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1163 ; X64-NEXT: movq %xmm0, %rax
1164 ; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1165 ; X64-NEXT: setl %al
1166 ; X64-NEXT: setg %cl
1167 ; X64-NEXT: subb %al, %cl
1168 ; X64-NEXT: movzbl %cl, %eax
1169 ; X64-NEXT: movd %eax, %xmm0
1170 ; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1171 ; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1172 ; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
1173 ; X64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0]
1174 ; X64-NEXT: movdqa %xmm1, %xmm0
1177 ; X86-LABEL: scmp_wide_vec_op:
1179 ; X86-NEXT: pushl %ebp
1180 ; X86-NEXT: pushl %ebx
1181 ; X86-NEXT: pushl %edi
1182 ; X86-NEXT: pushl %esi
1183 ; X86-NEXT: subl $12, %esp
1184 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1185 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1186 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1187 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
1188 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
1189 ; X86-NEXT: cmpl %edx, %edi
1190 ; X86-NEXT: movl %ebx, %ebp
1191 ; X86-NEXT: sbbl %esi, %ebp
1192 ; X86-NEXT: setl %al
1193 ; X86-NEXT: cmpl %edi, %edx
1194 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1195 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
1196 ; X86-NEXT: sbbl %ebx, %esi
1197 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
1198 ; X86-NEXT: setl %ah
1199 ; X86-NEXT: subb %al, %ah
1200 ; X86-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1201 ; X86-NEXT: cmpl %ecx, %ebp
1202 ; X86-NEXT: movl %ebx, %eax
1203 ; X86-NEXT: sbbl %edx, %eax
1204 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
1205 ; X86-NEXT: setl %al
1206 ; X86-NEXT: cmpl %ebp, %ecx
1207 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1208 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1209 ; X86-NEXT: sbbl %ebx, %edx
1210 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1211 ; X86-NEXT: setl %ah
1212 ; X86-NEXT: subb %al, %ah
1213 ; X86-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1214 ; X86-NEXT: cmpl %edi, %ecx
1215 ; X86-NEXT: movl %edx, %eax
1216 ; X86-NEXT: sbbl %esi, %eax
1217 ; X86-NEXT: setl %al
1218 ; X86-NEXT: cmpl %ecx, %edi
1219 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1220 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
1221 ; X86-NEXT: sbbl %edx, %esi
1222 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1223 ; X86-NEXT: setl %dl
1224 ; X86-NEXT: subb %al, %dl
1225 ; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1226 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
1227 ; X86-NEXT: cmpl %ebp, %edi
1228 ; X86-NEXT: movl %esi, %eax
1229 ; X86-NEXT: sbbl %ecx, %eax
1230 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1231 ; X86-NEXT: setl %bl
1232 ; X86-NEXT: cmpl %edi, %ebp
1233 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1234 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
1235 ; X86-NEXT: sbbl %esi, %ecx
1236 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1237 ; X86-NEXT: setl %cl
1238 ; X86-NEXT: subb %bl, %cl
1239 ; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1240 ; X86-NEXT: cmpl %edx, %edi
1241 ; X86-NEXT: movl %esi, %ecx
1242 ; X86-NEXT: sbbl %eax, %ecx
1243 ; X86-NEXT: setl %bl
1244 ; X86-NEXT: cmpl %edi, %edx
1245 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1246 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1247 ; X86-NEXT: sbbl %esi, %eax
1248 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1249 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1250 ; X86-NEXT: setl %bh
1251 ; X86-NEXT: subb %bl, %bh
1252 ; X86-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1253 ; X86-NEXT: cmpl %edx, %eax
1254 ; X86-NEXT: movl %esi, %edi
1255 ; X86-NEXT: sbbl %ecx, %edi
1256 ; X86-NEXT: setl %bl
1257 ; X86-NEXT: cmpl %eax, %edx
1258 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1259 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1260 ; X86-NEXT: sbbl %esi, %ecx
1261 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1262 ; X86-NEXT: setl %bh
1263 ; X86-NEXT: subb %bl, %bh
1264 ; X86-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1265 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1266 ; X86-NEXT: cmpl %edx, %ecx
1267 ; X86-NEXT: movl %esi, %edi
1268 ; X86-NEXT: sbbl %eax, %edi
1269 ; X86-NEXT: setl %bl
1270 ; X86-NEXT: cmpl %ecx, %edx
1271 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1272 ; X86-NEXT: sbbl %esi, %eax
1273 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1274 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1275 ; X86-NEXT: setl %bh
1276 ; X86-NEXT: subb %bl, %bh
1277 ; X86-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1278 ; X86-NEXT: cmpl %ecx, %edx
1279 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1280 ; X86-NEXT: movl %esi, %edi
1281 ; X86-NEXT: sbbl %eax, %edi
1282 ; X86-NEXT: setl %bl
1283 ; X86-NEXT: cmpl %edx, %ecx
1284 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1285 ; X86-NEXT: sbbl %esi, %eax
1286 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1287 ; X86-NEXT: setl %dl
1288 ; X86-NEXT: subb %bl, %dl
1289 ; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1290 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1291 ; X86-NEXT: cmpl %ecx, %edx
1292 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1293 ; X86-NEXT: movl %esi, %edi
1294 ; X86-NEXT: sbbl %eax, %edi
1295 ; X86-NEXT: setl %bl
1296 ; X86-NEXT: cmpl %edx, %ecx
1297 ; X86-NEXT: sbbl %esi, %eax
1298 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1299 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1300 ; X86-NEXT: setl %dl
1301 ; X86-NEXT: subb %bl, %dl
1302 ; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1303 ; X86-NEXT: cmpl %eax, %ecx
1304 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1305 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1306 ; X86-NEXT: movl %esi, %edi
1307 ; X86-NEXT: sbbl %edx, %edi
1308 ; X86-NEXT: setl %bl
1309 ; X86-NEXT: cmpl %ecx, %eax
1310 ; X86-NEXT: sbbl %esi, %edx
1311 ; X86-NEXT: setl %al
1312 ; X86-NEXT: subb %bl, %al
1313 ; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1314 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
1315 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1316 ; X86-NEXT: cmpl %ebp, %ecx
1317 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1318 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1319 ; X86-NEXT: movl %esi, %edi
1320 ; X86-NEXT: sbbl %edx, %edi
1321 ; X86-NEXT: setl %al
1322 ; X86-NEXT: cmpl %ecx, %ebp
1323 ; X86-NEXT: sbbl %esi, %edx
1324 ; X86-NEXT: setl %cl
1325 ; X86-NEXT: subb %al, %cl
1326 ; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1327 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
1328 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1329 ; X86-NEXT: cmpl %ebp, %ecx
1330 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1331 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1332 ; X86-NEXT: movl %esi, %edi
1333 ; X86-NEXT: sbbl %edx, %edi
1334 ; X86-NEXT: setl %al
1335 ; X86-NEXT: cmpl %ecx, %ebp
1336 ; X86-NEXT: sbbl %esi, %edx
1337 ; X86-NEXT: setl %cl
1338 ; X86-NEXT: subb %al, %cl
1339 ; X86-NEXT: movb %cl, (%esp) # 1-byte Spill
1340 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1341 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1342 ; X86-NEXT: cmpl %eax, %ecx
1343 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1344 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
1345 ; X86-NEXT: movl %edi, %ebp
1346 ; X86-NEXT: sbbl %esi, %ebp
1347 ; X86-NEXT: setl %dl
1348 ; X86-NEXT: cmpl %ecx, %eax
1349 ; X86-NEXT: sbbl %edi, %esi
1350 ; X86-NEXT: setl %ch
1351 ; X86-NEXT: subb %dl, %ch
1352 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1353 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1354 ; X86-NEXT: cmpl %edx, %esi
1355 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
1356 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1357 ; X86-NEXT: movl %eax, %ebp
1358 ; X86-NEXT: sbbl %edi, %ebp
1359 ; X86-NEXT: setl %cl
1360 ; X86-NEXT: cmpl %esi, %edx
1361 ; X86-NEXT: sbbl %eax, %edi
1362 ; X86-NEXT: setl %dl
1363 ; X86-NEXT: subb %cl, %dl
1364 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
1365 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1366 ; X86-NEXT: cmpl %ebx, %esi
1367 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
1368 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1369 ; X86-NEXT: movl %eax, %ebp
1370 ; X86-NEXT: sbbl %edi, %ebp
1371 ; X86-NEXT: setl %dh
1372 ; X86-NEXT: cmpl %esi, %ebx
1373 ; X86-NEXT: sbbl %eax, %edi
1374 ; X86-NEXT: setl %cl
1375 ; X86-NEXT: subb %dh, %cl
1376 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1377 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1378 ; X86-NEXT: cmpl %eax, %esi
1379 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
1380 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
1381 ; X86-NEXT: movl %ebx, %ebp
1382 ; X86-NEXT: sbbl %edi, %ebp
1383 ; X86-NEXT: setl %dh
1384 ; X86-NEXT: cmpl %esi, %eax
1385 ; X86-NEXT: sbbl %ebx, %edi
1386 ; X86-NEXT: setl %bl
1387 ; X86-NEXT: subb %dh, %bl
1388 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1389 ; X86-NEXT: movb %bl, 15(%eax)
1390 ; X86-NEXT: movb %cl, 14(%eax)
1391 ; X86-NEXT: movb %dl, 13(%eax)
1392 ; X86-NEXT: movb %ch, 12(%eax)
1393 ; X86-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload
1394 ; X86-NEXT: movb %cl, 11(%eax)
1395 ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1396 ; X86-NEXT: movb %cl, 10(%eax)
1397 ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1398 ; X86-NEXT: movb %cl, 9(%eax)
1399 ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1400 ; X86-NEXT: movb %cl, 8(%eax)
1401 ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1402 ; X86-NEXT: movb %cl, 7(%eax)
1403 ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1404 ; X86-NEXT: movb %cl, 6(%eax)
1405 ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1406 ; X86-NEXT: movb %cl, 5(%eax)
1407 ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1408 ; X86-NEXT: movb %cl, 4(%eax)
1409 ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1410 ; X86-NEXT: movb %cl, 3(%eax)
1411 ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1412 ; X86-NEXT: movb %cl, 2(%eax)
1413 ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1414 ; X86-NEXT: movb %cl, 1(%eax)
1415 ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1416 ; X86-NEXT: movb %cl, (%eax)
1417 ; X86-NEXT: addl $12, %esp
1418 ; X86-NEXT: popl %esi
1419 ; X86-NEXT: popl %edi
1420 ; X86-NEXT: popl %ebx
1421 ; X86-NEXT: popl %ebp
1423 %1 = call <16 x i8> @llvm.scmp(<16 x i64> %x, <16 x i64> %y)
; scmp_uncommon_vectors: llvm.scmp on <7 x i7> operands with a <7 x i117>
; result, i.e. non-power-of-two element widths on both sides of the call.
; The assertions below cover both llc invocations of this test (x86_64 and i686).
1427 define <7 x i117> @scmp_uncommon_vectors(<7 x i7> %x, <7 x i7> %y) nounwind {
1428 ; X64-LABEL: scmp_uncommon_vectors:
1430 ; X64-NEXT: pushq %rbp
1431 ; X64-NEXT: pushq %r15
1432 ; X64-NEXT: pushq %r14
1433 ; X64-NEXT: pushq %r13
1434 ; X64-NEXT: pushq %r12
1435 ; X64-NEXT: pushq %rbx
1436 ; X64-NEXT: movq %rdi, %rax
1437 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
1438 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
1439 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
1440 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
1441 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
1442 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
1443 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
; Per lane: addb reg,reg followed by sarb (by one) sign-extends the 7-bit value
; inside its byte register; cmpb + setl/setg + subb then forms setg - setl,
; and movsbq / sarq $63 widen that byte result and derive its sign word.
1444 ; X64-NEXT: addb %r15b, %r15b
1445 ; X64-NEXT: sarb %r15b
1446 ; X64-NEXT: addb %sil, %sil
1447 ; X64-NEXT: sarb %sil
1448 ; X64-NEXT: cmpb %r15b, %sil
1449 ; X64-NEXT: setl %sil
1450 ; X64-NEXT: setg %r15b
1451 ; X64-NEXT: subb %sil, %r15b
1452 ; X64-NEXT: movsbq %r15b, %rsi
1453 ; X64-NEXT: movq %rsi, (%rax)
1454 ; X64-NEXT: movq %rsi, %xmm0
1455 ; X64-NEXT: sarq $63, %rsi
1456 ; X64-NEXT: addb %r14b, %r14b
1457 ; X64-NEXT: sarb %r14b
1458 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
1459 ; X64-NEXT: addb %r15b, %r15b
1460 ; X64-NEXT: sarb %r15b
1461 ; X64-NEXT: cmpb %r14b, %r15b
1462 ; X64-NEXT: setl %r14b
1463 ; X64-NEXT: setg %r15b
1464 ; X64-NEXT: subb %r14b, %r15b
1465 ; X64-NEXT: movsbq %r15b, %r14
1466 ; X64-NEXT: movq %r14, %r15
1467 ; X64-NEXT: sarq $63, %r15
1468 ; X64-NEXT: addb %bpl, %bpl
1469 ; X64-NEXT: sarb %bpl
1470 ; X64-NEXT: addb %dl, %dl
1471 ; X64-NEXT: sarb %dl
1472 ; X64-NEXT: cmpb %bpl, %dl
1473 ; X64-NEXT: setl %dl
1474 ; X64-NEXT: setg %bpl
1475 ; X64-NEXT: subb %dl, %bpl
1476 ; X64-NEXT: movsbq %bpl, %rdx
1477 ; X64-NEXT: movq %rdx, %r12
1478 ; X64-NEXT: sarq $63, %r12
1479 ; X64-NEXT: addb %bl, %bl
1480 ; X64-NEXT: sarb %bl
1481 ; X64-NEXT: addb %cl, %cl
1482 ; X64-NEXT: sarb %cl
1483 ; X64-NEXT: cmpb %bl, %cl
1484 ; X64-NEXT: setl %cl
1485 ; X64-NEXT: setg %bl
1486 ; X64-NEXT: subb %cl, %bl
1487 ; X64-NEXT: movsbq %bl, %rbx
1488 ; X64-NEXT: movq %rbx, %rcx
1489 ; X64-NEXT: sarq $63, %rcx
1490 ; X64-NEXT: addb %r11b, %r11b
1491 ; X64-NEXT: sarb %r11b
1492 ; X64-NEXT: addb %r8b, %r8b
1493 ; X64-NEXT: sarb %r8b
1494 ; X64-NEXT: cmpb %r11b, %r8b
1495 ; X64-NEXT: setl %r8b
1496 ; X64-NEXT: setg %r11b
1497 ; X64-NEXT: subb %r8b, %r11b
1498 ; X64-NEXT: movsbq %r11b, %r8
1499 ; X64-NEXT: movq %r8, %r11
1500 ; X64-NEXT: sarq $63, %r11
1501 ; X64-NEXT: addb %r10b, %r10b
1502 ; X64-NEXT: sarb %r10b
1503 ; X64-NEXT: addb %r9b, %r9b
1504 ; X64-NEXT: sarb %r9b
1505 ; X64-NEXT: cmpb %r10b, %r9b
1506 ; X64-NEXT: setl %r9b
1507 ; X64-NEXT: setg %r10b
1508 ; X64-NEXT: subb %r9b, %r10b
1509 ; X64-NEXT: movsbq %r10b, %r9
1510 ; X64-NEXT: movq %r9, %r10
1511 ; X64-NEXT: sarq $63, %r10
1512 ; X64-NEXT: addb %dil, %dil
1513 ; X64-NEXT: sarb %dil
1514 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
1515 ; X64-NEXT: addb %bpl, %bpl
1516 ; X64-NEXT: sarb %bpl
1517 ; X64-NEXT: cmpb %dil, %bpl
1518 ; X64-NEXT: setl %dil
1519 ; X64-NEXT: setg %bpl
1520 ; X64-NEXT: subb %dil, %bpl
1521 ; X64-NEXT: movsbq %bpl, %r13
1522 ; X64-NEXT: movq %r13, %rbp
1523 ; X64-NEXT: sarq $63, %rbp
; From here on the widened results are shifted, merged and stored through %rax
; (a copy of %rdi made at entry; presumably the hidden return buffer).
; The shldq amounts (62, 20, 31, 42, 53, 9) equal k*117 mod 64 for lanes
; k = 6, 4, 3, 2, 1, 5, consistent with lanes packed 117 bits apart.
1524 ; X64-NEXT: movq %rbp, %rdi
1525 ; X64-NEXT: shldq $62, %r13, %rdi
1526 ; X64-NEXT: movq %rdi, 88(%rax)
1527 ; X64-NEXT: shrq $2, %rbp
1528 ; X64-NEXT: movl %ebp, 96(%rax)
1529 ; X64-NEXT: movq %r10, %rdi
1530 ; X64-NEXT: shldq $20, %r9, %rdi
1531 ; X64-NEXT: movq %rdi, 64(%rax)
1532 ; X64-NEXT: movq %r11, %rdi
1533 ; X64-NEXT: shldq $31, %r8, %rdi
1534 ; X64-NEXT: movq %rdi, 48(%rax)
1535 ; X64-NEXT: movq %rcx, %rdi
1536 ; X64-NEXT: shldq $42, %rbx, %rdi
1537 ; X64-NEXT: movq %rdi, 32(%rax)
1538 ; X64-NEXT: movabsq $9007199254738944, %rdi # imm = 0x1FFFFFFFFFF800
1539 ; X64-NEXT: andq %r12, %rdi
1540 ; X64-NEXT: shldq $53, %rdx, %r12
1541 ; X64-NEXT: movq %r12, 16(%rax)
1542 ; X64-NEXT: movabsq $9007199254740991, %r12 # imm = 0x1FFFFFFFFFFFFF
1543 ; X64-NEXT: andq %r12, %r15
1544 ; X64-NEXT: shldq $9, %r14, %r15
1545 ; X64-NEXT: shlq $62, %r13
1546 ; X64-NEXT: orq %r15, %r13
1547 ; X64-NEXT: movq %r13, 80(%rax)
1548 ; X64-NEXT: movabsq $2251799813685247, %r15 # imm = 0x7FFFFFFFFFFFF
1549 ; X64-NEXT: andq %rbp, %r15
1550 ; X64-NEXT: movq %r15, %r13
1551 ; X64-NEXT: shrq $48, %r13
1552 ; X64-NEXT: movb %r13b, 102(%rax)
1553 ; X64-NEXT: shrq $32, %r15
1554 ; X64-NEXT: movw %r15w, 100(%rax)
1555 ; X64-NEXT: shlq $42, %rbx
1556 ; X64-NEXT: shrq $11, %rdi
1557 ; X64-NEXT: orq %rbx, %rdi
1558 ; X64-NEXT: movq %rdi, 24(%rax)
1559 ; X64-NEXT: shlq $9, %r14
1560 ; X64-NEXT: shrq $44, %r10
1561 ; X64-NEXT: andl $511, %r10d # imm = 0x1FF
1562 ; X64-NEXT: orq %r14, %r10
1563 ; X64-NEXT: movq %r10, 72(%rax)
1564 ; X64-NEXT: shlq $20, %r9
1565 ; X64-NEXT: shrq $33, %r11
1566 ; X64-NEXT: andl $1048575, %r11d # imm = 0xFFFFF
1567 ; X64-NEXT: orq %r9, %r11
1568 ; X64-NEXT: movq %r11, 56(%rax)
1569 ; X64-NEXT: shlq $31, %r8
1570 ; X64-NEXT: shrq $22, %rcx
1571 ; X64-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF
1572 ; X64-NEXT: orq %r8, %rcx
1573 ; X64-NEXT: movq %rcx, 40(%rax)
1574 ; X64-NEXT: movq %rsi, %xmm1
1575 ; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1576 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1577 ; X64-NEXT: movq %xmm0, %rcx
1578 ; X64-NEXT: andq %r12, %rcx
1579 ; X64-NEXT: shlq $53, %rdx
1580 ; X64-NEXT: orq %rcx, %rdx
1581 ; X64-NEXT: movq %rdx, 8(%rax)
1582 ; X64-NEXT: popq %rbx
1583 ; X64-NEXT: popq %r12
1584 ; X64-NEXT: popq %r13
1585 ; X64-NEXT: popq %r14
1586 ; X64-NEXT: popq %r15
1587 ; X64-NEXT: popq %rbp
1590 ; X86-LABEL: scmp_uncommon_vectors:
1592 ; X86-NEXT: pushl %ebp
1593 ; X86-NEXT: pushl %ebx
1594 ; X86-NEXT: pushl %edi
1595 ; X86-NEXT: pushl %esi
1596 ; X86-NEXT: subl $52, %esp
; 32-bit expectation: the same addb/sarb sign-extension and setl/setg/subb
; pattern as above, with several lanes staged through 1-byte spill slots and
; the widened results (movsbl, sarl $31) kept in 4-byte spill slots.
1597 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1598 ; X86-NEXT: addb %al, %al
1599 ; X86-NEXT: sarb %al
1600 ; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1601 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1602 ; X86-NEXT: addb %al, %al
1603 ; X86-NEXT: sarb %al
1604 ; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1605 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1606 ; X86-NEXT: addb %al, %al
1607 ; X86-NEXT: sarb %al
1608 ; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1609 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1610 ; X86-NEXT: addb %al, %al
1611 ; X86-NEXT: sarb %al
1612 ; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1613 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1614 ; X86-NEXT: addb %al, %al
1615 ; X86-NEXT: sarb %al
1616 ; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1617 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1618 ; X86-NEXT: addb %al, %al
1619 ; X86-NEXT: sarb %al
1620 ; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1621 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
1622 ; X86-NEXT: addb %dl, %dl
1623 ; X86-NEXT: sarb %dl
1624 ; X86-NEXT: movb {{[0-9]+}}(%esp), %ah
1625 ; X86-NEXT: addb %ah, %ah
1626 ; X86-NEXT: sarb %ah
1627 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
1628 ; X86-NEXT: addb %cl, %cl
1629 ; X86-NEXT: sarb %cl
1630 ; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
1631 ; X86-NEXT: addb %ch, %ch
1632 ; X86-NEXT: sarb %ch
1633 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
1634 ; X86-NEXT: addb %bl, %bl
1635 ; X86-NEXT: sarb %bl
1636 ; X86-NEXT: movb {{[0-9]+}}(%esp), %bh
1637 ; X86-NEXT: addb %bh, %bh
1638 ; X86-NEXT: sarb %bh
1639 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
1640 ; X86-NEXT: addb %al, %al
1641 ; X86-NEXT: sarb %al
1642 ; X86-NEXT: movb {{[0-9]+}}(%esp), %dh
1643 ; X86-NEXT: addb %dh, %dh
1644 ; X86-NEXT: sarb %dh
1645 ; X86-NEXT: cmpb %al, %dh
1646 ; X86-NEXT: setl %al
1647 ; X86-NEXT: setg %dh
1648 ; X86-NEXT: subb %al, %dh
1649 ; X86-NEXT: movsbl %dh, %esi
1650 ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1651 ; X86-NEXT: sarl $31, %esi
1652 ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1653 ; X86-NEXT: andl $2097151, %esi # imm = 0x1FFFFF
1654 ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1655 ; X86-NEXT: cmpb %bl, %bh
1656 ; X86-NEXT: setl %al
1657 ; X86-NEXT: setg %dh
1658 ; X86-NEXT: subb %al, %dh
1659 ; X86-NEXT: movsbl %dh, %esi
1660 ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1661 ; X86-NEXT: sarl $31, %esi
1662 ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1663 ; X86-NEXT: andl $2097151, %esi # imm = 0x1FFFFF
1664 ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1665 ; X86-NEXT: cmpb %cl, %ch
1666 ; X86-NEXT: setl %al
1667 ; X86-NEXT: setg %cl
1668 ; X86-NEXT: subb %al, %cl
1669 ; X86-NEXT: movsbl %cl, %ecx
1670 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
1671 ; X86-NEXT: movl %ecx, (%ebp)
1672 ; X86-NEXT: sarl $31, %ecx
1673 ; X86-NEXT: movl %ecx, %esi
1674 ; X86-NEXT: andl $2097151, %esi # imm = 0x1FFFFF
1675 ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1676 ; X86-NEXT: cmpb %dl, %ah
1677 ; X86-NEXT: setl %al
1678 ; X86-NEXT: setg %dl
1679 ; X86-NEXT: subb %al, %dl
1680 ; X86-NEXT: movsbl %dl, %edi
1681 ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1682 ; X86-NEXT: sarl $31, %edi
1683 ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
1684 ; X86-NEXT: cmpb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
1685 ; X86-NEXT: setl %al
1686 ; X86-NEXT: setg %dl
1687 ; X86-NEXT: subb %al, %dl
1688 ; X86-NEXT: movsbl %dl, %esi
1689 ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1690 ; X86-NEXT: sarl $31, %esi
1691 ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
1692 ; X86-NEXT: cmpb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
1693 ; X86-NEXT: setl %al
1694 ; X86-NEXT: setg %dl
1695 ; X86-NEXT: subb %al, %dl
1696 ; X86-NEXT: movsbl %dl, %eax
1697 ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1698 ; X86-NEXT: sarl $31, %eax
1699 ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload
1700 ; X86-NEXT: cmpb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Folded Reload
1701 ; X86-NEXT: setl %dl
1702 ; X86-NEXT: setg %dh
1703 ; X86-NEXT: subb %dl, %dh
1704 ; X86-NEXT: movsbl %dh, %edx
1705 ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1706 ; X86-NEXT: sarl $31, %edx
; Result stores go through %ebp / %ebx (both hold the pointer loaded from the
; stack above). The shldl amounts (30, 9, 20, 31, 10, 21) equal k*117 mod 32
; for lanes k = 6, 5, 4, 3, 2, 1, consistent with lanes packed 117 bits apart.
1707 ; X86-NEXT: movl %edx, 96(%ebp)
1708 ; X86-NEXT: movl %edx, 92(%ebp)
1709 ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1710 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
1711 ; X86-NEXT: movl %ebx, 80(%ebp)
1712 ; X86-NEXT: movl %eax, 68(%ebp)
1713 ; X86-NEXT: movl %eax, 64(%ebp)
1714 ; X86-NEXT: movl %esi, 52(%ebp)
1715 ; X86-NEXT: movl %esi, 48(%ebp)
1716 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
1717 ; X86-NEXT: movl %ebx, 36(%ebp)
1718 ; X86-NEXT: movl %edi, 24(%ebp)
1719 ; X86-NEXT: movl %edi, 20(%ebp)
1720 ; X86-NEXT: movl %ecx, 8(%ebp)
1721 ; X86-NEXT: movl %ecx, 4(%ebp)
1722 ; X86-NEXT: movl %edx, %ecx
1723 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
1724 ; X86-NEXT: shldl $30, %edx, %ecx
1725 ; X86-NEXT: movl %ecx, 88(%ebp)
1726 ; X86-NEXT: movl %ebp, %ebx
1727 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
1728 ; X86-NEXT: shldl $9, %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
1729 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
1730 ; X86-NEXT: shldl $9, %ebp, %ecx
1731 ; X86-NEXT: movl %ebx, %ebp
1732 ; X86-NEXT: movl %ecx, 76(%ebx)
1733 ; X86-NEXT: movl %eax, %ecx
1734 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
1735 ; X86-NEXT: shldl $20, %ebx, %ecx
1736 ; X86-NEXT: movl %ecx, 60(%ebp)
1737 ; X86-NEXT: movl %esi, %ecx
1738 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
1739 ; X86-NEXT: shldl $31, %ebx, %ecx
1740 ; X86-NEXT: movl %ecx, 44(%ebp)
1741 ; X86-NEXT: movl %ebp, %ebx
1742 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
1743 ; X86-NEXT: shldl $10, %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
1744 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
1745 ; X86-NEXT: shldl $10, %ebp, %ecx
1746 ; X86-NEXT: movl %ecx, 32(%ebx)
1747 ; X86-NEXT: movl %edi, %ecx
1748 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
1749 ; X86-NEXT: shldl $21, %ebp, %ecx
1750 ; X86-NEXT: movl %ecx, 16(%ebx)
1751 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
1752 ; X86-NEXT: movl %edx, %ecx
1753 ; X86-NEXT: shrl $2, %ecx
1754 ; X86-NEXT: movw %cx, 100(%ebx)
1755 ; X86-NEXT: shll $21, %ebp
1756 ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
1757 ; X86-NEXT: movl %ebp, 12(%ebx)
1758 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
1759 ; X86-NEXT: shll $30, %ecx
1760 ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
1761 ; X86-NEXT: movl %ecx, 84(%ebx)
1762 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
1763 ; X86-NEXT: shll $9, %ecx
1764 ; X86-NEXT: shrl $12, %eax
1765 ; X86-NEXT: andl $511, %eax # imm = 0x1FF
1766 ; X86-NEXT: orl %ecx, %eax
1767 ; X86-NEXT: movl %eax, 72(%ebx)
1768 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
1769 ; X86-NEXT: shll $20, %eax
1770 ; X86-NEXT: shrl %esi
1771 ; X86-NEXT: andl $1048575, %esi # imm = 0xFFFFF
1772 ; X86-NEXT: orl %eax, %esi
1773 ; X86-NEXT: movl %esi, 56(%ebx)
1774 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
1775 ; X86-NEXT: shll $31, %eax
1776 ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
1777 ; X86-NEXT: movl %eax, 40(%ebx)
1778 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
1779 ; X86-NEXT: shll $10, %eax
1780 ; X86-NEXT: shrl $11, %edi
1781 ; X86-NEXT: andl $1023, %edi # imm = 0x3FF
1782 ; X86-NEXT: orl %eax, %edi
1783 ; X86-NEXT: movl %edi, 28(%ebx)
1784 ; X86-NEXT: movl %edx, %eax
1785 ; X86-NEXT: shrl $18, %eax
1786 ; X86-NEXT: andl $7, %eax
1787 ; X86-NEXT: movb %al, 102(%ebx)
1788 ; X86-NEXT: movl %ebx, %eax
1789 ; X86-NEXT: addl $52, %esp
1790 ; X86-NEXT: popl %esi
1791 ; X86-NEXT: popl %edi
1792 ; X86-NEXT: popl %ebx
1793 ; X86-NEXT: popl %ebp
1795 %1 = call <7 x i117> @llvm.scmp(<7 x i7> %x, <7 x i7> %y)
; scmp_scalarize: llvm.scmp on single-element vectors (<1 x i33> operands,
; <1 x i3> result). The expected code for both targets is purely scalar:
; no vector instructions appear in the assertions below.
1799 define <1 x i3> @scmp_scalarize(<1 x i33> %x, <1 x i33> %y) nounwind {
1800 ; X64-LABEL: scmp_scalarize:
; shlq/sarq by 31 sign-extends each 33-bit operand to 64 bits (64 - 33 = 31);
; the result byte is then setg - setl of a single 64-bit compare.
1802 ; X64-NEXT: shlq $31, %rsi
1803 ; X64-NEXT: sarq $31, %rsi
1804 ; X64-NEXT: shlq $31, %rdi
1805 ; X64-NEXT: sarq $31, %rdi
1806 ; X64-NEXT: cmpq %rsi, %rdi
1807 ; X64-NEXT: setl %cl
1808 ; X64-NEXT: setg %al
1809 ; X64-NEXT: subb %cl, %al
1812 ; X86-LABEL: scmp_scalarize:
1814 ; X86-NEXT: pushl %ebx
1815 ; X86-NEXT: pushl %edi
1816 ; X86-NEXT: pushl %esi
; andl $1 + negl turns the word that feeds sbbl (the upper half of each
; operand, carrying only bit 32) into 0 or -1, i.e. a sign extension.
; Each cmpl/sbbl pair is a two-word signed compare; it is performed once in
; each operand order so that two setl results can be subtracted.
1817 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1818 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1819 ; X86-NEXT: andl $1, %eax
1820 ; X86-NEXT: negl %eax
1821 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1822 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1823 ; X86-NEXT: andl $1, %esi
1824 ; X86-NEXT: negl %esi
1825 ; X86-NEXT: cmpl %ecx, %edx
1826 ; X86-NEXT: movl %esi, %edi
1827 ; X86-NEXT: sbbl %eax, %edi
1828 ; X86-NEXT: setl %bl
1829 ; X86-NEXT: cmpl %edx, %ecx
1830 ; X86-NEXT: sbbl %esi, %eax
1831 ; X86-NEXT: setl %al
1832 ; X86-NEXT: subb %bl, %al
1833 ; X86-NEXT: popl %esi
1834 ; X86-NEXT: popl %edi
1835 ; X86-NEXT: popl %ebx
1837 %1 = call <1 x i3> @llvm.scmp(<1 x i33> %x, <1 x i33> %y)
; scmp_bool_operands: llvm.scmp on <2 x i1> operands with a <2 x i8> result.
; In both expected sequences every i1 lane goes through "and $1" + "neg",
; so it is compared as the signed byte 0 or -1.
1841 define <2 x i8> @scmp_bool_operands(<2 x i1> %x, <2 x i1> %y) nounwind {
1842 ; X64-LABEL: scmp_bool_operands:
; Both vector arguments are spilled with movaps and their lanes are reloaded
; as individual bytes from the stack.
1844 ; X64-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
1845 ; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1846 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
1847 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
1848 ; X64-NEXT: andb $1, %al
1849 ; X64-NEXT: negb %al
1850 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
1851 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi
1852 ; X64-NEXT: andb $1, %dl
1853 ; X64-NEXT: negb %dl
1854 ; X64-NEXT: cmpb %al, %dl
1855 ; X64-NEXT: setl %al
1856 ; X64-NEXT: setg %dl
1857 ; X64-NEXT: subb %al, %dl
1858 ; X64-NEXT: movzbl %dl, %eax
1859 ; X64-NEXT: andb $1, %cl
1860 ; X64-NEXT: negb %cl
1861 ; X64-NEXT: andb $1, %sil
1862 ; X64-NEXT: negb %sil
1863 ; X64-NEXT: cmpb %cl, %sil
1864 ; X64-NEXT: setl %cl
1865 ; X64-NEXT: setg %dl
1866 ; X64-NEXT: subb %cl, %dl
1867 ; X64-NEXT: movzbl %dl, %ecx
; shll $8 + orl + movd place the two result bytes in the low 16 bits of %xmm0.
1868 ; X64-NEXT: shll $8, %ecx
1869 ; X64-NEXT: orl %eax, %ecx
1870 ; X64-NEXT: movd %ecx, %xmm0
1873 ; X86-LABEL: scmp_bool_operands:
; All four lanes are loaded from the stack; the two setg - setl results end up
; in %al and %dl.
1875 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
1876 ; X86-NEXT: andb $1, %cl
1877 ; X86-NEXT: negb %cl
1878 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
1879 ; X86-NEXT: andb $1, %dl
1880 ; X86-NEXT: negb %dl
1881 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1882 ; X86-NEXT: andb $1, %al
1883 ; X86-NEXT: negb %al
1884 ; X86-NEXT: movb {{[0-9]+}}(%esp), %ah
1885 ; X86-NEXT: andb $1, %ah
1886 ; X86-NEXT: negb %ah
1887 ; X86-NEXT: cmpb %al, %ah
1888 ; X86-NEXT: setl %ah
1889 ; X86-NEXT: setg %al
1890 ; X86-NEXT: subb %ah, %al
1891 ; X86-NEXT: cmpb %cl, %dl
1892 ; X86-NEXT: setl %cl
1893 ; X86-NEXT: setg %dl
1894 ; X86-NEXT: subb %cl, %dl
1896 %1 = call <2 x i8> @llvm.scmp(<2 x i1> %x, <2 x i1> %y)
; scmp_ret_wider_than_operands: llvm.scmp on <2 x i8> operands with a
; <2 x i16> result. In both expected sequences the compare is done on bytes
; and each setg - setl difference is widened afterwards with movsbl.
1900 define <2 x i16> @scmp_ret_wider_than_operands(<2 x i8> %x, <2 x i8> %y) nounwind {
1901 ; X64-LABEL: scmp_ret_wider_than_operands:
; movd moves each argument to a general register; shrl $8 exposes the second
; byte lane so both lanes can be compared with cmpb.
1903 ; X64-NEXT: movd %xmm1, %eax
1904 ; X64-NEXT: movl %eax, %ecx
1905 ; X64-NEXT: shrl $8, %ecx
1906 ; X64-NEXT: movd %xmm0, %edx
1907 ; X64-NEXT: movl %edx, %esi
1908 ; X64-NEXT: shrl $8, %esi
1909 ; X64-NEXT: cmpb %cl, %sil
1910 ; X64-NEXT: setl %cl
1911 ; X64-NEXT: setg %sil
1912 ; X64-NEXT: subb %cl, %sil
1913 ; X64-NEXT: movsbl %sil, %ecx
1914 ; X64-NEXT: cmpb %al, %dl
1915 ; X64-NEXT: setl %al
1916 ; X64-NEXT: setg %dl
1917 ; X64-NEXT: subb %al, %dl
1918 ; X64-NEXT: movsbl %dl, %eax
; movd writes the first result into %xmm0 and pinsrw $1 inserts the second
; one as 16-bit element 1.
1919 ; X64-NEXT: movd %eax, %xmm0
1920 ; X64-NEXT: pinsrw $1, %ecx, %xmm0
1923 ; X86-LABEL: scmp_ret_wider_than_operands:
; Operands are compared directly against stack slots; the kill annotations
; at the end narrow the widened results to %ax and %dx.
1925 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
1926 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1927 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
1928 ; X86-NEXT: setl %al
1929 ; X86-NEXT: setg %dl
1930 ; X86-NEXT: subb %al, %dl
1931 ; X86-NEXT: movsbl %dl, %eax
1932 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %cl
1933 ; X86-NEXT: setl %cl
1934 ; X86-NEXT: setg %dl
1935 ; X86-NEXT: subb %cl, %dl
1936 ; X86-NEXT: movsbl %dl, %edx
1937 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
1938 ; X86-NEXT: # kill: def $dx killed $dx killed $edx
1940 %1 = call <2 x i16> @llvm.scmp(<2 x i8> %x, <2 x i8> %y)