1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefixes=X64,SSE,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64,SSE,SSE4
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64,AVX,AVX2
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64,AVX,AVX512
6 ; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
; scmp.8.8 - llvm.scmp on two i8 operands producing an i8 result.
; Both targets are expected to use a byte compare (cmpb) and to form the
; result with a byte subtract (subb).
8 define i8 @scmp.8.8(i8 %x, i8 %y) nounwind {
11 ; X64-NEXT: cmpb %sil, %dil
14 ; X64-NEXT: subb %cl, %al
17 ; X86-LABEL: scmp.8.8:
19 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
20 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
23 ; X86-NEXT: subb %cl, %al
25 %1 = call i8 @llvm.scmp(i8 %x, i8 %y)
; scmp.8.16 - llvm.scmp on two i16 operands producing an i8 result.
; The compare is expected at word width (cmpw); the result is still formed
; with a byte subtract.
29 define i8 @scmp.8.16(i16 %x, i16 %y) nounwind {
30 ; X64-LABEL: scmp.8.16:
32 ; X64-NEXT: cmpw %si, %di
35 ; X64-NEXT: subb %cl, %al
38 ; X86-LABEL: scmp.8.16:
40 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
41 ; X86-NEXT: cmpw {{[0-9]+}}(%esp), %ax
44 ; X86-NEXT: subb %cl, %al
46 %1 = call i8 @llvm.scmp(i16 %x, i16 %y)
; scmp.8.32 - llvm.scmp on two i32 operands producing an i8 result.
; The compare is expected at 32-bit width (cmpl) on both targets.
50 define i8 @scmp.8.32(i32 %x, i32 %y) nounwind {
51 ; X64-LABEL: scmp.8.32:
53 ; X64-NEXT: cmpl %esi, %edi
56 ; X64-NEXT: subb %cl, %al
59 ; X86-LABEL: scmp.8.32:
61 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
62 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
65 ; X86-NEXT: subb %cl, %al
67 %1 = call i8 @llvm.scmp(i32 %x, i32 %y)
; scmp.8.64 - llvm.scmp on two i64 operands producing an i8 result.
; The 64-bit runs expect a single cmpq. The i686 run works on 32-bit halves:
; a cmpl/sbbl pair is expected once for each operand order before the final
; byte subtract.
71 define i8 @scmp.8.64(i64 %x, i64 %y) nounwind {
72 ; X64-LABEL: scmp.8.64:
74 ; X64-NEXT: cmpq %rsi, %rdi
77 ; X64-NEXT: subb %cl, %al
80 ; X86-LABEL: scmp.8.64:
82 ; X86-NEXT: pushl %ebx
83 ; X86-NEXT: pushl %edi
84 ; X86-NEXT: pushl %esi
85 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
86 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
87 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
88 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
89 ; X86-NEXT: cmpl %eax, %edx
90 ; X86-NEXT: movl %esi, %edi
91 ; X86-NEXT: sbbl %ecx, %edi
93 ; X86-NEXT: cmpl %edx, %eax
94 ; X86-NEXT: sbbl %esi, %ecx
96 ; X86-NEXT: subb %bl, %al
101 %1 = call i8 @llvm.scmp(i64 %x, i64 %y)
; scmp.8.128 - llvm.scmp on two i128 operands producing an i8 result.
; The 64-bit runs expect a cmpq/sbbq chain in each operand order. The i686
; run expects the same shape over four 32-bit parts: a cmpl followed by three
; sbbl, once per operand order.
105 define i8 @scmp.8.128(i128 %x, i128 %y) nounwind {
106 ; X64-LABEL: scmp.8.128:
108 ; X64-NEXT: cmpq %rdx, %rdi
109 ; X64-NEXT: movq %rsi, %rax
110 ; X64-NEXT: sbbq %rcx, %rax
111 ; X64-NEXT: setl %r8b
112 ; X64-NEXT: cmpq %rdi, %rdx
113 ; X64-NEXT: sbbq %rsi, %rcx
115 ; X64-NEXT: subb %r8b, %al
118 ; X86-LABEL: scmp.8.128:
120 ; X86-NEXT: pushl %ebp
121 ; X86-NEXT: pushl %ebx
122 ; X86-NEXT: pushl %edi
123 ; X86-NEXT: pushl %esi
124 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
125 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
126 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
127 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
128 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
129 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi
130 ; X86-NEXT: movl %ebx, %ebp
131 ; X86-NEXT: sbbl %edx, %ebp
132 ; X86-NEXT: movl %ecx, %ebp
133 ; X86-NEXT: sbbl %eax, %ebp
134 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
135 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
136 ; X86-NEXT: movl %esi, %ecx
137 ; X86-NEXT: sbbl %ebp, %ecx
139 ; X86-NEXT: cmpl %edi, {{[0-9]+}}(%esp)
140 ; X86-NEXT: sbbl %ebx, %edx
141 ; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax
142 ; X86-NEXT: sbbl %esi, %ebp
144 ; X86-NEXT: subb %cl, %al
145 ; X86-NEXT: popl %esi
146 ; X86-NEXT: popl %edi
147 ; X86-NEXT: popl %ebx
148 ; X86-NEXT: popl %ebp
150 %1 = call i8 @llvm.scmp(i128 %x, i128 %y)
; scmp.32.32 - llvm.scmp on two i32 operands producing an i32 result.
; The result is still computed as a byte difference (subb) and is then
; expected to be sign-extended to 32 bits with movsbl.
154 define i32 @scmp.32.32(i32 %x, i32 %y) nounwind {
155 ; X64-LABEL: scmp.32.32:
157 ; X64-NEXT: cmpl %esi, %edi
160 ; X64-NEXT: subb %al, %cl
161 ; X64-NEXT: movsbl %cl, %eax
164 ; X86-LABEL: scmp.32.32:
166 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
167 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
170 ; X86-NEXT: subb %al, %cl
171 ; X86-NEXT: movsbl %cl, %eax
173 %1 = call i32 @llvm.scmp(i32 %x, i32 %y)
; scmp.32.64 - llvm.scmp on two i64 operands producing an i32 result.
; Combines the wide compare (cmpq on 64-bit, cmpl/sbbl pairs on i686) with a
; movsbl sign-extension of the byte difference.
177 define i32 @scmp.32.64(i64 %x, i64 %y) nounwind {
178 ; X64-LABEL: scmp.32.64:
180 ; X64-NEXT: cmpq %rsi, %rdi
183 ; X64-NEXT: subb %al, %cl
184 ; X64-NEXT: movsbl %cl, %eax
187 ; X86-LABEL: scmp.32.64:
189 ; X86-NEXT: pushl %ebx
190 ; X86-NEXT: pushl %edi
191 ; X86-NEXT: pushl %esi
192 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
193 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
194 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
195 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
196 ; X86-NEXT: cmpl %eax, %edx
197 ; X86-NEXT: movl %esi, %edi
198 ; X86-NEXT: sbbl %ecx, %edi
200 ; X86-NEXT: cmpl %edx, %eax
201 ; X86-NEXT: sbbl %esi, %ecx
203 ; X86-NEXT: subb %bl, %al
204 ; X86-NEXT: movsbl %al, %eax
205 ; X86-NEXT: popl %esi
206 ; X86-NEXT: popl %edi
207 ; X86-NEXT: popl %ebx
209 %1 = call i32 @llvm.scmp(i64 %x, i64 %y)
; scmp.64.64 - llvm.scmp on two i64 operands producing an i64 result.
; The 64-bit runs expect the byte difference to be widened with movsbq. The
; i686 run expects movsbl into %eax, then a copy into %edx shifted right
; arithmetically by 31 (presumably the high half of the i64 return value).
213 define i64 @scmp.64.64(i64 %x, i64 %y) nounwind {
214 ; X64-LABEL: scmp.64.64:
216 ; X64-NEXT: cmpq %rsi, %rdi
219 ; X64-NEXT: subb %al, %cl
220 ; X64-NEXT: movsbq %cl, %rax
223 ; X86-LABEL: scmp.64.64:
225 ; X86-NEXT: pushl %ebx
226 ; X86-NEXT: pushl %edi
227 ; X86-NEXT: pushl %esi
228 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
229 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
230 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
231 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
232 ; X86-NEXT: cmpl %eax, %edx
233 ; X86-NEXT: movl %esi, %edi
234 ; X86-NEXT: sbbl %ecx, %edi
236 ; X86-NEXT: cmpl %edx, %eax
237 ; X86-NEXT: sbbl %esi, %ecx
239 ; X86-NEXT: subb %bl, %al
240 ; X86-NEXT: movsbl %al, %eax
241 ; X86-NEXT: movl %eax, %edx
242 ; X86-NEXT: sarl $31, %edx
243 ; X86-NEXT: popl %esi
244 ; X86-NEXT: popl %edi
245 ; X86-NEXT: popl %ebx
247 %1 = call i64 @llvm.scmp(i64 %x, i64 %y)
; scmp_narrow_result - llvm.scmp on two i32 operands with an i4 result, i.e.
; a result type narrower than a byte. The expected code has the same
; cmpl + subb shape as the i8-result case.
251 define i4 @scmp_narrow_result(i32 %x, i32 %y) nounwind {
252 ; X64-LABEL: scmp_narrow_result:
254 ; X64-NEXT: cmpl %esi, %edi
257 ; X64-NEXT: subb %cl, %al
260 ; X86-LABEL: scmp_narrow_result:
262 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
263 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
266 ; X86-NEXT: subb %cl, %al
268 %1 = call i4 @llvm.scmp(i32 %x, i32 %y)
; scmp_narrow_op - llvm.scmp on two i62 operands producing an i8 result.
; Each operand is expected to be sign-extended in-register before the
; compare: shift left by 2 then arithmetic shift right by 2 (64 - 62 = 2).
; On i686 the shift pair is applied to one 32-bit part of each operand, and
; the compare uses cmpl/sbbl pairs.
272 define i8 @scmp_narrow_op(i62 %x, i62 %y) nounwind {
273 ; X64-LABEL: scmp_narrow_op:
275 ; X64-NEXT: shlq $2, %rsi
276 ; X64-NEXT: sarq $2, %rsi
277 ; X64-NEXT: shlq $2, %rdi
278 ; X64-NEXT: sarq $2, %rdi
279 ; X64-NEXT: cmpq %rsi, %rdi
282 ; X64-NEXT: subb %cl, %al
285 ; X86-LABEL: scmp_narrow_op:
287 ; X86-NEXT: pushl %ebx
288 ; X86-NEXT: pushl %edi
289 ; X86-NEXT: pushl %esi
290 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
291 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
292 ; X86-NEXT: shll $2, %eax
293 ; X86-NEXT: sarl $2, %eax
294 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
295 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
296 ; X86-NEXT: shll $2, %esi
297 ; X86-NEXT: sarl $2, %esi
298 ; X86-NEXT: cmpl %ecx, %edx
299 ; X86-NEXT: movl %esi, %edi
300 ; X86-NEXT: sbbl %eax, %edi
302 ; X86-NEXT: cmpl %edx, %ecx
303 ; X86-NEXT: sbbl %esi, %eax
305 ; X86-NEXT: subb %bl, %al
306 ; X86-NEXT: popl %esi
307 ; X86-NEXT: popl %edi
308 ; X86-NEXT: popl %ebx
310 %1 = call i8 @llvm.scmp(i62 %x, i62 %y)
; scmp_wide_result - llvm.scmp on two i32 operands with an i141 result, wider
; than any register. The 64-bit runs expect movsbq for the low part,
; sarq $63 for the sign fill, and a mask with 0x1FFF (13 bits = 141 - 128).
; The i686 run expects the value to be stored through the pointer held in
; %eax: 32-bit stores at offsets 0-12 and a masked 16-bit store at offset 16.
314 define i141 @scmp_wide_result(i32 %x, i32 %y) nounwind {
315 ; X64-LABEL: scmp_wide_result:
317 ; X64-NEXT: cmpl %esi, %edi
320 ; X64-NEXT: subb %al, %cl
321 ; X64-NEXT: movsbq %cl, %rax
322 ; X64-NEXT: movq %rax, %rdx
323 ; X64-NEXT: sarq $63, %rdx
324 ; X64-NEXT: movl %edx, %ecx
325 ; X64-NEXT: andl $8191, %ecx # imm = 0x1FFF
328 ; X86-LABEL: scmp_wide_result:
330 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
331 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
332 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
335 ; X86-NEXT: subb %cl, %dl
336 ; X86-NEXT: movsbl %dl, %ecx
337 ; X86-NEXT: movl %ecx, (%eax)
338 ; X86-NEXT: sarl $31, %ecx
339 ; X86-NEXT: movl %ecx, 12(%eax)
340 ; X86-NEXT: movl %ecx, 8(%eax)
341 ; X86-NEXT: movl %ecx, 4(%eax)
342 ; X86-NEXT: andl $8191, %ecx # imm = 0x1FFF
343 ; X86-NEXT: movw %cx, 16(%eax)
345 %1 = call i141 @llvm.scmp(i32 %x, i32 %y)
; scmp_wide_op - llvm.scmp on two i109 operands producing an i8 result.
; One part of each operand is expected to be sign-extended with a shift pair
; by 19 (128 - 109 = 19) before the multi-part compare: cmpq/sbbq on 64-bit,
; cmpl followed by three sbbl on i686, once per operand order.
349 define i8 @scmp_wide_op(i109 %x, i109 %y) nounwind {
350 ; X64-LABEL: scmp_wide_op:
352 ; X64-NEXT: shlq $19, %rcx
353 ; X64-NEXT: sarq $19, %rcx
354 ; X64-NEXT: shlq $19, %rsi
355 ; X64-NEXT: sarq $19, %rsi
356 ; X64-NEXT: cmpq %rdx, %rdi
357 ; X64-NEXT: movq %rsi, %rax
358 ; X64-NEXT: sbbq %rcx, %rax
359 ; X64-NEXT: setl %r8b
360 ; X64-NEXT: cmpq %rdi, %rdx
361 ; X64-NEXT: sbbq %rsi, %rcx
363 ; X64-NEXT: subb %r8b, %al
366 ; X86-LABEL: scmp_wide_op:
368 ; X86-NEXT: pushl %ebp
369 ; X86-NEXT: pushl %ebx
370 ; X86-NEXT: pushl %edi
371 ; X86-NEXT: pushl %esi
372 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
373 ; X86-NEXT: shll $19, %eax
374 ; X86-NEXT: sarl $19, %eax
375 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
376 ; X86-NEXT: shll $19, %ecx
377 ; X86-NEXT: sarl $19, %ecx
378 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
379 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
380 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
381 ; X86-NEXT: cmpl %esi, {{[0-9]+}}(%esp)
382 ; X86-NEXT: sbbl %edx, %ebp
383 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
384 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
385 ; X86-NEXT: movl %edi, %esi
386 ; X86-NEXT: sbbl %ebp, %esi
387 ; X86-NEXT: movl %ecx, %esi
388 ; X86-NEXT: sbbl %eax, %esi
390 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
391 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi
392 ; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
393 ; X86-NEXT: sbbl %edi, %ebp
394 ; X86-NEXT: sbbl %ecx, %eax
396 ; X86-NEXT: subb %bl, %al
397 ; X86-NEXT: popl %esi
398 ; X86-NEXT: popl %edi
399 ; X86-NEXT: popl %ebx
400 ; X86-NEXT: popl %ebp
402 %1 = call i8 @llvm.scmp(i109 %x, i109 %y)
; scmp_uncommon_types - llvm.scmp on two i7 operands with an i41 result, so
; neither type matches a register width. The 64-bit runs expect each operand
; to be sign-extended within a byte (addb reg,reg then sarb), a byte compare,
; and the result widened with movsbq. The i686 run widens with movsbl and a
; sarl $31 copy in %edx.
406 define i41 @scmp_uncommon_types(i7 %x, i7 %y) nounwind {
407 ; X64-LABEL: scmp_uncommon_types:
409 ; X64-NEXT: addb %sil, %sil
410 ; X64-NEXT: sarb %sil
411 ; X64-NEXT: addb %dil, %dil
412 ; X64-NEXT: sarb %dil
413 ; X64-NEXT: cmpb %sil, %dil
416 ; X64-NEXT: subb %al, %cl
417 ; X64-NEXT: movsbq %cl, %rax
420 ; X86-LABEL: scmp_uncommon_types:
422 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
423 ; X86-NEXT: addb %al, %al
425 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
426 ; X86-NEXT: addb %cl, %cl
428 ; X86-NEXT: cmpb %al, %cl
431 ; X86-NEXT: subb %al, %cl
432 ; X86-NEXT: movsbl %cl, %eax
433 ; X86-NEXT: movl %eax, %edx
434 ; X86-NEXT: sarl $31, %edx
436 %1 = call i41 @llvm.scmp(i7 %x, i7 %y)
; scmp_normal_vectors - llvm.scmp on <4 x i32> operands with a <4 x i32>
; result. The SSE and AVX2 runs expect two pcmpgtd compares (one per operand
; order) combined with psubd. The AVX512 run expects the compares to write
; mask registers, then a zero-masked broadcast of 1 and a masked move of
; all-ones. The i686 run is scalar: one cmpl per element, each result
; sign-extended with movsbl and stored at offsets 0-12 from %eax.
440 define <4 x i32> @scmp_normal_vectors(<4 x i32> %x, <4 x i32> %y) nounwind {
441 ; SSE-LABEL: scmp_normal_vectors:
443 ; SSE-NEXT: movdqa %xmm0, %xmm2
444 ; SSE-NEXT: pcmpgtd %xmm1, %xmm2
445 ; SSE-NEXT: pcmpgtd %xmm0, %xmm1
446 ; SSE-NEXT: psubd %xmm2, %xmm1
447 ; SSE-NEXT: movdqa %xmm1, %xmm0
450 ; AVX2-LABEL: scmp_normal_vectors:
452 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm2
453 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
454 ; AVX2-NEXT: vpsubd %xmm2, %xmm0, %xmm0
457 ; AVX512-LABEL: scmp_normal_vectors:
459 ; AVX512-NEXT: vpcmpgtd %xmm0, %xmm1, %k1
460 ; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k2
461 ; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm0 {%k2} {z} = [1,1,1,1]
462 ; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
463 ; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
466 ; X86-LABEL: scmp_normal_vectors:
468 ; X86-NEXT: pushl %ebx
469 ; X86-NEXT: pushl %edi
470 ; X86-NEXT: pushl %esi
471 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
472 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
473 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
474 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
475 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
476 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edx
479 ; X86-NEXT: subb %dl, %dh
480 ; X86-NEXT: movsbl %dh, %edx
481 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi
484 ; X86-NEXT: subb %bl, %bh
485 ; X86-NEXT: movsbl %bh, %edi
486 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi
489 ; X86-NEXT: subb %bl, %bh
490 ; X86-NEXT: movsbl %bh, %esi
491 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
494 ; X86-NEXT: subb %cl, %ch
495 ; X86-NEXT: movsbl %ch, %ecx
496 ; X86-NEXT: movl %ecx, 12(%eax)
497 ; X86-NEXT: movl %esi, 8(%eax)
498 ; X86-NEXT: movl %edi, 4(%eax)
499 ; X86-NEXT: movl %edx, (%eax)
500 ; X86-NEXT: popl %esi
501 ; X86-NEXT: popl %edi
502 ; X86-NEXT: popl %ebx
504 %1 = call <4 x i32> @llvm.scmp(<4 x i32> %x, <4 x i32> %y)
; scmp_narrow_vec_result - llvm.scmp on <4 x i32> operands with a <4 x i8>
; result (elements narrower than the operands). All x86-64 runs expect
; element-by-element scalar code: each lane is extracted (movd/pshufd on
; SSE2, pextrd otherwise), compared with cmpl, reduced with subb, and the
; bytes are reassembled (shift/or on SSE2, pinsrb on SSE4 and AVX). The i686
; run expects four cmpl compares and four byte stores at offsets 0-3.
508 define <4 x i8> @scmp_narrow_vec_result(<4 x i32> %x, <4 x i32> %y) nounwind {
509 ; SSE2-LABEL: scmp_narrow_vec_result:
511 ; SSE2-NEXT: movd %xmm1, %eax
512 ; SSE2-NEXT: movd %xmm0, %ecx
513 ; SSE2-NEXT: cmpl %eax, %ecx
514 ; SSE2-NEXT: setl %al
515 ; SSE2-NEXT: setg %cl
516 ; SSE2-NEXT: subb %al, %cl
517 ; SSE2-NEXT: movzbl %cl, %eax
518 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
519 ; SSE2-NEXT: movd %xmm2, %ecx
520 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
521 ; SSE2-NEXT: movd %xmm2, %edx
522 ; SSE2-NEXT: cmpl %ecx, %edx
523 ; SSE2-NEXT: setl %cl
524 ; SSE2-NEXT: setg %dl
525 ; SSE2-NEXT: subb %cl, %dl
526 ; SSE2-NEXT: movzbl %dl, %ecx
527 ; SSE2-NEXT: shll $8, %ecx
528 ; SSE2-NEXT: orl %eax, %ecx
529 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
530 ; SSE2-NEXT: movd %xmm2, %eax
531 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
532 ; SSE2-NEXT: movd %xmm2, %edx
533 ; SSE2-NEXT: cmpl %eax, %edx
534 ; SSE2-NEXT: setl %al
535 ; SSE2-NEXT: setg %dl
536 ; SSE2-NEXT: subb %al, %dl
537 ; SSE2-NEXT: movzbl %dl, %eax
538 ; SSE2-NEXT: shll $16, %eax
539 ; SSE2-NEXT: orl %ecx, %eax
540 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3]
541 ; SSE2-NEXT: movd %xmm1, %ecx
542 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
543 ; SSE2-NEXT: movd %xmm0, %edx
544 ; SSE2-NEXT: cmpl %ecx, %edx
545 ; SSE2-NEXT: setl %cl
546 ; SSE2-NEXT: setg %dl
547 ; SSE2-NEXT: subb %cl, %dl
548 ; SSE2-NEXT: movzbl %dl, %ecx
549 ; SSE2-NEXT: shll $24, %ecx
550 ; SSE2-NEXT: orl %eax, %ecx
551 ; SSE2-NEXT: movd %ecx, %xmm0
554 ; SSE4-LABEL: scmp_narrow_vec_result:
556 ; SSE4-NEXT: pextrd $1, %xmm1, %eax
557 ; SSE4-NEXT: pextrd $1, %xmm0, %ecx
558 ; SSE4-NEXT: cmpl %eax, %ecx
559 ; SSE4-NEXT: setl %al
560 ; SSE4-NEXT: setg %cl
561 ; SSE4-NEXT: subb %al, %cl
562 ; SSE4-NEXT: movzbl %cl, %eax
563 ; SSE4-NEXT: movd %xmm1, %ecx
564 ; SSE4-NEXT: movd %xmm0, %edx
565 ; SSE4-NEXT: cmpl %ecx, %edx
566 ; SSE4-NEXT: setl %cl
567 ; SSE4-NEXT: setg %dl
568 ; SSE4-NEXT: subb %cl, %dl
569 ; SSE4-NEXT: movzbl %dl, %ecx
570 ; SSE4-NEXT: movd %ecx, %xmm2
571 ; SSE4-NEXT: pinsrb $1, %eax, %xmm2
572 ; SSE4-NEXT: pextrd $2, %xmm1, %eax
573 ; SSE4-NEXT: pextrd $2, %xmm0, %ecx
574 ; SSE4-NEXT: cmpl %eax, %ecx
575 ; SSE4-NEXT: setl %al
576 ; SSE4-NEXT: setg %cl
577 ; SSE4-NEXT: subb %al, %cl
578 ; SSE4-NEXT: movzbl %cl, %eax
579 ; SSE4-NEXT: pinsrb $2, %eax, %xmm2
580 ; SSE4-NEXT: pextrd $3, %xmm1, %eax
581 ; SSE4-NEXT: pextrd $3, %xmm0, %ecx
582 ; SSE4-NEXT: cmpl %eax, %ecx
583 ; SSE4-NEXT: setl %al
584 ; SSE4-NEXT: setg %cl
585 ; SSE4-NEXT: subb %al, %cl
586 ; SSE4-NEXT: movzbl %cl, %eax
587 ; SSE4-NEXT: pinsrb $3, %eax, %xmm2
588 ; SSE4-NEXT: movdqa %xmm2, %xmm0
591 ; AVX-LABEL: scmp_narrow_vec_result:
593 ; AVX-NEXT: vpextrd $1, %xmm1, %eax
594 ; AVX-NEXT: vpextrd $1, %xmm0, %ecx
595 ; AVX-NEXT: cmpl %eax, %ecx
598 ; AVX-NEXT: subb %al, %cl
599 ; AVX-NEXT: vmovd %xmm1, %eax
600 ; AVX-NEXT: vmovd %xmm0, %edx
601 ; AVX-NEXT: cmpl %eax, %edx
604 ; AVX-NEXT: subb %al, %dl
605 ; AVX-NEXT: vmovd %edx, %xmm2
606 ; AVX-NEXT: vpinsrb $1, %ecx, %xmm2, %xmm2
607 ; AVX-NEXT: vpextrd $2, %xmm1, %eax
608 ; AVX-NEXT: vpextrd $2, %xmm0, %ecx
609 ; AVX-NEXT: cmpl %eax, %ecx
612 ; AVX-NEXT: subb %al, %cl
613 ; AVX-NEXT: vpinsrb $2, %ecx, %xmm2, %xmm2
614 ; AVX-NEXT: vpextrd $3, %xmm1, %eax
615 ; AVX-NEXT: vpextrd $3, %xmm0, %ecx
616 ; AVX-NEXT: cmpl %eax, %ecx
619 ; AVX-NEXT: subb %al, %cl
620 ; AVX-NEXT: vpinsrb $3, %ecx, %xmm2, %xmm0
623 ; X86-LABEL: scmp_narrow_vec_result:
625 ; X86-NEXT: pushl %ebx
626 ; X86-NEXT: pushl %edi
627 ; X86-NEXT: pushl %esi
628 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
629 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
630 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
631 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
632 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
633 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
636 ; X86-NEXT: subb %ch, %cl
637 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi
640 ; X86-NEXT: subb %ch, %bl
641 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi
644 ; X86-NEXT: subb %ch, %bh
645 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edx
648 ; X86-NEXT: subb %dl, %ch
649 ; X86-NEXT: movb %ch, 3(%eax)
650 ; X86-NEXT: movb %bh, 2(%eax)
651 ; X86-NEXT: movb %bl, 1(%eax)
652 ; X86-NEXT: movb %cl, (%eax)
653 ; X86-NEXT: popl %esi
654 ; X86-NEXT: popl %edi
655 ; X86-NEXT: popl %ebx
657 %1 = call <4 x i8> @llvm.scmp(<4 x i32> %x, <4 x i32> %y)
; scmp_narrow_vec_op - llvm.scmp on <4 x i8> operands with a <4 x i32>
; result (operands narrower than the result). The vector runs expect the
; operands to be widened to 32-bit lanes first (punpcklbw/punpcklwd plus
; psrad $24 on SSE2, pmovsxbd elsewhere) and then compared as in the
; <4 x i32> case. The i686 run expects four cmpb compares with movsbl-widened
; results stored at offsets 0-12 from %eax.
661 define <4 x i32> @scmp_narrow_vec_op(<4 x i8> %x, <4 x i8> %y) nounwind {
662 ; SSE2-LABEL: scmp_narrow_vec_op:
664 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
665 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
666 ; SSE2-NEXT: psrad $24, %xmm1
667 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
668 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
669 ; SSE2-NEXT: psrad $24, %xmm0
670 ; SSE2-NEXT: movdqa %xmm0, %xmm2
671 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
672 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
673 ; SSE2-NEXT: psubd %xmm2, %xmm1
674 ; SSE2-NEXT: movdqa %xmm1, %xmm0
677 ; SSE4-LABEL: scmp_narrow_vec_op:
679 ; SSE4-NEXT: pmovsxbd %xmm1, %xmm1
680 ; SSE4-NEXT: pmovsxbd %xmm0, %xmm0
681 ; SSE4-NEXT: movdqa %xmm0, %xmm2
682 ; SSE4-NEXT: pcmpgtd %xmm1, %xmm2
683 ; SSE4-NEXT: pcmpgtd %xmm0, %xmm1
684 ; SSE4-NEXT: psubd %xmm2, %xmm1
685 ; SSE4-NEXT: movdqa %xmm1, %xmm0
688 ; AVX2-LABEL: scmp_narrow_vec_op:
690 ; AVX2-NEXT: vpmovsxbd %xmm1, %xmm1
691 ; AVX2-NEXT: vpmovsxbd %xmm0, %xmm0
692 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm2
693 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
694 ; AVX2-NEXT: vpsubd %xmm2, %xmm0, %xmm0
697 ; AVX512-LABEL: scmp_narrow_vec_op:
699 ; AVX512-NEXT: vpmovsxbd %xmm0, %xmm0
700 ; AVX512-NEXT: vpmovsxbd %xmm1, %xmm1
701 ; AVX512-NEXT: vpcmpgtd %xmm0, %xmm1, %k1
702 ; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k2
703 ; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm0 {%k2} {z} = [1,1,1,1]
704 ; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
705 ; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
708 ; X86-LABEL: scmp_narrow_vec_op:
710 ; X86-NEXT: pushl %ebx
711 ; X86-NEXT: pushl %edi
712 ; X86-NEXT: pushl %esi
713 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
714 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
715 ; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
716 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
717 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
718 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %dl
721 ; X86-NEXT: subb %dl, %dh
722 ; X86-NEXT: movsbl %dh, %edx
723 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %bl
726 ; X86-NEXT: subb %bl, %bh
727 ; X86-NEXT: movsbl %bh, %esi
728 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %ch
731 ; X86-NEXT: subb %ch, %bl
732 ; X86-NEXT: movsbl %bl, %edi
733 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %cl
736 ; X86-NEXT: subb %cl, %ch
737 ; X86-NEXT: movsbl %ch, %ecx
738 ; X86-NEXT: movl %ecx, 12(%eax)
739 ; X86-NEXT: movl %edi, 8(%eax)
740 ; X86-NEXT: movl %esi, 4(%eax)
741 ; X86-NEXT: movl %edx, (%eax)
742 ; X86-NEXT: popl %esi
743 ; X86-NEXT: popl %edi
744 ; X86-NEXT: popl %ebx
746 %1 = call <4 x i32> @llvm.scmp(<4 x i8> %x, <4 x i8> %y)
; scmp_wide_vec_result - llvm.scmp on <16 x i8> operands with a <16 x i32>
; result, which spans several vector registers. The SSE and AVX2 runs expect
; the bytes to be widened to 32-bit lanes piecewise and each piece compared
; with pcmpgtd/psubd. The AVX512 run expects byte compares (vpcmpgtb) into
; mask registers followed by masked zmm moves. The i686 run is scalar: it
; reserves 16 bytes of stack, spills intermediate results, and stores sixteen
; movsbl-widened values at offsets 0-60 from %eax.
750 define <16 x i32> @scmp_wide_vec_result(<16 x i8> %x, <16 x i8> %y) nounwind {
751 ; SSE2-LABEL: scmp_wide_vec_result:
753 ; SSE2-NEXT: movdqa %xmm1, %xmm2
754 ; SSE2-NEXT: movdqa %xmm0, %xmm3
755 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
756 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
757 ; SSE2-NEXT: psrad $24, %xmm0
758 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
759 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
760 ; SSE2-NEXT: psrad $24, %xmm5
761 ; SSE2-NEXT: movdqa %xmm5, %xmm6
762 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm6
763 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm0
764 ; SSE2-NEXT: psubd %xmm6, %xmm0
765 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
766 ; SSE2-NEXT: psrad $24, %xmm1
767 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4,4,5,5,6,6,7,7]
768 ; SSE2-NEXT: psrad $24, %xmm4
769 ; SSE2-NEXT: movdqa %xmm4, %xmm5
770 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm5
771 ; SSE2-NEXT: pcmpgtd %xmm4, %xmm1
772 ; SSE2-NEXT: psubd %xmm5, %xmm1
773 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm2[8],xmm4[9],xmm2[9],xmm4[10],xmm2[10],xmm4[11],xmm2[11],xmm4[12],xmm2[12],xmm4[13],xmm2[13],xmm4[14],xmm2[14],xmm4[15],xmm2[15]
774 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
775 ; SSE2-NEXT: psrad $24, %xmm2
776 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm3[8],xmm5[9],xmm3[9],xmm5[10],xmm3[10],xmm5[11],xmm3[11],xmm5[12],xmm3[12],xmm5[13],xmm3[13],xmm5[14],xmm3[14],xmm5[15],xmm3[15]
777 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3]
778 ; SSE2-NEXT: psrad $24, %xmm3
779 ; SSE2-NEXT: movdqa %xmm3, %xmm6
780 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm6
781 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm2
782 ; SSE2-NEXT: psubd %xmm6, %xmm2
783 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
784 ; SSE2-NEXT: psrad $24, %xmm3
785 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
786 ; SSE2-NEXT: psrad $24, %xmm4
787 ; SSE2-NEXT: movdqa %xmm4, %xmm5
788 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm5
789 ; SSE2-NEXT: pcmpgtd %xmm4, %xmm3
790 ; SSE2-NEXT: psubd %xmm5, %xmm3
793 ; SSE4-LABEL: scmp_wide_vec_result:
795 ; SSE4-NEXT: movdqa %xmm0, %xmm4
796 ; SSE4-NEXT: pmovsxbd %xmm1, %xmm0
797 ; SSE4-NEXT: pmovsxbd %xmm4, %xmm2
798 ; SSE4-NEXT: movdqa %xmm2, %xmm3
799 ; SSE4-NEXT: pcmpgtd %xmm0, %xmm3
800 ; SSE4-NEXT: pcmpgtd %xmm2, %xmm0
801 ; SSE4-NEXT: psubd %xmm3, %xmm0
802 ; SSE4-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
803 ; SSE4-NEXT: pmovsxbd %xmm2, %xmm5
804 ; SSE4-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,1,1]
805 ; SSE4-NEXT: pmovsxbd %xmm2, %xmm2
806 ; SSE4-NEXT: movdqa %xmm2, %xmm3
807 ; SSE4-NEXT: pcmpgtd %xmm5, %xmm3
808 ; SSE4-NEXT: pcmpgtd %xmm2, %xmm5
809 ; SSE4-NEXT: psubd %xmm3, %xmm5
810 ; SSE4-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
811 ; SSE4-NEXT: pmovsxbd %xmm2, %xmm2
812 ; SSE4-NEXT: pshufd {{.*#+}} xmm3 = xmm4[2,3,2,3]
813 ; SSE4-NEXT: pmovsxbd %xmm3, %xmm3
814 ; SSE4-NEXT: movdqa %xmm3, %xmm6
815 ; SSE4-NEXT: pcmpgtd %xmm2, %xmm6
816 ; SSE4-NEXT: pcmpgtd %xmm3, %xmm2
817 ; SSE4-NEXT: psubd %xmm6, %xmm2
818 ; SSE4-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3]
819 ; SSE4-NEXT: pmovsxbd %xmm1, %xmm3
820 ; SSE4-NEXT: pshufd {{.*#+}} xmm1 = xmm4[3,3,3,3]
821 ; SSE4-NEXT: pmovsxbd %xmm1, %xmm1
822 ; SSE4-NEXT: movdqa %xmm1, %xmm4
823 ; SSE4-NEXT: pcmpgtd %xmm3, %xmm4
824 ; SSE4-NEXT: pcmpgtd %xmm1, %xmm3
825 ; SSE4-NEXT: psubd %xmm4, %xmm3
826 ; SSE4-NEXT: movdqa %xmm5, %xmm1
829 ; AVX2-LABEL: scmp_wide_vec_result:
831 ; AVX2-NEXT: vpmovsxbd %xmm1, %ymm2
832 ; AVX2-NEXT: vpmovsxbd %xmm0, %ymm3
833 ; AVX2-NEXT: vpcmpgtd %ymm2, %ymm3, %ymm4
834 ; AVX2-NEXT: vpcmpgtd %ymm3, %ymm2, %ymm2
835 ; AVX2-NEXT: vpsubd %ymm4, %ymm2, %ymm2
836 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
837 ; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1
838 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
839 ; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
840 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm3
841 ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
842 ; AVX2-NEXT: vpsubd %ymm3, %ymm0, %ymm1
843 ; AVX2-NEXT: vmovdqa %ymm2, %ymm0
846 ; AVX512-LABEL: scmp_wide_vec_result:
848 ; AVX512-NEXT: vpcmpgtb %xmm0, %xmm1, %k1
849 ; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k2
850 ; AVX512-NEXT: vpbroadcastd {{.*#+}} zmm0 {%k2} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
851 ; AVX512-NEXT: vpternlogd {{.*#+}} zmm1 = -1
852 ; AVX512-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
855 ; X86-LABEL: scmp_wide_vec_result:
857 ; X86-NEXT: pushl %ebp
858 ; X86-NEXT: pushl %ebx
859 ; X86-NEXT: pushl %edi
860 ; X86-NEXT: pushl %esi
861 ; X86-NEXT: subl $16, %esp
862 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
863 ; X86-NEXT: movb {{[0-9]+}}(%esp), %ah
864 ; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
865 ; X86-NEXT: movb {{[0-9]+}}(%esp), %dh
866 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
867 ; X86-NEXT: movb {{[0-9]+}}(%esp), %bh
868 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
869 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
872 ; X86-NEXT: subb %al, %cl
873 ; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
874 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %bh
877 ; X86-NEXT: subb %al, %cl
878 ; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
879 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %bl
882 ; X86-NEXT: subb %al, %cl
883 ; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
884 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %dh
887 ; X86-NEXT: subb %al, %cl
888 ; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
889 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %ch
892 ; X86-NEXT: subb %al, %cl
893 ; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
894 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %ah
897 ; X86-NEXT: subb %al, %cl
898 ; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
899 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %dl
902 ; X86-NEXT: subb %al, %cl
903 ; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
904 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
905 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
908 ; X86-NEXT: subb %al, %bh
909 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
910 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
913 ; X86-NEXT: subb %al, %bl
914 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
915 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
918 ; X86-NEXT: subb %al, %dh
919 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
920 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
923 ; X86-NEXT: subb %al, %dl
924 ; X86-NEXT: movsbl %dl, %eax
925 ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
926 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
927 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
930 ; X86-NEXT: subb %al, %dl
931 ; X86-NEXT: movsbl %dl, %eax
932 ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
933 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
934 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
937 ; X86-NEXT: subb %al, %dl
938 ; X86-NEXT: movsbl %dl, %ebp
939 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
940 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
943 ; X86-NEXT: subb %al, %dl
944 ; X86-NEXT: movsbl %dl, %edi
945 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
946 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
949 ; X86-NEXT: subb %al, %ah
950 ; X86-NEXT: movsbl %ah, %esi
951 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
952 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
955 ; X86-NEXT: subb %al, %dl
956 ; X86-NEXT: movsbl %dl, %ecx
957 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
958 ; X86-NEXT: movl %ecx, 60(%eax)
959 ; X86-NEXT: movl %esi, 56(%eax)
960 ; X86-NEXT: movl %edi, 52(%eax)
961 ; X86-NEXT: movl %ebp, 48(%eax)
962 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
963 ; X86-NEXT: movl %ecx, 44(%eax)
964 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
965 ; X86-NEXT: movl %ecx, 40(%eax)
966 ; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
967 ; X86-NEXT: movsbl %dh, %edx
968 ; X86-NEXT: movl %edx, 36(%eax)
969 ; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload
970 ; X86-NEXT: movsbl %bl, %esi
971 ; X86-NEXT: movl %esi, 32(%eax)
972 ; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
973 ; X86-NEXT: movsbl %bh, %edi
974 ; X86-NEXT: movl %edi, 28(%eax)
975 ; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
976 ; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
977 ; X86-NEXT: movl %ebx, 24(%eax)
978 ; X86-NEXT: movl %edi, 20(%eax)
979 ; X86-NEXT: movl %esi, 16(%eax)
980 ; X86-NEXT: movl %edx, 12(%eax)
981 ; X86-NEXT: movl %ecx, 8(%eax)
982 ; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
983 ; X86-NEXT: movl %ecx, 4(%eax)
984 ; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
985 ; X86-NEXT: movl %ecx, (%eax)
986 ; X86-NEXT: addl $16, %esp
987 ; X86-NEXT: popl %esi
988 ; X86-NEXT: popl %edi
989 ; X86-NEXT: popl %ebx
990 ; X86-NEXT: popl %ebp
992 %1 = call <16 x i32> @llvm.scmp(<16 x i8> %x, <16 x i8> %y)
996 define <16 x i8> @scmp_wide_vec_op(<16 x i64> %x, <16 x i64> %y) nounwind {
997 ; SSE2-LABEL: scmp_wide_vec_op:
999 ; SSE2-NEXT: movq %xmm7, %rax
1000 ; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1001 ; SSE2-NEXT: setl %al
1002 ; SSE2-NEXT: setg %cl
1003 ; SSE2-NEXT: subb %al, %cl
1004 ; SSE2-NEXT: movzbl %cl, %eax
1005 ; SSE2-NEXT: movd %eax, %xmm8
1006 ; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm7[2,3,2,3]
1007 ; SSE2-NEXT: movq %xmm7, %rax
1008 ; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1009 ; SSE2-NEXT: setl %al
1010 ; SSE2-NEXT: setg %cl
1011 ; SSE2-NEXT: subb %al, %cl
1012 ; SSE2-NEXT: movzbl %cl, %eax
1013 ; SSE2-NEXT: movd %eax, %xmm7
1014 ; SSE2-NEXT: movq %xmm6, %rax
1015 ; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1016 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm8 = xmm8[0],xmm7[0],xmm8[1],xmm7[1],xmm8[2],xmm7[2],xmm8[3],xmm7[3],xmm8[4],xmm7[4],xmm8[5],xmm7[5],xmm8[6],xmm7[6],xmm8[7],xmm7[7]
1017 ; SSE2-NEXT: setl %al
1018 ; SSE2-NEXT: setg %cl
1019 ; SSE2-NEXT: subb %al, %cl
1020 ; SSE2-NEXT: movzbl %cl, %eax
1021 ; SSE2-NEXT: movd %eax, %xmm7
1022 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[2,3,2,3]
1023 ; SSE2-NEXT: movq %xmm6, %rax
1024 ; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1025 ; SSE2-NEXT: setl %al
1026 ; SSE2-NEXT: setg %cl
1027 ; SSE2-NEXT: subb %al, %cl
1028 ; SSE2-NEXT: movzbl %cl, %eax
1029 ; SSE2-NEXT: movd %eax, %xmm6
1030 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1],xmm7[2],xmm6[2],xmm7[3],xmm6[3],xmm7[4],xmm6[4],xmm7[5],xmm6[5],xmm7[6],xmm6[6],xmm7[7],xmm6[7]
1031 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1],xmm7[2],xmm8[2],xmm7[3],xmm8[3]
1032 ; SSE2-NEXT: movq %xmm5, %rax
1033 ; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1034 ; SSE2-NEXT: setl %al
1035 ; SSE2-NEXT: setg %cl
1036 ; SSE2-NEXT: subb %al, %cl
1037 ; SSE2-NEXT: movzbl %cl, %eax
1038 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3]
1039 ; SSE2-NEXT: movq %xmm5, %rcx
1040 ; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rcx
1041 ; SSE2-NEXT: movd %eax, %xmm6
1042 ; SSE2-NEXT: setl %al
1043 ; SSE2-NEXT: setg %cl
1044 ; SSE2-NEXT: subb %al, %cl
1045 ; SSE2-NEXT: movzbl %cl, %eax
1046 ; SSE2-NEXT: movq %xmm4, %rcx
1047 ; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rcx
1048 ; SSE2-NEXT: movd %eax, %xmm8
1049 ; SSE2-NEXT: setl %al
1050 ; SSE2-NEXT: setg %cl
1051 ; SSE2-NEXT: subb %al, %cl
1052 ; SSE2-NEXT: movzbl %cl, %eax
1053 ; SSE2-NEXT: movd %eax, %xmm5
1054 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
1055 ; SSE2-NEXT: movq %xmm4, %rax
1056 ; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1057 ; SSE2-NEXT: setl %al
1058 ; SSE2-NEXT: setg %cl
1059 ; SSE2-NEXT: subb %al, %cl
1060 ; SSE2-NEXT: movzbl %cl, %eax
1061 ; SSE2-NEXT: movd %eax, %xmm4
1062 ; SSE2-NEXT: movq %xmm3, %rax
1063 ; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1064 ; SSE2-NEXT: setl %al
1065 ; SSE2-NEXT: setg %cl
1066 ; SSE2-NEXT: subb %al, %cl
1067 ; SSE2-NEXT: movzbl %cl, %eax
1068 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
1069 ; SSE2-NEXT: movq %xmm3, %rcx
1070 ; SSE2-NEXT: movd %eax, %xmm3
1071 ; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rcx
1072 ; SSE2-NEXT: setl %al
1073 ; SSE2-NEXT: setg %cl
1074 ; SSE2-NEXT: subb %al, %cl
1075 ; SSE2-NEXT: movzbl %cl, %eax
1076 ; SSE2-NEXT: movq %xmm2, %rcx
1077 ; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rcx
1078 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
1079 ; SSE2-NEXT: movq %xmm2, %rcx
1080 ; SSE2-NEXT: movd %eax, %xmm2
1081 ; SSE2-NEXT: setl %al
1082 ; SSE2-NEXT: setg %dl
1083 ; SSE2-NEXT: subb %al, %dl
1084 ; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rcx
1085 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm8[0],xmm6[1],xmm8[1],xmm6[2],xmm8[2],xmm6[3],xmm8[3],xmm6[4],xmm8[4],xmm6[5],xmm8[5],xmm6[6],xmm8[6],xmm6[7],xmm8[7]
1086 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
1087 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
1088 ; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1]
1089 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
1090 ; SSE2-NEXT: movzbl %dl, %eax
1091 ; SSE2-NEXT: movd %eax, %xmm2
1092 ; SSE2-NEXT: setl %al
1093 ; SSE2-NEXT: setg %cl
1094 ; SSE2-NEXT: subb %al, %cl
1095 ; SSE2-NEXT: movzbl %cl, %eax
1096 ; SSE2-NEXT: movd %eax, %xmm4
1097 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
1098 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
1099 ; SSE2-NEXT: movq %xmm1, %rax
1100 ; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1101 ; SSE2-NEXT: setl %al
1102 ; SSE2-NEXT: setg %cl
1103 ; SSE2-NEXT: subb %al, %cl
1104 ; SSE2-NEXT: movzbl %cl, %eax
1105 ; SSE2-NEXT: movd %eax, %xmm3
1106 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1107 ; SSE2-NEXT: movq %xmm1, %rax
1108 ; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1109 ; SSE2-NEXT: setl %al
1110 ; SSE2-NEXT: setg %cl
1111 ; SSE2-NEXT: subb %al, %cl
1112 ; SSE2-NEXT: movzbl %cl, %eax
1113 ; SSE2-NEXT: movd %eax, %xmm1
1114 ; SSE2-NEXT: movq %xmm0, %rax
1115 ; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1116 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
1117 ; SSE2-NEXT: setl %al
1118 ; SSE2-NEXT: setg %cl
1119 ; SSE2-NEXT: subb %al, %cl
1120 ; SSE2-NEXT: movzbl %cl, %eax
1121 ; SSE2-NEXT: movd %eax, %xmm1
1122 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1123 ; SSE2-NEXT: movq %xmm0, %rax
1124 ; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1125 ; SSE2-NEXT: setl %al
1126 ; SSE2-NEXT: setg %cl
1127 ; SSE2-NEXT: subb %al, %cl
1128 ; SSE2-NEXT: movzbl %cl, %eax
1129 ; SSE2-NEXT: movd %eax, %xmm0
1130 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1131 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
1132 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1133 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0]
1134 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1137 ; SSE4-LABEL: scmp_wide_vec_op:
1139 ; SSE4-NEXT: pextrq $1, %xmm0, %rax
1140 ; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1141 ; SSE4-NEXT: setl %al
1142 ; SSE4-NEXT: setg %cl
1143 ; SSE4-NEXT: subb %al, %cl
1144 ; SSE4-NEXT: movzbl %cl, %eax
1145 ; SSE4-NEXT: movq %xmm0, %rcx
1146 ; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rcx
1147 ; SSE4-NEXT: setl %cl
1148 ; SSE4-NEXT: setg %dl
1149 ; SSE4-NEXT: subb %cl, %dl
1150 ; SSE4-NEXT: movzbl %dl, %ecx
1151 ; SSE4-NEXT: movd %ecx, %xmm0
1152 ; SSE4-NEXT: pinsrb $1, %eax, %xmm0
1153 ; SSE4-NEXT: movq %xmm1, %rax
1154 ; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1155 ; SSE4-NEXT: setl %al
1156 ; SSE4-NEXT: setg %cl
1157 ; SSE4-NEXT: subb %al, %cl
1158 ; SSE4-NEXT: movzbl %cl, %eax
1159 ; SSE4-NEXT: pinsrb $2, %eax, %xmm0
1160 ; SSE4-NEXT: pextrq $1, %xmm1, %rax
1161 ; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1162 ; SSE4-NEXT: setl %al
1163 ; SSE4-NEXT: setg %cl
1164 ; SSE4-NEXT: subb %al, %cl
1165 ; SSE4-NEXT: movzbl %cl, %eax
1166 ; SSE4-NEXT: pinsrb $3, %eax, %xmm0
1167 ; SSE4-NEXT: movq %xmm2, %rax
1168 ; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1169 ; SSE4-NEXT: setl %al
1170 ; SSE4-NEXT: setg %cl
1171 ; SSE4-NEXT: subb %al, %cl
1172 ; SSE4-NEXT: movzbl %cl, %eax
1173 ; SSE4-NEXT: pinsrb $4, %eax, %xmm0
1174 ; SSE4-NEXT: pextrq $1, %xmm2, %rax
1175 ; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1176 ; SSE4-NEXT: setl %al
1177 ; SSE4-NEXT: setg %cl
1178 ; SSE4-NEXT: subb %al, %cl
1179 ; SSE4-NEXT: movzbl %cl, %eax
1180 ; SSE4-NEXT: pinsrb $5, %eax, %xmm0
1181 ; SSE4-NEXT: movq %xmm3, %rax
1182 ; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1183 ; SSE4-NEXT: setl %al
1184 ; SSE4-NEXT: setg %cl
1185 ; SSE4-NEXT: subb %al, %cl
1186 ; SSE4-NEXT: movzbl %cl, %eax
1187 ; SSE4-NEXT: pinsrb $6, %eax, %xmm0
1188 ; SSE4-NEXT: pextrq $1, %xmm3, %rax
1189 ; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1190 ; SSE4-NEXT: setl %al
1191 ; SSE4-NEXT: setg %cl
1192 ; SSE4-NEXT: subb %al, %cl
1193 ; SSE4-NEXT: movzbl %cl, %eax
1194 ; SSE4-NEXT: pinsrb $7, %eax, %xmm0
1195 ; SSE4-NEXT: movq %xmm4, %rax
1196 ; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1197 ; SSE4-NEXT: setl %al
1198 ; SSE4-NEXT: setg %cl
1199 ; SSE4-NEXT: subb %al, %cl
1200 ; SSE4-NEXT: movzbl %cl, %eax
1201 ; SSE4-NEXT: pinsrb $8, %eax, %xmm0
1202 ; SSE4-NEXT: pextrq $1, %xmm4, %rax
1203 ; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1204 ; SSE4-NEXT: setl %al
1205 ; SSE4-NEXT: setg %cl
1206 ; SSE4-NEXT: subb %al, %cl
1207 ; SSE4-NEXT: movzbl %cl, %eax
1208 ; SSE4-NEXT: pinsrb $9, %eax, %xmm0
1209 ; SSE4-NEXT: movq %xmm5, %rax
1210 ; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1211 ; SSE4-NEXT: setl %al
1212 ; SSE4-NEXT: setg %cl
1213 ; SSE4-NEXT: subb %al, %cl
1214 ; SSE4-NEXT: movzbl %cl, %eax
1215 ; SSE4-NEXT: pinsrb $10, %eax, %xmm0
1216 ; SSE4-NEXT: pextrq $1, %xmm5, %rax
1217 ; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1218 ; SSE4-NEXT: setl %al
1219 ; SSE4-NEXT: setg %cl
1220 ; SSE4-NEXT: subb %al, %cl
1221 ; SSE4-NEXT: movzbl %cl, %eax
1222 ; SSE4-NEXT: pinsrb $11, %eax, %xmm0
1223 ; SSE4-NEXT: movq %xmm6, %rax
1224 ; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1225 ; SSE4-NEXT: setl %al
1226 ; SSE4-NEXT: setg %cl
1227 ; SSE4-NEXT: subb %al, %cl
1228 ; SSE4-NEXT: movzbl %cl, %eax
1229 ; SSE4-NEXT: pinsrb $12, %eax, %xmm0
1230 ; SSE4-NEXT: pextrq $1, %xmm6, %rax
1231 ; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1232 ; SSE4-NEXT: setl %al
1233 ; SSE4-NEXT: setg %cl
1234 ; SSE4-NEXT: subb %al, %cl
1235 ; SSE4-NEXT: movzbl %cl, %eax
1236 ; SSE4-NEXT: pinsrb $13, %eax, %xmm0
1237 ; SSE4-NEXT: movq %xmm7, %rax
1238 ; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1239 ; SSE4-NEXT: setl %al
1240 ; SSE4-NEXT: setg %cl
1241 ; SSE4-NEXT: subb %al, %cl
1242 ; SSE4-NEXT: movzbl %cl, %eax
1243 ; SSE4-NEXT: pinsrb $14, %eax, %xmm0
1244 ; SSE4-NEXT: pextrq $1, %xmm7, %rax
1245 ; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
1246 ; SSE4-NEXT: setl %al
1247 ; SSE4-NEXT: setg %cl
1248 ; SSE4-NEXT: subb %al, %cl
1249 ; SSE4-NEXT: movzbl %cl, %eax
1250 ; SSE4-NEXT: pinsrb $15, %eax, %xmm0
1253 ; AVX2-LABEL: scmp_wide_vec_op:
1255 ; AVX2-NEXT: vpextrq $1, %xmm4, %rax
1256 ; AVX2-NEXT: vpextrq $1, %xmm0, %rcx
1257 ; AVX2-NEXT: cmpq %rax, %rcx
1258 ; AVX2-NEXT: setl %al
1259 ; AVX2-NEXT: setg %cl
1260 ; AVX2-NEXT: subb %al, %cl
1261 ; AVX2-NEXT: vmovq %xmm4, %rax
1262 ; AVX2-NEXT: vmovq %xmm0, %rdx
1263 ; AVX2-NEXT: cmpq %rax, %rdx
1264 ; AVX2-NEXT: setl %al
1265 ; AVX2-NEXT: setg %dl
1266 ; AVX2-NEXT: subb %al, %dl
1267 ; AVX2-NEXT: vmovd %edx, %xmm8
1268 ; AVX2-NEXT: vpinsrb $1, %ecx, %xmm8, %xmm8
1269 ; AVX2-NEXT: vextracti128 $1, %ymm4, %xmm4
1270 ; AVX2-NEXT: vmovq %xmm4, %rax
1271 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1272 ; AVX2-NEXT: vmovq %xmm0, %rcx
1273 ; AVX2-NEXT: cmpq %rax, %rcx
1274 ; AVX2-NEXT: setl %al
1275 ; AVX2-NEXT: setg %cl
1276 ; AVX2-NEXT: subb %al, %cl
1277 ; AVX2-NEXT: vpinsrb $2, %ecx, %xmm8, %xmm8
1278 ; AVX2-NEXT: vpextrq $1, %xmm4, %rax
1279 ; AVX2-NEXT: vpextrq $1, %xmm0, %rcx
1280 ; AVX2-NEXT: cmpq %rax, %rcx
1281 ; AVX2-NEXT: setl %al
1282 ; AVX2-NEXT: setg %cl
1283 ; AVX2-NEXT: subb %al, %cl
1284 ; AVX2-NEXT: vpinsrb $3, %ecx, %xmm8, %xmm0
1285 ; AVX2-NEXT: vmovq %xmm5, %rax
1286 ; AVX2-NEXT: vmovq %xmm1, %rcx
1287 ; AVX2-NEXT: cmpq %rax, %rcx
1288 ; AVX2-NEXT: setl %al
1289 ; AVX2-NEXT: setg %cl
1290 ; AVX2-NEXT: subb %al, %cl
1291 ; AVX2-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
1292 ; AVX2-NEXT: vpextrq $1, %xmm5, %rax
1293 ; AVX2-NEXT: vpextrq $1, %xmm1, %rcx
1294 ; AVX2-NEXT: cmpq %rax, %rcx
1295 ; AVX2-NEXT: setl %al
1296 ; AVX2-NEXT: setg %cl
1297 ; AVX2-NEXT: subb %al, %cl
1298 ; AVX2-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
1299 ; AVX2-NEXT: vextracti128 $1, %ymm5, %xmm4
1300 ; AVX2-NEXT: vmovq %xmm4, %rax
1301 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
1302 ; AVX2-NEXT: vmovq %xmm1, %rcx
1303 ; AVX2-NEXT: cmpq %rax, %rcx
1304 ; AVX2-NEXT: setl %al
1305 ; AVX2-NEXT: setg %cl
1306 ; AVX2-NEXT: subb %al, %cl
1307 ; AVX2-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
1308 ; AVX2-NEXT: vpextrq $1, %xmm4, %rax
1309 ; AVX2-NEXT: vpextrq $1, %xmm1, %rcx
1310 ; AVX2-NEXT: cmpq %rax, %rcx
1311 ; AVX2-NEXT: setl %al
1312 ; AVX2-NEXT: setg %cl
1313 ; AVX2-NEXT: subb %al, %cl
1314 ; AVX2-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
1315 ; AVX2-NEXT: vmovq %xmm6, %rax
1316 ; AVX2-NEXT: vmovq %xmm2, %rcx
1317 ; AVX2-NEXT: cmpq %rax, %rcx
1318 ; AVX2-NEXT: setl %al
1319 ; AVX2-NEXT: setg %cl
1320 ; AVX2-NEXT: subb %al, %cl
1321 ; AVX2-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
1322 ; AVX2-NEXT: vpextrq $1, %xmm6, %rax
1323 ; AVX2-NEXT: vpextrq $1, %xmm2, %rcx
1324 ; AVX2-NEXT: cmpq %rax, %rcx
1325 ; AVX2-NEXT: setl %al
1326 ; AVX2-NEXT: setg %cl
1327 ; AVX2-NEXT: subb %al, %cl
1328 ; AVX2-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
1329 ; AVX2-NEXT: vextracti128 $1, %ymm6, %xmm1
1330 ; AVX2-NEXT: vmovq %xmm1, %rax
1331 ; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2
1332 ; AVX2-NEXT: vmovq %xmm2, %rcx
1333 ; AVX2-NEXT: cmpq %rax, %rcx
1334 ; AVX2-NEXT: setl %al
1335 ; AVX2-NEXT: setg %cl
1336 ; AVX2-NEXT: subb %al, %cl
1337 ; AVX2-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
1338 ; AVX2-NEXT: vpextrq $1, %xmm1, %rax
1339 ; AVX2-NEXT: vpextrq $1, %xmm2, %rcx
1340 ; AVX2-NEXT: cmpq %rax, %rcx
1341 ; AVX2-NEXT: setl %al
1342 ; AVX2-NEXT: setg %cl
1343 ; AVX2-NEXT: subb %al, %cl
1344 ; AVX2-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
1345 ; AVX2-NEXT: vmovq %xmm7, %rax
1346 ; AVX2-NEXT: vmovq %xmm3, %rcx
1347 ; AVX2-NEXT: cmpq %rax, %rcx
1348 ; AVX2-NEXT: setl %al
1349 ; AVX2-NEXT: setg %cl
1350 ; AVX2-NEXT: subb %al, %cl
1351 ; AVX2-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
1352 ; AVX2-NEXT: vpextrq $1, %xmm7, %rax
1353 ; AVX2-NEXT: vpextrq $1, %xmm3, %rcx
1354 ; AVX2-NEXT: cmpq %rax, %rcx
1355 ; AVX2-NEXT: setl %al
1356 ; AVX2-NEXT: setg %cl
1357 ; AVX2-NEXT: subb %al, %cl
1358 ; AVX2-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
1359 ; AVX2-NEXT: vextracti128 $1, %ymm7, %xmm1
1360 ; AVX2-NEXT: vmovq %xmm1, %rax
1361 ; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm2
1362 ; AVX2-NEXT: vmovq %xmm2, %rcx
1363 ; AVX2-NEXT: cmpq %rax, %rcx
1364 ; AVX2-NEXT: setl %al
1365 ; AVX2-NEXT: setg %cl
1366 ; AVX2-NEXT: subb %al, %cl
1367 ; AVX2-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
1368 ; AVX2-NEXT: vpextrq $1, %xmm1, %rax
1369 ; AVX2-NEXT: vpextrq $1, %xmm2, %rcx
1370 ; AVX2-NEXT: cmpq %rax, %rcx
1371 ; AVX2-NEXT: setl %al
1372 ; AVX2-NEXT: setg %cl
1373 ; AVX2-NEXT: subb %al, %cl
1374 ; AVX2-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0
1375 ; AVX2-NEXT: vzeroupper
1378 ; AVX512-LABEL: scmp_wide_vec_op:
1380 ; AVX512-NEXT: vpextrq $1, %xmm2, %rax
1381 ; AVX512-NEXT: vpextrq $1, %xmm0, %rcx
1382 ; AVX512-NEXT: cmpq %rax, %rcx
1383 ; AVX512-NEXT: setl %al
1384 ; AVX512-NEXT: setg %cl
1385 ; AVX512-NEXT: subb %al, %cl
1386 ; AVX512-NEXT: vmovq %xmm2, %rax
1387 ; AVX512-NEXT: vmovq %xmm0, %rdx
1388 ; AVX512-NEXT: cmpq %rax, %rdx
1389 ; AVX512-NEXT: setl %al
1390 ; AVX512-NEXT: setg %dl
1391 ; AVX512-NEXT: subb %al, %dl
1392 ; AVX512-NEXT: vmovd %edx, %xmm4
1393 ; AVX512-NEXT: vpinsrb $1, %ecx, %xmm4, %xmm4
1394 ; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm5
1395 ; AVX512-NEXT: vmovq %xmm5, %rax
1396 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm6
1397 ; AVX512-NEXT: vmovq %xmm6, %rcx
1398 ; AVX512-NEXT: cmpq %rax, %rcx
1399 ; AVX512-NEXT: setl %al
1400 ; AVX512-NEXT: setg %cl
1401 ; AVX512-NEXT: subb %al, %cl
1402 ; AVX512-NEXT: vpinsrb $2, %ecx, %xmm4, %xmm4
1403 ; AVX512-NEXT: vpextrq $1, %xmm5, %rax
1404 ; AVX512-NEXT: vpextrq $1, %xmm6, %rcx
1405 ; AVX512-NEXT: cmpq %rax, %rcx
1406 ; AVX512-NEXT: setl %al
1407 ; AVX512-NEXT: setg %cl
1408 ; AVX512-NEXT: subb %al, %cl
1409 ; AVX512-NEXT: vpinsrb $3, %ecx, %xmm4, %xmm4
1410 ; AVX512-NEXT: vextracti32x4 $2, %zmm2, %xmm5
1411 ; AVX512-NEXT: vmovq %xmm5, %rax
1412 ; AVX512-NEXT: vextracti32x4 $2, %zmm0, %xmm6
1413 ; AVX512-NEXT: vmovq %xmm6, %rcx
1414 ; AVX512-NEXT: cmpq %rax, %rcx
1415 ; AVX512-NEXT: setl %al
1416 ; AVX512-NEXT: setg %cl
1417 ; AVX512-NEXT: subb %al, %cl
1418 ; AVX512-NEXT: vpinsrb $4, %ecx, %xmm4, %xmm4
1419 ; AVX512-NEXT: vpextrq $1, %xmm5, %rax
1420 ; AVX512-NEXT: vpextrq $1, %xmm6, %rcx
1421 ; AVX512-NEXT: cmpq %rax, %rcx
1422 ; AVX512-NEXT: setl %al
1423 ; AVX512-NEXT: setg %cl
1424 ; AVX512-NEXT: subb %al, %cl
1425 ; AVX512-NEXT: vpinsrb $5, %ecx, %xmm4, %xmm4
1426 ; AVX512-NEXT: vextracti32x4 $3, %zmm2, %xmm2
1427 ; AVX512-NEXT: vmovq %xmm2, %rax
1428 ; AVX512-NEXT: vextracti32x4 $3, %zmm0, %xmm0
1429 ; AVX512-NEXT: vmovq %xmm0, %rcx
1430 ; AVX512-NEXT: cmpq %rax, %rcx
1431 ; AVX512-NEXT: setl %al
1432 ; AVX512-NEXT: setg %cl
1433 ; AVX512-NEXT: subb %al, %cl
1434 ; AVX512-NEXT: vpinsrb $6, %ecx, %xmm4, %xmm4
1435 ; AVX512-NEXT: vpextrq $1, %xmm2, %rax
1436 ; AVX512-NEXT: vpextrq $1, %xmm0, %rcx
1437 ; AVX512-NEXT: cmpq %rax, %rcx
1438 ; AVX512-NEXT: setl %al
1439 ; AVX512-NEXT: setg %cl
1440 ; AVX512-NEXT: subb %al, %cl
1441 ; AVX512-NEXT: vpinsrb $7, %ecx, %xmm4, %xmm0
1442 ; AVX512-NEXT: vmovq %xmm3, %rax
1443 ; AVX512-NEXT: vmovq %xmm1, %rcx
1444 ; AVX512-NEXT: cmpq %rax, %rcx
1445 ; AVX512-NEXT: setl %al
1446 ; AVX512-NEXT: setg %cl
1447 ; AVX512-NEXT: subb %al, %cl
1448 ; AVX512-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
1449 ; AVX512-NEXT: vpextrq $1, %xmm3, %rax
1450 ; AVX512-NEXT: vpextrq $1, %xmm1, %rcx
1451 ; AVX512-NEXT: cmpq %rax, %rcx
1452 ; AVX512-NEXT: setl %al
1453 ; AVX512-NEXT: setg %cl
1454 ; AVX512-NEXT: subb %al, %cl
1455 ; AVX512-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
1456 ; AVX512-NEXT: vextracti128 $1, %ymm3, %xmm2
1457 ; AVX512-NEXT: vmovq %xmm2, %rax
1458 ; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm4
1459 ; AVX512-NEXT: vmovq %xmm4, %rcx
1460 ; AVX512-NEXT: cmpq %rax, %rcx
1461 ; AVX512-NEXT: setl %al
1462 ; AVX512-NEXT: setg %cl
1463 ; AVX512-NEXT: subb %al, %cl
1464 ; AVX512-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
1465 ; AVX512-NEXT: vpextrq $1, %xmm2, %rax
1466 ; AVX512-NEXT: vpextrq $1, %xmm4, %rcx
1467 ; AVX512-NEXT: cmpq %rax, %rcx
1468 ; AVX512-NEXT: setl %al
1469 ; AVX512-NEXT: setg %cl
1470 ; AVX512-NEXT: subb %al, %cl
1471 ; AVX512-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
1472 ; AVX512-NEXT: vextracti32x4 $2, %zmm3, %xmm2
1473 ; AVX512-NEXT: vmovq %xmm2, %rax
1474 ; AVX512-NEXT: vextracti32x4 $2, %zmm1, %xmm4
1475 ; AVX512-NEXT: vmovq %xmm4, %rcx
1476 ; AVX512-NEXT: cmpq %rax, %rcx
1477 ; AVX512-NEXT: setl %al
1478 ; AVX512-NEXT: setg %cl
1479 ; AVX512-NEXT: subb %al, %cl
1480 ; AVX512-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
1481 ; AVX512-NEXT: vpextrq $1, %xmm2, %rax
1482 ; AVX512-NEXT: vpextrq $1, %xmm4, %rcx
1483 ; AVX512-NEXT: cmpq %rax, %rcx
1484 ; AVX512-NEXT: setl %al
1485 ; AVX512-NEXT: setg %cl
1486 ; AVX512-NEXT: subb %al, %cl
1487 ; AVX512-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
1488 ; AVX512-NEXT: vextracti32x4 $3, %zmm3, %xmm2
1489 ; AVX512-NEXT: vmovq %xmm2, %rax
1490 ; AVX512-NEXT: vextracti32x4 $3, %zmm1, %xmm1
1491 ; AVX512-NEXT: vmovq %xmm1, %rcx
1492 ; AVX512-NEXT: cmpq %rax, %rcx
1493 ; AVX512-NEXT: setl %al
1494 ; AVX512-NEXT: setg %cl
1495 ; AVX512-NEXT: subb %al, %cl
1496 ; AVX512-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
1497 ; AVX512-NEXT: vpextrq $1, %xmm2, %rax
1498 ; AVX512-NEXT: vpextrq $1, %xmm1, %rcx
1499 ; AVX512-NEXT: cmpq %rax, %rcx
1500 ; AVX512-NEXT: setl %al
1501 ; AVX512-NEXT: setg %cl
1502 ; AVX512-NEXT: subb %al, %cl
1503 ; AVX512-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0
1504 ; AVX512-NEXT: vzeroupper
1507 ; X86-LABEL: scmp_wide_vec_op:
1509 ; X86-NEXT: pushl %ebp
1510 ; X86-NEXT: pushl %ebx
1511 ; X86-NEXT: pushl %edi
1512 ; X86-NEXT: pushl %esi
1513 ; X86-NEXT: subl $12, %esp
1514 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1515 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1516 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1517 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
1518 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
1519 ; X86-NEXT: cmpl %edx, %edi
1520 ; X86-NEXT: movl %ebx, %ebp
1521 ; X86-NEXT: sbbl %esi, %ebp
1522 ; X86-NEXT: setl %al
1523 ; X86-NEXT: cmpl %edi, %edx
1524 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1525 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
1526 ; X86-NEXT: sbbl %ebx, %esi
1527 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
1528 ; X86-NEXT: setl %ah
1529 ; X86-NEXT: subb %al, %ah
1530 ; X86-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1531 ; X86-NEXT: cmpl %ecx, %ebp
1532 ; X86-NEXT: movl %ebx, %eax
1533 ; X86-NEXT: sbbl %edx, %eax
1534 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
1535 ; X86-NEXT: setl %al
1536 ; X86-NEXT: cmpl %ebp, %ecx
1537 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1538 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1539 ; X86-NEXT: sbbl %ebx, %edx
1540 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1541 ; X86-NEXT: setl %ah
1542 ; X86-NEXT: subb %al, %ah
1543 ; X86-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1544 ; X86-NEXT: cmpl %edi, %ecx
1545 ; X86-NEXT: movl %edx, %eax
1546 ; X86-NEXT: sbbl %esi, %eax
1547 ; X86-NEXT: setl %al
1548 ; X86-NEXT: cmpl %ecx, %edi
1549 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1550 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
1551 ; X86-NEXT: sbbl %edx, %esi
1552 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1553 ; X86-NEXT: setl %dl
1554 ; X86-NEXT: subb %al, %dl
1555 ; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1556 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
1557 ; X86-NEXT: cmpl %ebp, %edi
1558 ; X86-NEXT: movl %esi, %eax
1559 ; X86-NEXT: sbbl %ecx, %eax
1560 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1561 ; X86-NEXT: setl %bl
1562 ; X86-NEXT: cmpl %edi, %ebp
1563 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1564 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
1565 ; X86-NEXT: sbbl %esi, %ecx
1566 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1567 ; X86-NEXT: setl %cl
1568 ; X86-NEXT: subb %bl, %cl
1569 ; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1570 ; X86-NEXT: cmpl %edx, %edi
1571 ; X86-NEXT: movl %esi, %ecx
1572 ; X86-NEXT: sbbl %eax, %ecx
1573 ; X86-NEXT: setl %bl
1574 ; X86-NEXT: cmpl %edi, %edx
1575 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1576 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1577 ; X86-NEXT: sbbl %esi, %eax
1578 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1579 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1580 ; X86-NEXT: setl %bh
1581 ; X86-NEXT: subb %bl, %bh
1582 ; X86-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1583 ; X86-NEXT: cmpl %edx, %eax
1584 ; X86-NEXT: movl %esi, %edi
1585 ; X86-NEXT: sbbl %ecx, %edi
1586 ; X86-NEXT: setl %bl
1587 ; X86-NEXT: cmpl %eax, %edx
1588 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1589 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1590 ; X86-NEXT: sbbl %esi, %ecx
1591 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1592 ; X86-NEXT: setl %bh
1593 ; X86-NEXT: subb %bl, %bh
1594 ; X86-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1595 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1596 ; X86-NEXT: cmpl %edx, %ecx
1597 ; X86-NEXT: movl %esi, %edi
1598 ; X86-NEXT: sbbl %eax, %edi
1599 ; X86-NEXT: setl %bl
1600 ; X86-NEXT: cmpl %ecx, %edx
1601 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1602 ; X86-NEXT: sbbl %esi, %eax
1603 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1604 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1605 ; X86-NEXT: setl %bh
1606 ; X86-NEXT: subb %bl, %bh
1607 ; X86-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1608 ; X86-NEXT: cmpl %ecx, %edx
1609 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1610 ; X86-NEXT: movl %esi, %edi
1611 ; X86-NEXT: sbbl %eax, %edi
1612 ; X86-NEXT: setl %bl
1613 ; X86-NEXT: cmpl %edx, %ecx
1614 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1615 ; X86-NEXT: sbbl %esi, %eax
1616 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1617 ; X86-NEXT: setl %dl
1618 ; X86-NEXT: subb %bl, %dl
1619 ; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1620 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1621 ; X86-NEXT: cmpl %ecx, %edx
1622 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1623 ; X86-NEXT: movl %esi, %edi
1624 ; X86-NEXT: sbbl %eax, %edi
1625 ; X86-NEXT: setl %bl
1626 ; X86-NEXT: cmpl %edx, %ecx
1627 ; X86-NEXT: sbbl %esi, %eax
1628 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1629 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1630 ; X86-NEXT: setl %dl
1631 ; X86-NEXT: subb %bl, %dl
1632 ; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1633 ; X86-NEXT: cmpl %eax, %ecx
1634 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1635 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1636 ; X86-NEXT: movl %esi, %edi
1637 ; X86-NEXT: sbbl %edx, %edi
1638 ; X86-NEXT: setl %bl
1639 ; X86-NEXT: cmpl %ecx, %eax
1640 ; X86-NEXT: sbbl %esi, %edx
1641 ; X86-NEXT: setl %al
1642 ; X86-NEXT: subb %bl, %al
1643 ; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1644 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
1645 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1646 ; X86-NEXT: cmpl %ebp, %ecx
1647 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1648 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1649 ; X86-NEXT: movl %esi, %edi
1650 ; X86-NEXT: sbbl %edx, %edi
1651 ; X86-NEXT: setl %al
1652 ; X86-NEXT: cmpl %ecx, %ebp
1653 ; X86-NEXT: sbbl %esi, %edx
1654 ; X86-NEXT: setl %cl
1655 ; X86-NEXT: subb %al, %cl
1656 ; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1657 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
1658 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1659 ; X86-NEXT: cmpl %ebp, %ecx
1660 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1661 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1662 ; X86-NEXT: movl %esi, %edi
1663 ; X86-NEXT: sbbl %edx, %edi
1664 ; X86-NEXT: setl %al
1665 ; X86-NEXT: cmpl %ecx, %ebp
1666 ; X86-NEXT: sbbl %esi, %edx
1667 ; X86-NEXT: setl %cl
1668 ; X86-NEXT: subb %al, %cl
1669 ; X86-NEXT: movb %cl, (%esp) # 1-byte Spill
1670 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1671 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1672 ; X86-NEXT: cmpl %eax, %ecx
1673 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1674 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
1675 ; X86-NEXT: movl %edi, %ebp
1676 ; X86-NEXT: sbbl %esi, %ebp
1677 ; X86-NEXT: setl %dl
1678 ; X86-NEXT: cmpl %ecx, %eax
1679 ; X86-NEXT: sbbl %edi, %esi
1680 ; X86-NEXT: setl %ch
1681 ; X86-NEXT: subb %dl, %ch
1682 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
1683 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1684 ; X86-NEXT: cmpl %edx, %esi
1685 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
1686 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1687 ; X86-NEXT: movl %eax, %ebp
1688 ; X86-NEXT: sbbl %edi, %ebp
1689 ; X86-NEXT: setl %cl
1690 ; X86-NEXT: cmpl %esi, %edx
1691 ; X86-NEXT: sbbl %eax, %edi
1692 ; X86-NEXT: setl %dl
1693 ; X86-NEXT: subb %cl, %dl
1694 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
1695 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1696 ; X86-NEXT: cmpl %ebx, %esi
1697 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
1698 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1699 ; X86-NEXT: movl %eax, %ebp
1700 ; X86-NEXT: sbbl %edi, %ebp
1701 ; X86-NEXT: setl %dh
1702 ; X86-NEXT: cmpl %esi, %ebx
1703 ; X86-NEXT: sbbl %eax, %edi
1704 ; X86-NEXT: setl %cl
1705 ; X86-NEXT: subb %dh, %cl
1706 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1707 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1708 ; X86-NEXT: cmpl %eax, %esi
1709 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
1710 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
1711 ; X86-NEXT: movl %ebx, %ebp
1712 ; X86-NEXT: sbbl %edi, %ebp
1713 ; X86-NEXT: setl %dh
1714 ; X86-NEXT: cmpl %esi, %eax
1715 ; X86-NEXT: sbbl %ebx, %edi
1716 ; X86-NEXT: setl %bl
1717 ; X86-NEXT: subb %dh, %bl
1718 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1719 ; X86-NEXT: movb %bl, 15(%eax)
1720 ; X86-NEXT: movb %cl, 14(%eax)
1721 ; X86-NEXT: movb %dl, 13(%eax)
1722 ; X86-NEXT: movb %ch, 12(%eax)
1723 ; X86-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload
1724 ; X86-NEXT: movb %cl, 11(%eax)
1725 ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1726 ; X86-NEXT: movb %cl, 10(%eax)
1727 ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1728 ; X86-NEXT: movb %cl, 9(%eax)
1729 ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1730 ; X86-NEXT: movb %cl, 8(%eax)
1731 ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1732 ; X86-NEXT: movb %cl, 7(%eax)
1733 ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1734 ; X86-NEXT: movb %cl, 6(%eax)
1735 ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1736 ; X86-NEXT: movb %cl, 5(%eax)
1737 ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1738 ; X86-NEXT: movb %cl, 4(%eax)
1739 ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1740 ; X86-NEXT: movb %cl, 3(%eax)
1741 ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1742 ; X86-NEXT: movb %cl, 2(%eax)
1743 ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1744 ; X86-NEXT: movb %cl, 1(%eax)
1745 ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1746 ; X86-NEXT: movb %cl, (%eax)
1747 ; X86-NEXT: addl $12, %esp
1748 ; X86-NEXT: popl %esi
1749 ; X86-NEXT: popl %edi
1750 ; X86-NEXT: popl %ebx
1751 ; X86-NEXT: popl %ebp
1753 %1 = call <16 x i8> @llvm.scmp(<16 x i64> %x, <16 x i64> %y)
1757 define <7 x i117> @scmp_uncommon_vectors(<7 x i7> %x, <7 x i7> %y) nounwind {
1758 ; SSE2-LABEL: scmp_uncommon_vectors:
1760 ; SSE2-NEXT: pushq %rbp
1761 ; SSE2-NEXT: pushq %r15
1762 ; SSE2-NEXT: pushq %r14
1763 ; SSE2-NEXT: pushq %r13
1764 ; SSE2-NEXT: pushq %r12
1765 ; SSE2-NEXT: pushq %rbx
1766 ; SSE2-NEXT: movq %rdi, %rax
1767 ; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
1768 ; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
1769 ; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
1770 ; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
1771 ; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
1772 ; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
1773 ; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
1774 ; SSE2-NEXT: addb %r15b, %r15b
1775 ; SSE2-NEXT: sarb %r15b
1776 ; SSE2-NEXT: addb %sil, %sil
1777 ; SSE2-NEXT: sarb %sil
1778 ; SSE2-NEXT: cmpb %r15b, %sil
1779 ; SSE2-NEXT: setl %sil
1780 ; SSE2-NEXT: setg %r15b
1781 ; SSE2-NEXT: subb %sil, %r15b
1782 ; SSE2-NEXT: movsbq %r15b, %rsi
1783 ; SSE2-NEXT: movq %rsi, (%rax)
1784 ; SSE2-NEXT: movq %rsi, %xmm0
1785 ; SSE2-NEXT: sarq $63, %rsi
1786 ; SSE2-NEXT: addb %r14b, %r14b
1787 ; SSE2-NEXT: sarb %r14b
1788 ; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
1789 ; SSE2-NEXT: addb %r15b, %r15b
1790 ; SSE2-NEXT: sarb %r15b
1791 ; SSE2-NEXT: cmpb %r14b, %r15b
1792 ; SSE2-NEXT: setl %r14b
1793 ; SSE2-NEXT: setg %r15b
1794 ; SSE2-NEXT: subb %r14b, %r15b
1795 ; SSE2-NEXT: movsbq %r15b, %r14
1796 ; SSE2-NEXT: movq %r14, %r15
1797 ; SSE2-NEXT: sarq $63, %r15
1798 ; SSE2-NEXT: addb %bpl, %bpl
1799 ; SSE2-NEXT: sarb %bpl
1800 ; SSE2-NEXT: addb %dl, %dl
1801 ; SSE2-NEXT: sarb %dl
1802 ; SSE2-NEXT: cmpb %bpl, %dl
1803 ; SSE2-NEXT: setl %dl
1804 ; SSE2-NEXT: setg %bpl
1805 ; SSE2-NEXT: subb %dl, %bpl
1806 ; SSE2-NEXT: movsbq %bpl, %rdx
1807 ; SSE2-NEXT: movq %rdx, %r12
1808 ; SSE2-NEXT: sarq $63, %r12
1809 ; SSE2-NEXT: addb %bl, %bl
1810 ; SSE2-NEXT: sarb %bl
1811 ; SSE2-NEXT: addb %cl, %cl
1812 ; SSE2-NEXT: sarb %cl
1813 ; SSE2-NEXT: cmpb %bl, %cl
1814 ; SSE2-NEXT: setl %cl
1815 ; SSE2-NEXT: setg %bl
1816 ; SSE2-NEXT: subb %cl, %bl
1817 ; SSE2-NEXT: movsbq %bl, %rbx
1818 ; SSE2-NEXT: movq %rbx, %rcx
1819 ; SSE2-NEXT: sarq $63, %rcx
1820 ; SSE2-NEXT: addb %r11b, %r11b
1821 ; SSE2-NEXT: sarb %r11b
1822 ; SSE2-NEXT: addb %r8b, %r8b
1823 ; SSE2-NEXT: sarb %r8b
1824 ; SSE2-NEXT: cmpb %r11b, %r8b
1825 ; SSE2-NEXT: setl %r8b
1826 ; SSE2-NEXT: setg %r11b
1827 ; SSE2-NEXT: subb %r8b, %r11b
1828 ; SSE2-NEXT: movsbq %r11b, %r8
1829 ; SSE2-NEXT: movq %r8, %r11
1830 ; SSE2-NEXT: sarq $63, %r11
1831 ; SSE2-NEXT: addb %r10b, %r10b
1832 ; SSE2-NEXT: sarb %r10b
1833 ; SSE2-NEXT: addb %r9b, %r9b
1834 ; SSE2-NEXT: sarb %r9b
1835 ; SSE2-NEXT: cmpb %r10b, %r9b
1836 ; SSE2-NEXT: setl %r9b
1837 ; SSE2-NEXT: setg %r10b
1838 ; SSE2-NEXT: subb %r9b, %r10b
1839 ; SSE2-NEXT: movsbq %r10b, %r9
1840 ; SSE2-NEXT: movq %r9, %r10
1841 ; SSE2-NEXT: sarq $63, %r10
1842 ; SSE2-NEXT: addb %dil, %dil
1843 ; SSE2-NEXT: sarb %dil
1844 ; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
1845 ; SSE2-NEXT: addb %bpl, %bpl
1846 ; SSE2-NEXT: sarb %bpl
1847 ; SSE2-NEXT: cmpb %dil, %bpl
1848 ; SSE2-NEXT: setl %dil
1849 ; SSE2-NEXT: setg %bpl
1850 ; SSE2-NEXT: subb %dil, %bpl
1851 ; SSE2-NEXT: movsbq %bpl, %rdi
1852 ; SSE2-NEXT: movq %rdi, %r13
1853 ; SSE2-NEXT: sarq $63, %r13
1854 ; SSE2-NEXT: movl %r13d, 96(%rax)
1855 ; SSE2-NEXT: movabsq $2251799813685247, %rbp # imm = 0x7FFFFFFFFFFFF
1856 ; SSE2-NEXT: andq %r13, %rbp
1857 ; SSE2-NEXT: shldq $62, %rdi, %r13
1858 ; SSE2-NEXT: movq %r13, 88(%rax)
1859 ; SSE2-NEXT: movq %r10, %r13
1860 ; SSE2-NEXT: shldq $20, %r9, %r13
1861 ; SSE2-NEXT: movq %r13, 64(%rax)
1862 ; SSE2-NEXT: movq %r11, %r13
1863 ; SSE2-NEXT: shldq $31, %r8, %r13
1864 ; SSE2-NEXT: movq %r13, 48(%rax)
1865 ; SSE2-NEXT: movq %rcx, %r13
1866 ; SSE2-NEXT: shldq $42, %rbx, %r13
1867 ; SSE2-NEXT: movq %r13, 32(%rax)
1868 ; SSE2-NEXT: movabsq $9007199254738944, %r13 # imm = 0x1FFFFFFFFFF800
1869 ; SSE2-NEXT: andq %r12, %r13
1870 ; SSE2-NEXT: shldq $53, %rdx, %r12
1871 ; SSE2-NEXT: movq %r12, 16(%rax)
1872 ; SSE2-NEXT: movq %rbp, %r12
1873 ; SSE2-NEXT: shrq $48, %r12
1874 ; SSE2-NEXT: movb %r12b, 102(%rax)
1875 ; SSE2-NEXT: shrq $32, %rbp
1876 ; SSE2-NEXT: movw %bp, 100(%rax)
1877 ; SSE2-NEXT: movabsq $9007199254740991, %r12 # imm = 0x1FFFFFFFFFFFFF
1878 ; SSE2-NEXT: andq %r12, %r15
1879 ; SSE2-NEXT: shldq $9, %r14, %r15
1880 ; SSE2-NEXT: shlq $62, %rdi
1881 ; SSE2-NEXT: orq %r15, %rdi
1882 ; SSE2-NEXT: movq %rdi, 80(%rax)
1883 ; SSE2-NEXT: shlq $42, %rbx
1884 ; SSE2-NEXT: shrq $11, %r13
1885 ; SSE2-NEXT: orq %rbx, %r13
1886 ; SSE2-NEXT: movq %r13, 24(%rax)
1887 ; SSE2-NEXT: shlq $9, %r14
1888 ; SSE2-NEXT: andl $511, %r10d # imm = 0x1FF
1889 ; SSE2-NEXT: orq %r14, %r10
1890 ; SSE2-NEXT: movq %r10, 72(%rax)
1891 ; SSE2-NEXT: shlq $20, %r9
1892 ; SSE2-NEXT: andl $1048575, %r11d # imm = 0xFFFFF
1893 ; SSE2-NEXT: orq %r9, %r11
1894 ; SSE2-NEXT: movq %r11, 56(%rax)
1895 ; SSE2-NEXT: shlq $31, %r8
1896 ; SSE2-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF
1897 ; SSE2-NEXT: orq %r8, %rcx
1898 ; SSE2-NEXT: movq %rcx, 40(%rax)
1899 ; SSE2-NEXT: movq %rsi, %xmm1
1900 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1901 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1902 ; SSE2-NEXT: movq %xmm0, %rcx
1903 ; SSE2-NEXT: andq %r12, %rcx
1904 ; SSE2-NEXT: shlq $53, %rdx
1905 ; SSE2-NEXT: orq %rcx, %rdx
1906 ; SSE2-NEXT: movq %rdx, 8(%rax)
1907 ; SSE2-NEXT: popq %rbx
1908 ; SSE2-NEXT: popq %r12
1909 ; SSE2-NEXT: popq %r13
1910 ; SSE2-NEXT: popq %r14
1911 ; SSE2-NEXT: popq %r15
1912 ; SSE2-NEXT: popq %rbp
1915 ; SSE4-LABEL: scmp_uncommon_vectors:
1917 ; SSE4-NEXT: pushq %rbp
1918 ; SSE4-NEXT: pushq %r15
1919 ; SSE4-NEXT: pushq %r14
1920 ; SSE4-NEXT: pushq %r13
1921 ; SSE4-NEXT: pushq %r12
1922 ; SSE4-NEXT: pushq %rbx
1923 ; SSE4-NEXT: movq %rdi, %rax
1924 ; SSE4-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
1925 ; SSE4-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
1926 ; SSE4-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
1927 ; SSE4-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
1928 ; SSE4-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
1929 ; SSE4-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
1930 ; SSE4-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
1931 ; SSE4-NEXT: addb %r14b, %r14b
1932 ; SSE4-NEXT: sarb %r14b
1933 ; SSE4-NEXT: addb %sil, %sil
1934 ; SSE4-NEXT: sarb %sil
1935 ; SSE4-NEXT: cmpb %r14b, %sil
1936 ; SSE4-NEXT: setl %sil
1937 ; SSE4-NEXT: setg %r14b
1938 ; SSE4-NEXT: subb %sil, %r14b
1939 ; SSE4-NEXT: movsbq %r14b, %r14
1940 ; SSE4-NEXT: movq %r14, (%rax)
1941 ; SSE4-NEXT: sarq $63, %r14
1942 ; SSE4-NEXT: addb %r15b, %r15b
1943 ; SSE4-NEXT: sarb %r15b
1944 ; SSE4-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
1945 ; SSE4-NEXT: addb %sil, %sil
1946 ; SSE4-NEXT: sarb %sil
1947 ; SSE4-NEXT: cmpb %r15b, %sil
1948 ; SSE4-NEXT: setl %sil
1949 ; SSE4-NEXT: setg %r15b
1950 ; SSE4-NEXT: subb %sil, %r15b
1951 ; SSE4-NEXT: movsbq %r15b, %rsi
1952 ; SSE4-NEXT: movq %rsi, %r15
1953 ; SSE4-NEXT: sarq $63, %r15
1954 ; SSE4-NEXT: addb %bpl, %bpl
1955 ; SSE4-NEXT: sarb %bpl
1956 ; SSE4-NEXT: addb %dl, %dl
1957 ; SSE4-NEXT: sarb %dl
1958 ; SSE4-NEXT: cmpb %bpl, %dl
1959 ; SSE4-NEXT: setl %dl
1960 ; SSE4-NEXT: setg %bpl
1961 ; SSE4-NEXT: subb %dl, %bpl
1962 ; SSE4-NEXT: movsbq %bpl, %r12
1963 ; SSE4-NEXT: movq %r12, %r13
1964 ; SSE4-NEXT: sarq $63, %r13
1965 ; SSE4-NEXT: addb %bl, %bl
1966 ; SSE4-NEXT: sarb %bl
1967 ; SSE4-NEXT: addb %cl, %cl
1968 ; SSE4-NEXT: sarb %cl
1969 ; SSE4-NEXT: cmpb %bl, %cl
1970 ; SSE4-NEXT: setl %cl
1971 ; SSE4-NEXT: setg %dl
1972 ; SSE4-NEXT: subb %cl, %dl
1973 ; SSE4-NEXT: movsbq %dl, %rbx
1974 ; SSE4-NEXT: movq %rbx, %rcx
1975 ; SSE4-NEXT: sarq $63, %rcx
1976 ; SSE4-NEXT: addb %r11b, %r11b
1977 ; SSE4-NEXT: sarb %r11b
1978 ; SSE4-NEXT: addb %r8b, %r8b
1979 ; SSE4-NEXT: sarb %r8b
1980 ; SSE4-NEXT: cmpb %r11b, %r8b
1981 ; SSE4-NEXT: setl %dl
1982 ; SSE4-NEXT: setg %r8b
1983 ; SSE4-NEXT: subb %dl, %r8b
1984 ; SSE4-NEXT: movsbq %r8b, %rdx
1985 ; SSE4-NEXT: movq %rdx, %r8
1986 ; SSE4-NEXT: sarq $63, %r8
1987 ; SSE4-NEXT: addb %r10b, %r10b
1988 ; SSE4-NEXT: sarb %r10b
1989 ; SSE4-NEXT: addb %r9b, %r9b
1990 ; SSE4-NEXT: sarb %r9b
1991 ; SSE4-NEXT: cmpb %r10b, %r9b
1992 ; SSE4-NEXT: setl %r9b
1993 ; SSE4-NEXT: setg %r10b
1994 ; SSE4-NEXT: subb %r9b, %r10b
1995 ; SSE4-NEXT: movsbq %r10b, %r9
1996 ; SSE4-NEXT: movq %r9, %r10
1997 ; SSE4-NEXT: sarq $63, %r10
1998 ; SSE4-NEXT: addb %dil, %dil
1999 ; SSE4-NEXT: sarb %dil
2000 ; SSE4-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
2001 ; SSE4-NEXT: addb %r11b, %r11b
2002 ; SSE4-NEXT: sarb %r11b
2003 ; SSE4-NEXT: cmpb %dil, %r11b
2004 ; SSE4-NEXT: setl %dil
2005 ; SSE4-NEXT: setg %r11b
2006 ; SSE4-NEXT: subb %dil, %r11b
2007 ; SSE4-NEXT: movsbq %r11b, %rdi
2008 ; SSE4-NEXT: movq %rdi, %rbp
2009 ; SSE4-NEXT: sarq $63, %rbp
2010 ; SSE4-NEXT: movl %ebp, 96(%rax)
2011 ; SSE4-NEXT: movabsq $2251799813685247, %r11 # imm = 0x7FFFFFFFFFFFF
2012 ; SSE4-NEXT: andq %rbp, %r11
2013 ; SSE4-NEXT: shldq $62, %rdi, %rbp
2014 ; SSE4-NEXT: movq %rbp, 88(%rax)
2015 ; SSE4-NEXT: movq %r10, %rbp
2016 ; SSE4-NEXT: shldq $20, %r9, %rbp
2017 ; SSE4-NEXT: movq %rbp, 64(%rax)
2018 ; SSE4-NEXT: movq %r8, %rbp
2019 ; SSE4-NEXT: shldq $31, %rdx, %rbp
2020 ; SSE4-NEXT: movq %rbp, 48(%rax)
2021 ; SSE4-NEXT: movq %rcx, %rbp
2022 ; SSE4-NEXT: shldq $42, %rbx, %rbp
2023 ; SSE4-NEXT: movq %rbp, 32(%rax)
2024 ; SSE4-NEXT: movabsq $9007199254738944, %rbp # imm = 0x1FFFFFFFFFF800
2025 ; SSE4-NEXT: andq %r13, %rbp
2026 ; SSE4-NEXT: shldq $53, %r12, %r13
2027 ; SSE4-NEXT: movq %r13, 16(%rax)
2028 ; SSE4-NEXT: movq %r11, %r13
2029 ; SSE4-NEXT: shrq $48, %r13
2030 ; SSE4-NEXT: movb %r13b, 102(%rax)
2031 ; SSE4-NEXT: shrq $32, %r11
2032 ; SSE4-NEXT: movw %r11w, 100(%rax)
2033 ; SSE4-NEXT: movabsq $9007199254740991, %r11 # imm = 0x1FFFFFFFFFFFFF
2034 ; SSE4-NEXT: andq %r11, %r15
2035 ; SSE4-NEXT: shldq $9, %rsi, %r15
2036 ; SSE4-NEXT: shlq $62, %rdi
2037 ; SSE4-NEXT: orq %r15, %rdi
2038 ; SSE4-NEXT: movq %rdi, 80(%rax)
2039 ; SSE4-NEXT: andq %r11, %r14
2040 ; SSE4-NEXT: shlq $53, %r12
2041 ; SSE4-NEXT: orq %r14, %r12
2042 ; SSE4-NEXT: movq %r12, 8(%rax)
2043 ; SSE4-NEXT: shlq $42, %rbx
2044 ; SSE4-NEXT: shrq $11, %rbp
2045 ; SSE4-NEXT: orq %rbx, %rbp
2046 ; SSE4-NEXT: movq %rbp, 24(%rax)
2047 ; SSE4-NEXT: shlq $9, %rsi
2048 ; SSE4-NEXT: andl $511, %r10d # imm = 0x1FF
2049 ; SSE4-NEXT: orq %rsi, %r10
2050 ; SSE4-NEXT: movq %r10, 72(%rax)
2051 ; SSE4-NEXT: shlq $20, %r9
2052 ; SSE4-NEXT: andl $1048575, %r8d # imm = 0xFFFFF
2053 ; SSE4-NEXT: orq %r9, %r8
2054 ; SSE4-NEXT: movq %r8, 56(%rax)
2055 ; SSE4-NEXT: shlq $31, %rdx
2056 ; SSE4-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF
2057 ; SSE4-NEXT: orq %rdx, %rcx
2058 ; SSE4-NEXT: movq %rcx, 40(%rax)
2059 ; SSE4-NEXT: popq %rbx
2060 ; SSE4-NEXT: popq %r12
2061 ; SSE4-NEXT: popq %r13
2062 ; SSE4-NEXT: popq %r14
2063 ; SSE4-NEXT: popq %r15
2064 ; SSE4-NEXT: popq %rbp
2067 ; AVX-LABEL: scmp_uncommon_vectors:
2069 ; AVX-NEXT: pushq %rbp
2070 ; AVX-NEXT: pushq %r15
2071 ; AVX-NEXT: pushq %r14
2072 ; AVX-NEXT: pushq %r13
2073 ; AVX-NEXT: pushq %r12
2074 ; AVX-NEXT: pushq %rbx
2075 ; AVX-NEXT: movq %rdi, %rax
2076 ; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
2077 ; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
2078 ; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
2079 ; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
2080 ; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
2081 ; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
2082 ; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
2083 ; AVX-NEXT: addb %r14b, %r14b
2084 ; AVX-NEXT: sarb %r14b
2085 ; AVX-NEXT: addb %sil, %sil
2086 ; AVX-NEXT: sarb %sil
2087 ; AVX-NEXT: cmpb %r14b, %sil
2088 ; AVX-NEXT: setl %sil
2089 ; AVX-NEXT: setg %r14b
2090 ; AVX-NEXT: subb %sil, %r14b
2091 ; AVX-NEXT: movsbq %r14b, %r14
2092 ; AVX-NEXT: movq %r14, (%rax)
2093 ; AVX-NEXT: sarq $63, %r14
2094 ; AVX-NEXT: addb %r15b, %r15b
2095 ; AVX-NEXT: sarb %r15b
2096 ; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
2097 ; AVX-NEXT: addb %sil, %sil
2098 ; AVX-NEXT: sarb %sil
2099 ; AVX-NEXT: cmpb %r15b, %sil
2100 ; AVX-NEXT: setl %sil
2101 ; AVX-NEXT: setg %r15b
2102 ; AVX-NEXT: subb %sil, %r15b
2103 ; AVX-NEXT: movsbq %r15b, %rsi
2104 ; AVX-NEXT: movq %rsi, %r12
2105 ; AVX-NEXT: sarq $63, %r12
2106 ; AVX-NEXT: addb %bpl, %bpl
2107 ; AVX-NEXT: sarb %bpl
2108 ; AVX-NEXT: addb %dl, %dl
2109 ; AVX-NEXT: sarb %dl
2110 ; AVX-NEXT: cmpb %bpl, %dl
2111 ; AVX-NEXT: setl %dl
2112 ; AVX-NEXT: setg %bpl
2113 ; AVX-NEXT: subb %dl, %bpl
2114 ; AVX-NEXT: movsbq %bpl, %r15
2115 ; AVX-NEXT: movq %r15, %r13
2116 ; AVX-NEXT: sarq $63, %r13
2117 ; AVX-NEXT: addb %bl, %bl
2118 ; AVX-NEXT: sarb %bl
2119 ; AVX-NEXT: addb %cl, %cl
2120 ; AVX-NEXT: sarb %cl
2121 ; AVX-NEXT: cmpb %bl, %cl
2122 ; AVX-NEXT: setl %cl
2123 ; AVX-NEXT: setg %dl
2124 ; AVX-NEXT: subb %cl, %dl
2125 ; AVX-NEXT: movsbq %dl, %rbx
2126 ; AVX-NEXT: movq %rbx, %rcx
2127 ; AVX-NEXT: sarq $63, %rcx
2128 ; AVX-NEXT: addb %r11b, %r11b
2129 ; AVX-NEXT: sarb %r11b
2130 ; AVX-NEXT: addb %r8b, %r8b
2131 ; AVX-NEXT: sarb %r8b
2132 ; AVX-NEXT: cmpb %r11b, %r8b
2133 ; AVX-NEXT: setl %dl
2134 ; AVX-NEXT: setg %r8b
2135 ; AVX-NEXT: subb %dl, %r8b
2136 ; AVX-NEXT: movsbq %r8b, %rdx
2137 ; AVX-NEXT: movq %rdx, %r8
2138 ; AVX-NEXT: sarq $63, %r8
2139 ; AVX-NEXT: addb %r10b, %r10b
2140 ; AVX-NEXT: sarb %r10b
2141 ; AVX-NEXT: addb %r9b, %r9b
2142 ; AVX-NEXT: sarb %r9b
2143 ; AVX-NEXT: cmpb %r10b, %r9b
2144 ; AVX-NEXT: setl %r9b
2145 ; AVX-NEXT: setg %r10b
2146 ; AVX-NEXT: subb %r9b, %r10b
2147 ; AVX-NEXT: movsbq %r10b, %r9
2148 ; AVX-NEXT: movq %r9, %r10
2149 ; AVX-NEXT: sarq $63, %r10
2150 ; AVX-NEXT: addb %dil, %dil
2151 ; AVX-NEXT: sarb %dil
2152 ; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
2153 ; AVX-NEXT: addb %r11b, %r11b
2154 ; AVX-NEXT: sarb %r11b
2155 ; AVX-NEXT: cmpb %dil, %r11b
2156 ; AVX-NEXT: setl %dil
2157 ; AVX-NEXT: setg %r11b
2158 ; AVX-NEXT: subb %dil, %r11b
2159 ; AVX-NEXT: movsbq %r11b, %rdi
2160 ; AVX-NEXT: movq %rdi, %rbp
2161 ; AVX-NEXT: sarq $63, %rbp
2162 ; AVX-NEXT: movl %ebp, 96(%rax)
2163 ; AVX-NEXT: movb $51, %r11b
2164 ; AVX-NEXT: bzhiq %r11, %rbp, %r11
2165 ; AVX-NEXT: shldq $62, %rdi, %rbp
2166 ; AVX-NEXT: movq %rbp, 88(%rax)
2167 ; AVX-NEXT: movq %r10, %rbp
2168 ; AVX-NEXT: shldq $20, %r9, %rbp
2169 ; AVX-NEXT: movq %rbp, 64(%rax)
2170 ; AVX-NEXT: movq %r8, %rbp
2171 ; AVX-NEXT: shldq $31, %rdx, %rbp
2172 ; AVX-NEXT: movq %rbp, 48(%rax)
2173 ; AVX-NEXT: movq %rcx, %rbp
2174 ; AVX-NEXT: shldq $42, %rbx, %rbp
2175 ; AVX-NEXT: movq %rbp, 32(%rax)
2176 ; AVX-NEXT: movb $42, %bpl
2177 ; AVX-NEXT: bzhiq %rbp, %r13, %rbp
2178 ; AVX-NEXT: shldq $53, %r15, %r13
2179 ; AVX-NEXT: movq %r13, 16(%rax)
2180 ; AVX-NEXT: movq %r11, %r13
2181 ; AVX-NEXT: shrq $48, %r13
2182 ; AVX-NEXT: movb %r13b, 102(%rax)
2183 ; AVX-NEXT: shrq $32, %r11
2184 ; AVX-NEXT: movw %r11w, 100(%rax)
2185 ; AVX-NEXT: movb $53, %r11b
2186 ; AVX-NEXT: bzhiq %r11, %r12, %r12
2187 ; AVX-NEXT: shldq $9, %rsi, %r12
2188 ; AVX-NEXT: shlq $62, %rdi
2189 ; AVX-NEXT: orq %r12, %rdi
2190 ; AVX-NEXT: movq %rdi, 80(%rax)
2191 ; AVX-NEXT: shlq $42, %rbx
2192 ; AVX-NEXT: orq %rbp, %rbx
2193 ; AVX-NEXT: movq %rbx, 24(%rax)
2194 ; AVX-NEXT: bzhiq %r11, %r14, %rdi
2195 ; AVX-NEXT: shlq $53, %r15
2196 ; AVX-NEXT: orq %rdi, %r15
2197 ; AVX-NEXT: movq %r15, 8(%rax)
2198 ; AVX-NEXT: shlq $9, %rsi
2199 ; AVX-NEXT: andl $511, %r10d # imm = 0x1FF
2200 ; AVX-NEXT: orq %rsi, %r10
2201 ; AVX-NEXT: movq %r10, 72(%rax)
2202 ; AVX-NEXT: shlq $20, %r9
2203 ; AVX-NEXT: andl $1048575, %r8d # imm = 0xFFFFF
2204 ; AVX-NEXT: orq %r9, %r8
2205 ; AVX-NEXT: movq %r8, 56(%rax)
2206 ; AVX-NEXT: shlq $31, %rdx
2207 ; AVX-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF
2208 ; AVX-NEXT: orq %rdx, %rcx
2209 ; AVX-NEXT: movq %rcx, 40(%rax)
2210 ; AVX-NEXT: popq %rbx
2211 ; AVX-NEXT: popq %r12
2212 ; AVX-NEXT: popq %r13
2213 ; AVX-NEXT: popq %r14
2214 ; AVX-NEXT: popq %r15
2215 ; AVX-NEXT: popq %rbp
2218 ; X86-LABEL: scmp_uncommon_vectors:
2220 ; X86-NEXT: pushl %ebp
2221 ; X86-NEXT: pushl %ebx
2222 ; X86-NEXT: pushl %edi
2223 ; X86-NEXT: pushl %esi
2224 ; X86-NEXT: subl $52, %esp
2225 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2226 ; X86-NEXT: addb %al, %al
2227 ; X86-NEXT: sarb %al
2228 ; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
2229 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2230 ; X86-NEXT: addb %al, %al
2231 ; X86-NEXT: sarb %al
2232 ; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
2233 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2234 ; X86-NEXT: addb %al, %al
2235 ; X86-NEXT: sarb %al
2236 ; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
2237 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2238 ; X86-NEXT: addb %al, %al
2239 ; X86-NEXT: sarb %al
2240 ; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
2241 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2242 ; X86-NEXT: addb %al, %al
2243 ; X86-NEXT: sarb %al
2244 ; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
2245 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2246 ; X86-NEXT: addb %al, %al
2247 ; X86-NEXT: sarb %al
2248 ; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
2249 ; X86-NEXT: movb {{[0-9]+}}(%esp), %dh
2250 ; X86-NEXT: addb %dh, %dh
2251 ; X86-NEXT: sarb %dh
2252 ; X86-NEXT: movb {{[0-9]+}}(%esp), %dl
2253 ; X86-NEXT: addb %dl, %dl
2254 ; X86-NEXT: sarb %dl
2255 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2256 ; X86-NEXT: addb %al, %al
2257 ; X86-NEXT: sarb %al
2258 ; X86-NEXT: movb {{[0-9]+}}(%esp), %ah
2259 ; X86-NEXT: addb %ah, %ah
2260 ; X86-NEXT: sarb %ah
2261 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
2262 ; X86-NEXT: addb %cl, %cl
2263 ; X86-NEXT: sarb %cl
2264 ; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
2265 ; X86-NEXT: addb %ch, %ch
2266 ; X86-NEXT: sarb %ch
2267 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
2268 ; X86-NEXT: addb %bl, %bl
2269 ; X86-NEXT: sarb %bl
2270 ; X86-NEXT: movb {{[0-9]+}}(%esp), %bh
2271 ; X86-NEXT: addb %bh, %bh
2272 ; X86-NEXT: sarb %bh
2273 ; X86-NEXT: cmpb %bl, %bh
2274 ; X86-NEXT: setl %bl
2275 ; X86-NEXT: setg %bh
2276 ; X86-NEXT: subb %bl, %bh
2277 ; X86-NEXT: movsbl %bh, %esi
2278 ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2279 ; X86-NEXT: sarl $31, %esi
2280 ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2281 ; X86-NEXT: andl $2097151, %esi # imm = 0x1FFFFF
2282 ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2283 ; X86-NEXT: cmpb %cl, %ch
2284 ; X86-NEXT: setl %cl
2285 ; X86-NEXT: setg %ch
2286 ; X86-NEXT: subb %cl, %ch
2287 ; X86-NEXT: movsbl %ch, %ecx
2288 ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2289 ; X86-NEXT: sarl $31, %ecx
2290 ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2291 ; X86-NEXT: andl $2097151, %ecx # imm = 0x1FFFFF
2292 ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2293 ; X86-NEXT: cmpb %al, %ah
2294 ; X86-NEXT: setl %al
2295 ; X86-NEXT: setg %cl
2296 ; X86-NEXT: subb %al, %cl
2297 ; X86-NEXT: movsbl %cl, %ecx
2298 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
2299 ; X86-NEXT: movl %ecx, (%edi)
2300 ; X86-NEXT: sarl $31, %ecx
2301 ; X86-NEXT: movl %ecx, %eax
2302 ; X86-NEXT: andl $2097151, %eax # imm = 0x1FFFFF
2303 ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2304 ; X86-NEXT: cmpb %dh, %dl
2305 ; X86-NEXT: setl %al
2306 ; X86-NEXT: setg %dl
2307 ; X86-NEXT: subb %al, %dl
2308 ; X86-NEXT: movsbl %dl, %ebp
2309 ; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2310 ; X86-NEXT: sarl $31, %ebp
2311 ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
2312 ; X86-NEXT: cmpb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
2313 ; X86-NEXT: setl %al
2314 ; X86-NEXT: setg %dl
2315 ; X86-NEXT: subb %al, %dl
2316 ; X86-NEXT: movsbl %dl, %esi
2317 ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2318 ; X86-NEXT: sarl $31, %esi
2319 ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
2320 ; X86-NEXT: cmpb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
2321 ; X86-NEXT: setl %al
2322 ; X86-NEXT: setg %dl
2323 ; X86-NEXT: subb %al, %dl
2324 ; X86-NEXT: movsbl %dl, %eax
2325 ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2326 ; X86-NEXT: sarl $31, %eax
2327 ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload
2328 ; X86-NEXT: cmpb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Folded Reload
2329 ; X86-NEXT: setl %dl
2330 ; X86-NEXT: setg %dh
2331 ; X86-NEXT: subb %dl, %dh
2332 ; X86-NEXT: movsbl %dh, %ebx
2333 ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2334 ; X86-NEXT: sarl $31, %ebx
2335 ; X86-NEXT: movl %ebx, 96(%edi)
2336 ; X86-NEXT: movl %ebx, 92(%edi)
2337 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
2338 ; X86-NEXT: movl %edx, 80(%edi)
2339 ; X86-NEXT: movl %eax, 68(%edi)
2340 ; X86-NEXT: movl %eax, 64(%edi)
2341 ; X86-NEXT: movl %esi, 52(%edi)
2342 ; X86-NEXT: movl %esi, 48(%edi)
2343 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
2344 ; X86-NEXT: movl %edx, 36(%edi)
2345 ; X86-NEXT: movl %ebp, 24(%edi)
2346 ; X86-NEXT: movl %ebp, 20(%edi)
2347 ; X86-NEXT: movl %ecx, 8(%edi)
2348 ; X86-NEXT: movl %ecx, 4(%edi)
2349 ; X86-NEXT: movl %ebx, %ecx
2350 ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2351 ; X86-NEXT: movw %cx, 100(%edi)
2352 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
2353 ; X86-NEXT: shldl $30, %edx, %ecx
2354 ; X86-NEXT: movl %ecx, 88(%edi)
2355 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
2356 ; X86-NEXT: shldl $9, %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
2357 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
2358 ; X86-NEXT: shldl $9, %edx, %ecx
2359 ; X86-NEXT: movl %ecx, 76(%edi)
2360 ; X86-NEXT: movl %eax, %ecx
2361 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
2362 ; X86-NEXT: shldl $20, %edx, %ecx
2363 ; X86-NEXT: movl %ecx, 60(%edi)
2364 ; X86-NEXT: movl %esi, %ecx
2365 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
2366 ; X86-NEXT: shldl $31, %edx, %ecx
2367 ; X86-NEXT: movl %ecx, 44(%edi)
2368 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
2369 ; X86-NEXT: shldl $10, %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
2370 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
2371 ; X86-NEXT: shldl $10, %edx, %ecx
2372 ; X86-NEXT: movl %ecx, 32(%edi)
2373 ; X86-NEXT: movl %ebp, %ecx
2374 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
2375 ; X86-NEXT: shldl $21, %ebx, %ecx
2376 ; X86-NEXT: movl %ecx, 16(%edi)
2377 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
2378 ; X86-NEXT: shll $9, %ecx
2379 ; X86-NEXT: andl $511, %eax # imm = 0x1FF
2380 ; X86-NEXT: orl %ecx, %eax
2381 ; X86-NEXT: movl %eax, 72(%edi)
2382 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
2383 ; X86-NEXT: shll $20, %eax
2384 ; X86-NEXT: andl $1048575, %esi # imm = 0xFFFFF
2385 ; X86-NEXT: orl %eax, %esi
2386 ; X86-NEXT: movl %esi, 56(%edi)
2387 ; X86-NEXT: shll $10, %edx
2388 ; X86-NEXT: andl $1023, %ebp # imm = 0x3FF
2389 ; X86-NEXT: orl %edx, %ebp
2390 ; X86-NEXT: movl %ebp, 28(%edi)
2391 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
2392 ; X86-NEXT: shll $21, %eax
2393 ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
2394 ; X86-NEXT: movl %eax, 12(%edi)
2395 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
2396 ; X86-NEXT: andl $7, %eax
2397 ; X86-NEXT: movb %al, 102(%edi)
2398 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
2399 ; X86-NEXT: shll $30, %eax
2400 ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
2401 ; X86-NEXT: movl %eax, 84(%edi)
2402 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
2403 ; X86-NEXT: shll $31, %eax
2404 ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
2405 ; X86-NEXT: movl %eax, 40(%edi)
2406 ; X86-NEXT: movl %edi, %eax
2407 ; X86-NEXT: addl $52, %esp
2408 ; X86-NEXT: popl %esi
2409 ; X86-NEXT: popl %edi
2410 ; X86-NEXT: popl %ebx
2411 ; X86-NEXT: popl %ebp
2413 %1 = call <7 x i117> @llvm.scmp(<7 x i7> %x, <7 x i7> %y)
; scmp_scalarize: llvm.scmp on single-element vectors, with <1 x i33> operands
; and a <1 x i3> result (presumably exercising vector scalarization, going by
; the test name).
; What the expected output below shows:
;  * 64-bit run - each operand is sign-extended from 33 bits with a
;    shlq $31 / sarq $31 pair, compared once with cmpq, and the result is
;    formed as (setg) - (setl).
;  * i686 run - the upper word of each operand is rebuilt from its low bit with
;    andl $1 / negl, then two cmpl/sbbl sequences with the operands swapped
;    each feed a setl, and the two flags are subtracted.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
2417 define <1 x i3> @scmp_scalarize(<1 x i33> %x, <1 x i33> %y) nounwind {
2418 ; X64-LABEL: scmp_scalarize:
2420 ; X64-NEXT: shlq $31, %rsi
2421 ; X64-NEXT: sarq $31, %rsi
2422 ; X64-NEXT: shlq $31, %rdi
2423 ; X64-NEXT: sarq $31, %rdi
2424 ; X64-NEXT: cmpq %rsi, %rdi
2425 ; X64-NEXT: setl %cl
2426 ; X64-NEXT: setg %al
2427 ; X64-NEXT: subb %cl, %al
2430 ; X86-LABEL: scmp_scalarize:
2432 ; X86-NEXT: pushl %ebx
2433 ; X86-NEXT: pushl %edi
2434 ; X86-NEXT: pushl %esi
2435 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
2436 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2437 ; X86-NEXT: andl $1, %eax
2438 ; X86-NEXT: negl %eax
2439 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
2440 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
2441 ; X86-NEXT: andl $1, %esi
2442 ; X86-NEXT: negl %esi
2443 ; X86-NEXT: cmpl %ecx, %edx
2444 ; X86-NEXT: movl %esi, %edi
2445 ; X86-NEXT: sbbl %eax, %edi
2446 ; X86-NEXT: setl %bl
2447 ; X86-NEXT: cmpl %edx, %ecx
2448 ; X86-NEXT: sbbl %esi, %eax
2449 ; X86-NEXT: setl %al
2450 ; X86-NEXT: subb %bl, %al
2451 ; X86-NEXT: popl %esi
2452 ; X86-NEXT: popl %edi
2453 ; X86-NEXT: popl %ebx
2455 %1 = call <1 x i3> @llvm.scmp(<1 x i33> %x, <1 x i33> %y)
; scmp_bool_operands: llvm.scmp with <2 x i1> operands and a <2 x i8> result.
; In every run below, each i1 lane is turned into 0 or -1 with andb $1 / negb
; before a byte compare, so the comparison treats a set bit as -1 (signed),
; and each lane's result is (setg) - (setl).
; The runs differ only in how lanes are extracted and re-packed:
;  * baseline x86-64 run - operands are spilled with movaps and the lanes are
;    reloaded as bytes; results are merged with shll $8 / orl and movd.
;  * x86-64-v2 and x86-64-v3 runs - lanes come from (v)pextrb $8 and (v)movd,
;    results are combined with (v)pinsrb $1.
;  * x86-64-v4 run - lanes go through mask registers (vpsllq $63, vpmovq2m,
;    kshiftrb $1, kmovd) before the same scalar compare sequence.
;  * i686 run - all four lanes are loaded from the stack as bytes.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
2459 define <2 x i8> @scmp_bool_operands(<2 x i1> %x, <2 x i1> %y) nounwind {
2460 ; SSE2-LABEL: scmp_bool_operands:
2462 ; SSE2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
2463 ; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
2464 ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
2465 ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
2466 ; SSE2-NEXT: andb $1, %al
2467 ; SSE2-NEXT: negb %al
2468 ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
2469 ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi
2470 ; SSE2-NEXT: andb $1, %dl
2471 ; SSE2-NEXT: negb %dl
2472 ; SSE2-NEXT: cmpb %al, %dl
2473 ; SSE2-NEXT: setl %al
2474 ; SSE2-NEXT: setg %dl
2475 ; SSE2-NEXT: subb %al, %dl
2476 ; SSE2-NEXT: movzbl %dl, %eax
2477 ; SSE2-NEXT: andb $1, %cl
2478 ; SSE2-NEXT: negb %cl
2479 ; SSE2-NEXT: andb $1, %sil
2480 ; SSE2-NEXT: negb %sil
2481 ; SSE2-NEXT: cmpb %cl, %sil
2482 ; SSE2-NEXT: setl %cl
2483 ; SSE2-NEXT: setg %dl
2484 ; SSE2-NEXT: subb %cl, %dl
2485 ; SSE2-NEXT: movzbl %dl, %ecx
2486 ; SSE2-NEXT: shll $8, %ecx
2487 ; SSE2-NEXT: orl %eax, %ecx
2488 ; SSE2-NEXT: movd %ecx, %xmm0
2491 ; SSE4-LABEL: scmp_bool_operands:
2493 ; SSE4-NEXT: pextrb $8, %xmm1, %eax
2494 ; SSE4-NEXT: andb $1, %al
2495 ; SSE4-NEXT: negb %al
2496 ; SSE4-NEXT: pextrb $8, %xmm0, %ecx
2497 ; SSE4-NEXT: andb $1, %cl
2498 ; SSE4-NEXT: negb %cl
2499 ; SSE4-NEXT: cmpb %al, %cl
2500 ; SSE4-NEXT: setl %al
2501 ; SSE4-NEXT: setg %cl
2502 ; SSE4-NEXT: subb %al, %cl
2503 ; SSE4-NEXT: movzbl %cl, %eax
2504 ; SSE4-NEXT: movd %xmm1, %ecx
2505 ; SSE4-NEXT: andb $1, %cl
2506 ; SSE4-NEXT: negb %cl
2507 ; SSE4-NEXT: movd %xmm0, %edx
2508 ; SSE4-NEXT: andb $1, %dl
2509 ; SSE4-NEXT: negb %dl
2510 ; SSE4-NEXT: cmpb %cl, %dl
2511 ; SSE4-NEXT: setl %cl
2512 ; SSE4-NEXT: setg %dl
2513 ; SSE4-NEXT: subb %cl, %dl
2514 ; SSE4-NEXT: movzbl %dl, %ecx
2515 ; SSE4-NEXT: movd %ecx, %xmm0
2516 ; SSE4-NEXT: pinsrb $1, %eax, %xmm0
2519 ; AVX2-LABEL: scmp_bool_operands:
2521 ; AVX2-NEXT: vpextrb $8, %xmm1, %eax
2522 ; AVX2-NEXT: andb $1, %al
2523 ; AVX2-NEXT: vpextrb $8, %xmm0, %ecx
2524 ; AVX2-NEXT: negb %al
2525 ; AVX2-NEXT: andb $1, %cl
2526 ; AVX2-NEXT: negb %cl
2527 ; AVX2-NEXT: cmpb %al, %cl
2528 ; AVX2-NEXT: setl %al
2529 ; AVX2-NEXT: setg %cl
2530 ; AVX2-NEXT: subb %al, %cl
2531 ; AVX2-NEXT: vmovd %xmm1, %eax
2532 ; AVX2-NEXT: andb $1, %al
2533 ; AVX2-NEXT: negb %al
2534 ; AVX2-NEXT: vmovd %xmm0, %edx
2535 ; AVX2-NEXT: andb $1, %dl
2536 ; AVX2-NEXT: negb %dl
2537 ; AVX2-NEXT: cmpb %al, %dl
2538 ; AVX2-NEXT: setl %al
2539 ; AVX2-NEXT: setg %dl
2540 ; AVX2-NEXT: subb %al, %dl
2541 ; AVX2-NEXT: vmovd %edx, %xmm0
2542 ; AVX2-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
2545 ; AVX512-LABEL: scmp_bool_operands:
2547 ; AVX512-NEXT: vpsllq $63, %xmm0, %xmm0
2548 ; AVX512-NEXT: vpmovq2m %xmm0, %k0
2549 ; AVX512-NEXT: kshiftrb $1, %k0, %k1
2550 ; AVX512-NEXT: kmovd %k1, %eax
2551 ; AVX512-NEXT: vpsllq $63, %xmm1, %xmm0
2552 ; AVX512-NEXT: vpmovq2m %xmm0, %k1
2553 ; AVX512-NEXT: kshiftrb $1, %k1, %k2
2554 ; AVX512-NEXT: kmovd %k2, %ecx
2555 ; AVX512-NEXT: andb $1, %cl
2556 ; AVX512-NEXT: negb %cl
2557 ; AVX512-NEXT: andb $1, %al
2558 ; AVX512-NEXT: negb %al
2559 ; AVX512-NEXT: cmpb %cl, %al
2560 ; AVX512-NEXT: setl %al
2561 ; AVX512-NEXT: setg %cl
2562 ; AVX512-NEXT: subb %al, %cl
2563 ; AVX512-NEXT: kmovd %k1, %eax
2564 ; AVX512-NEXT: andb $1, %al
2565 ; AVX512-NEXT: negb %al
2566 ; AVX512-NEXT: kmovd %k0, %edx
2567 ; AVX512-NEXT: andb $1, %dl
2568 ; AVX512-NEXT: negb %dl
2569 ; AVX512-NEXT: cmpb %al, %dl
2570 ; AVX512-NEXT: setl %al
2571 ; AVX512-NEXT: setg %dl
2572 ; AVX512-NEXT: subb %al, %dl
2573 ; AVX512-NEXT: vmovd %edx, %xmm0
2574 ; AVX512-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
2577 ; X86-LABEL: scmp_bool_operands:
2579 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
2580 ; X86-NEXT: andb $1, %cl
2581 ; X86-NEXT: negb %cl
2582 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
2583 ; X86-NEXT: andb $1, %dl
2584 ; X86-NEXT: negb %dl
2585 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2586 ; X86-NEXT: andb $1, %al
2587 ; X86-NEXT: negb %al
2588 ; X86-NEXT: movb {{[0-9]+}}(%esp), %ah
2589 ; X86-NEXT: andb $1, %ah
2590 ; X86-NEXT: negb %ah
2591 ; X86-NEXT: cmpb %al, %ah
2592 ; X86-NEXT: setl %ah
2593 ; X86-NEXT: setg %al
2594 ; X86-NEXT: subb %ah, %al
2595 ; X86-NEXT: cmpb %cl, %dl
2596 ; X86-NEXT: setl %cl
2597 ; X86-NEXT: setg %dl
2598 ; X86-NEXT: subb %cl, %dl
2600 %1 = call <2 x i8> @llvm.scmp(<2 x i1> %x, <2 x i1> %y)
; scmp_ret_wider_than_operands: llvm.scmp with <2 x i8> operands whose result
; element type (<2 x i16>) is wider than the operand element type.
; In every run below, each lane is compared as a byte (cmpb), the result is
; formed as (setg) - (setl) in a byte register, and that byte is then
; sign-extended with movsbl before being used as the 16-bit lane.
;  * baseline x86-64 run - lanes are taken from movd plus a shrl $8 copy.
;  * x86-64-v2 and later runs - lane 1 comes from (v)pextrb $1, lane 0 from
;    (v)movd; the widened results are packed with (v)pinsrw $1.
;  * i686 run - operands are read from the stack and the two widened results
;    are left in %eax / %edx (the "kill" notes narrow them to $ax / $dx).
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
2604 define <2 x i16> @scmp_ret_wider_than_operands(<2 x i8> %x, <2 x i8> %y) nounwind {
2605 ; SSE2-LABEL: scmp_ret_wider_than_operands:
2607 ; SSE2-NEXT: movd %xmm1, %eax
2608 ; SSE2-NEXT: movl %eax, %ecx
2609 ; SSE2-NEXT: shrl $8, %ecx
2610 ; SSE2-NEXT: movd %xmm0, %edx
2611 ; SSE2-NEXT: movl %edx, %esi
2612 ; SSE2-NEXT: shrl $8, %esi
2613 ; SSE2-NEXT: cmpb %cl, %sil
2614 ; SSE2-NEXT: setl %cl
2615 ; SSE2-NEXT: setg %sil
2616 ; SSE2-NEXT: subb %cl, %sil
2617 ; SSE2-NEXT: movsbl %sil, %ecx
2618 ; SSE2-NEXT: cmpb %al, %dl
2619 ; SSE2-NEXT: setl %al
2620 ; SSE2-NEXT: setg %dl
2621 ; SSE2-NEXT: subb %al, %dl
2622 ; SSE2-NEXT: movsbl %dl, %eax
2623 ; SSE2-NEXT: movd %eax, %xmm0
2624 ; SSE2-NEXT: pinsrw $1, %ecx, %xmm0
2627 ; SSE4-LABEL: scmp_ret_wider_than_operands:
2629 ; SSE4-NEXT: pextrb $1, %xmm1, %eax
2630 ; SSE4-NEXT: pextrb $1, %xmm0, %ecx
2631 ; SSE4-NEXT: cmpb %al, %cl
2632 ; SSE4-NEXT: setl %al
2633 ; SSE4-NEXT: setg %cl
2634 ; SSE4-NEXT: subb %al, %cl
2635 ; SSE4-NEXT: movsbl %cl, %eax
2636 ; SSE4-NEXT: movd %xmm1, %ecx
2637 ; SSE4-NEXT: movd %xmm0, %edx
2638 ; SSE4-NEXT: cmpb %cl, %dl
2639 ; SSE4-NEXT: setl %cl
2640 ; SSE4-NEXT: setg %dl
2641 ; SSE4-NEXT: subb %cl, %dl
2642 ; SSE4-NEXT: movsbl %dl, %ecx
2643 ; SSE4-NEXT: movd %ecx, %xmm0
2644 ; SSE4-NEXT: pinsrw $1, %eax, %xmm0
2647 ; AVX-LABEL: scmp_ret_wider_than_operands:
2649 ; AVX-NEXT: vpextrb $1, %xmm1, %eax
2650 ; AVX-NEXT: vpextrb $1, %xmm0, %ecx
2651 ; AVX-NEXT: cmpb %al, %cl
2652 ; AVX-NEXT: setl %al
2653 ; AVX-NEXT: setg %cl
2654 ; AVX-NEXT: subb %al, %cl
2655 ; AVX-NEXT: movsbl %cl, %eax
2656 ; AVX-NEXT: vmovd %xmm1, %ecx
2657 ; AVX-NEXT: vmovd %xmm0, %edx
2658 ; AVX-NEXT: cmpb %cl, %dl
2659 ; AVX-NEXT: setl %cl
2660 ; AVX-NEXT: setg %dl
2661 ; AVX-NEXT: subb %cl, %dl
2662 ; AVX-NEXT: movsbl %dl, %ecx
2663 ; AVX-NEXT: vmovd %ecx, %xmm0
2664 ; AVX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
2667 ; X86-LABEL: scmp_ret_wider_than_operands:
2669 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
2670 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2671 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
2672 ; X86-NEXT: setl %al
2673 ; X86-NEXT: setg %dl
2674 ; X86-NEXT: subb %al, %dl
2675 ; X86-NEXT: movsbl %dl, %eax
2676 ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %cl
2677 ; X86-NEXT: setl %cl
2678 ; X86-NEXT: setg %dl
2679 ; X86-NEXT: subb %cl, %dl
2680 ; X86-NEXT: movsbl %dl, %edx
2681 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
2682 ; X86-NEXT: # kill: def $dx killed $dx killed $edx
2684 %1 = call <2 x i16> @llvm.scmp(<2 x i8> %x, <2 x i8> %y)