1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2
6 ; This tests codegen-time inlining/optimization of memcmp calls in functions marked minsize.
; 64 ASCII digit characters followed by a NUL terminator (65 bytes total).
; Used as the constant second operand of the *_eq_const tests below.
9 @.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1
; External memcmp that the tests below call with a constant length.
11 declare dso_local i32 @memcmp(i8*, i8*, i64)
; 2-byte memcmp in an i32-returning minsize function.
; The visible check expects the call to be emitted as a tail jump to memcmp.
13 define i32 @length2(i8* %X, i8* %Y) nounwind minsize {
18 ; X64-NEXT: jmp memcmp # TAILCALL
19 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
; 2-byte memcmp whose result is only tested for equality with zero (icmp eq).
; The visible checks expect an inline 16-bit load (movzwl) and a cmpw against
; the second buffer; no call to memcmp appears in them.
23 define i1 @length2_eq(i8* %X, i8* %Y) nounwind minsize {
24 ; X64-LABEL: length2_eq:
26 ; X64-NEXT: movzwl (%rdi), %eax
27 ; X64-NEXT: cmpw (%rsi), %ax
30 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
31 %c = icmp eq i32 %m, 0
; 2-byte memcmp against the bytes at offset 1 of @.str ("12"), tested with
; icmp ne. The check expects one cmpw of memory against the immediate 0x3231,
; which is those two bytes read as a little-endian 16-bit value.
35 define i1 @length2_eq_const(i8* %X) nounwind minsize {
36 ; X64-LABEL: length2_eq_const:
38 ; X64-NEXT: cmpw $12849, (%rdi) # imm = 0x3231
41 %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 1), i64 2) nounwind
42 %c = icmp ne i32 %m, 0
; Same 2-byte equality compare as length2_eq, but the call site carries the
; nobuiltin attribute. The visible checks expect a real callq to memcmp
; followed by a testl of its result, instead of the inline cmpw.
46 define i1 @length2_eq_nobuiltin_attr(i8* %X, i8* %Y) nounwind minsize {
47 ; X64-LABEL: length2_eq_nobuiltin_attr:
49 ; X64-NEXT: pushq %rax
52 ; X64-NEXT: callq memcmp
53 ; X64-NEXT: testl %eax, %eax
57 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind nobuiltin
58 %c = icmp eq i32 %m, 0
; 3-byte memcmp in an i32-returning minsize function.
; The visible check expects the call to be emitted as a tail jump to memcmp.
62 define i32 @length3(i8* %X, i8* %Y) nounwind minsize {
67 ; X64-NEXT: jmp memcmp # TAILCALL
68 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
; 3-byte memcmp whose result is tested with icmp ne against zero.
; The visible checks expect a callq to memcmp followed by a testl of the
; result; no inline load/compare sequence appears in them.
72 define i1 @length3_eq(i8* %X, i8* %Y) nounwind minsize {
73 ; X64-LABEL: length3_eq:
75 ; X64-NEXT: pushq %rax
78 ; X64-NEXT: callq memcmp
79 ; X64-NEXT: testl %eax, %eax
83 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
84 %c = icmp ne i32 %m, 0
; 4-byte memcmp in an i32-returning minsize function.
; The visible check expects the call to be emitted as a tail jump to memcmp.
88 define i32 @length4(i8* %X, i8* %Y) nounwind minsize {
93 ; X64-NEXT: jmp memcmp # TAILCALL
94 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
; 4-byte memcmp whose result is tested with icmp ne against zero.
; The visible checks expect an inline 32-bit load, a cmpl against the second
; buffer, and setne to produce the i1 result.
98 define i1 @length4_eq(i8* %X, i8* %Y) nounwind minsize {
99 ; X64-LABEL: length4_eq:
101 ; X64-NEXT: movl (%rdi), %eax
102 ; X64-NEXT: cmpl (%rsi), %eax
103 ; X64-NEXT: setne %al
105 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
106 %c = icmp ne i32 %m, 0
; 4-byte memcmp against the bytes at offset 1 of @.str ("1234"), tested with
; icmp eq. The check expects one cmpl of memory against the immediate
; 0x34333231, which is those four bytes read as a little-endian 32-bit value.
110 define i1 @length4_eq_const(i8* %X) nounwind minsize {
111 ; X64-LABEL: length4_eq_const:
113 ; X64-NEXT: cmpl $875770417, (%rdi) # imm = 0x34333231
116 %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 1), i64 4) nounwind
117 %c = icmp eq i32 %m, 0
; 5-byte memcmp in an i32-returning minsize function.
; The visible checks expect a popq into %rdx (presumably paired with a pushq
; of the length constant, as in the larger-length tests in this file) and then
; a tail jump to memcmp.
121 define i32 @length5(i8* %X, i8* %Y) nounwind minsize {
122 ; X64-LABEL: length5:
125 ; X64-NEXT: popq %rdx
126 ; X64-NEXT: jmp memcmp # TAILCALL
127 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
; 5-byte memcmp whose result is tested with icmp ne against zero.
; The visible checks expect a callq to memcmp, then testl/setne on the
; returned value; no inline compare sequence appears in them.
131 define i1 @length5_eq(i8* %X, i8* %Y) nounwind minsize {
132 ; X64-LABEL: length5_eq:
134 ; X64-NEXT: pushq %rax
136 ; X64-NEXT: popq %rdx
137 ; X64-NEXT: callq memcmp
138 ; X64-NEXT: testl %eax, %eax
139 ; X64-NEXT: setne %al
140 ; X64-NEXT: popq %rcx
142 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
143 %c = icmp ne i32 %m, 0
; 8-byte memcmp in an i32-returning minsize function.
; The visible checks expect a popq into %rdx (presumably paired with a pushq
; of the length constant, as in the larger-length tests in this file) and then
; a tail jump to memcmp.
147 define i32 @length8(i8* %X, i8* %Y) nounwind minsize {
148 ; X64-LABEL: length8:
151 ; X64-NEXT: popq %rdx
152 ; X64-NEXT: jmp memcmp # TAILCALL
153 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
; 8-byte memcmp whose result is tested with icmp eq against zero.
; The visible checks expect an inline 64-bit load and a cmpq against the
; second buffer; no call to memcmp appears in them.
157 define i1 @length8_eq(i8* %X, i8* %Y) nounwind minsize {
158 ; X64-LABEL: length8_eq:
160 ; X64-NEXT: movq (%rdi), %rax
161 ; X64-NEXT: cmpq (%rsi), %rax
164 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
165 %c = icmp eq i32 %m, 0
; 8-byte memcmp against the first 8 bytes of @.str ("01234567"), tested with
; icmp ne. The checks expect the constant 0x3736353433323130 (those bytes as
; a little-endian 64-bit value) to be materialized with movabsq, compared
; against memory with cmpq, and the result produced with setne.
169 define i1 @length8_eq_const(i8* %X) nounwind minsize {
170 ; X64-LABEL: length8_eq_const:
172 ; X64-NEXT: movabsq $3978425819141910832, %rax # imm = 0x3736353433323130
173 ; X64-NEXT: cmpq %rax, (%rdi)
174 ; X64-NEXT: setne %al
176 %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 8) nounwind
177 %c = icmp ne i32 %m, 0
; 12-byte memcmp whose result is tested with icmp ne against zero.
; The checks expect the length to be loaded into %rdx with a pushq $12 / popq
; pair, a callq to memcmp, and testl/setne on the returned value.
181 define i1 @length12_eq(i8* %X, i8* %Y) nounwind minsize {
182 ; X64-LABEL: length12_eq:
184 ; X64-NEXT: pushq %rax
185 ; X64-NEXT: pushq $12
186 ; X64-NEXT: popq %rdx
187 ; X64-NEXT: callq memcmp
188 ; X64-NEXT: testl %eax, %eax
189 ; X64-NEXT: setne %al
190 ; X64-NEXT: popq %rcx
192 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
193 %c = icmp ne i32 %m, 0
; 12-byte memcmp in an i32-returning minsize function.
; The checks expect the length to be loaded into %rdx with a pushq $12 / popq
; pair, followed by a tail jump to memcmp.
197 define i32 @length12(i8* %X, i8* %Y) nounwind minsize {
198 ; X64-LABEL: length12:
200 ; X64-NEXT: pushq $12
201 ; X64-NEXT: popq %rdx
202 ; X64-NEXT: jmp memcmp # TAILCALL
203 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
207 ; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329
; 16-byte memcmp in an i32-returning minsize function.
; The checks expect the length to be loaded into %rdx with a pushq $16 / popq
; pair, followed by a tail jump to memcmp.
209 define i32 @length16(i8* %X, i8* %Y) nounwind minsize {
211 ; X64-LABEL: length16:
213 ; X64-NEXT: pushq $16
214 ; X64-NEXT: popq %rdx
215 ; X64-NEXT: jmp memcmp # TAILCALL
216 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind
; 16-byte memcmp whose result is tested with icmp ne against zero.
; The checks expect an inline 128-bit vector compare instead of a call:
;  - SSE2 run: two movdqu loads, pcmpeqb, pmovmskb, then cmpl against 0xFFFF
;    (all 16 byte lanes equal) and setne.
;  - AVX and AVX2 runs (shared X64-AVX prefix): vmovdqu load, vpxor with the
;    second buffer, vptest of the result against itself, and setne.
220 define i1 @length16_eq(i8* %x, i8* %y) nounwind minsize {
221 ; X64-SSE2-LABEL: length16_eq:
223 ; X64-SSE2-NEXT: movdqu (%rsi), %xmm0
224 ; X64-SSE2-NEXT: movdqu (%rdi), %xmm1
225 ; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
226 ; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
227 ; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
228 ; X64-SSE2-NEXT: setne %al
229 ; X64-SSE2-NEXT: retq
231 ; X64-AVX-LABEL: length16_eq:
233 ; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
234 ; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
235 ; X64-AVX-NEXT: vptest %xmm0, %xmm0
236 ; X64-AVX-NEXT: setne %al
238 %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind
239 %cmp = icmp ne i32 %call, 0
; 16-byte memcmp against the first 16 bytes of @.str, tested with icmp eq.
; The checks expect an inline 128-bit vector compare whose second operand is
; a constant-pool entry (matched by the LCPI regex):
;  - SSE2 run: movdqu load, pcmpeqb with the constant, pmovmskb, cmpl against
;    0xFFFF, and sete.
;  - AVX and AVX2 runs (shared X64-AVX prefix): vmovdqu load, vpxor with the
;    constant, vptest, and sete.
243 define i1 @length16_eq_const(i8* %X) nounwind minsize {
244 ; X64-SSE2-LABEL: length16_eq_const:
246 ; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
247 ; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
248 ; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
249 ; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
250 ; X64-SSE2-NEXT: sete %al
251 ; X64-SSE2-NEXT: retq
253 ; X64-AVX-LABEL: length16_eq_const:
255 ; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
256 ; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
257 ; X64-AVX-NEXT: vptest %xmm0, %xmm0
258 ; X64-AVX-NEXT: sete %al
260 %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 16) nounwind
261 %c = icmp eq i32 %m, 0
265 ; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914
; 24-byte memcmp in an i32-returning minsize function.
; The checks expect the length to be loaded into %rdx with a pushq $24 / popq
; pair, followed by a tail jump to memcmp.
267 define i32 @length24(i8* %X, i8* %Y) nounwind minsize {
268 ; X64-LABEL: length24:
270 ; X64-NEXT: pushq $24
271 ; X64-NEXT: popq %rdx
272 ; X64-NEXT: jmp memcmp # TAILCALL
273 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 24) nounwind
; 24-byte memcmp whose result is tested with icmp eq against zero.
; Under the common X64 prefix (all three run lines) the checks expect a callq
; to memcmp with the length loaded via pushq $24 / popq %rdx, then a testl of
; the returned value.
277 define i1 @length24_eq(i8* %x, i8* %y) nounwind minsize {
278 ; X64-LABEL: length24_eq:
280 ; X64-NEXT: pushq %rax
281 ; X64-NEXT: pushq $24
282 ; X64-NEXT: popq %rdx
283 ; X64-NEXT: callq memcmp
284 ; X64-NEXT: testl %eax, %eax
286 ; X64-NEXT: popq %rcx
288 %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind
289 %cmp = icmp eq i32 %call, 0
; 24-byte memcmp against the first 24 bytes of @.str, tested with icmp ne.
; The checks expect a callq to memcmp with the address of .L.str loaded into
; %esi and the length loaded via pushq $24 / popq %rdx, then testl/setne on
; the returned value.
293 define i1 @length24_eq_const(i8* %X) nounwind minsize {
294 ; X64-LABEL: length24_eq_const:
296 ; X64-NEXT: pushq %rax
297 ; X64-NEXT: pushq $24
298 ; X64-NEXT: popq %rdx
299 ; X64-NEXT: movl $.L.str, %esi
300 ; X64-NEXT: callq memcmp
301 ; X64-NEXT: testl %eax, %eax
302 ; X64-NEXT: setne %al
303 ; X64-NEXT: popq %rcx
305 %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 24) nounwind
306 %c = icmp ne i32 %m, 0
; 32-byte memcmp in an i32-returning minsize function.
; The checks expect the length to be loaded into %rdx with a pushq $32 / popq
; pair, followed by a tail jump to memcmp.
310 define i32 @length32(i8* %X, i8* %Y) nounwind minsize {
311 ; X64-LABEL: length32:
313 ; X64-NEXT: pushq $32
314 ; X64-NEXT: popq %rdx
315 ; X64-NEXT: jmp memcmp # TAILCALL
316 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 32) nounwind
320 ; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
; 32-byte memcmp whose result is tested with icmp eq against zero.
; Expected code differs per run line:
;  - SSE2 run: a callq to memcmp (length via pushq $32 / popq %rdx), then
;    testl/sete on the returned value.
;  - AVX1 run: inline 256-bit compare using vmovups, vxorps with the second
;    buffer, vptest, sete, then vzeroupper before returning.
;  - AVX2 run: the same shape using the integer forms vmovdqu and vpxor.
322 define i1 @length32_eq(i8* %x, i8* %y) nounwind minsize {
323 ; X64-SSE2-LABEL: length32_eq:
325 ; X64-SSE2-NEXT: pushq %rax
326 ; X64-SSE2-NEXT: pushq $32
327 ; X64-SSE2-NEXT: popq %rdx
328 ; X64-SSE2-NEXT: callq memcmp
329 ; X64-SSE2-NEXT: testl %eax, %eax
330 ; X64-SSE2-NEXT: sete %al
331 ; X64-SSE2-NEXT: popq %rcx
332 ; X64-SSE2-NEXT: retq
334 ; X64-AVX1-LABEL: length32_eq:
336 ; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
337 ; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0
338 ; X64-AVX1-NEXT: vptest %ymm0, %ymm0
339 ; X64-AVX1-NEXT: sete %al
340 ; X64-AVX1-NEXT: vzeroupper
341 ; X64-AVX1-NEXT: retq
343 ; X64-AVX2-LABEL: length32_eq:
345 ; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
346 ; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
347 ; X64-AVX2-NEXT: vptest %ymm0, %ymm0
348 ; X64-AVX2-NEXT: sete %al
349 ; X64-AVX2-NEXT: vzeroupper
350 ; X64-AVX2-NEXT: retq
351 %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind
352 %cmp = icmp eq i32 %call, 0
; 32-byte memcmp against the first 32 bytes of @.str, tested with icmp ne.
; Expected code differs per run line:
;  - SSE2 run: a callq to memcmp with the address of .L.str in %esi and the
;    length via pushq $32 / popq %rdx, then testl/setne.
;  - AVX1 run: inline 256-bit compare using vmovups and vxorps against a
;    constant-pool entry (matched by the LCPI regex), vptest, setne, then
;    vzeroupper.
;  - AVX2 run: the same shape using the integer forms vmovdqu and vpxor.
356 define i1 @length32_eq_const(i8* %X) nounwind minsize {
357 ; X64-SSE2-LABEL: length32_eq_const:
359 ; X64-SSE2-NEXT: pushq %rax
360 ; X64-SSE2-NEXT: pushq $32
361 ; X64-SSE2-NEXT: popq %rdx
362 ; X64-SSE2-NEXT: movl $.L.str, %esi
363 ; X64-SSE2-NEXT: callq memcmp
364 ; X64-SSE2-NEXT: testl %eax, %eax
365 ; X64-SSE2-NEXT: setne %al
366 ; X64-SSE2-NEXT: popq %rcx
367 ; X64-SSE2-NEXT: retq
369 ; X64-AVX1-LABEL: length32_eq_const:
371 ; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
372 ; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
373 ; X64-AVX1-NEXT: vptest %ymm0, %ymm0
374 ; X64-AVX1-NEXT: setne %al
375 ; X64-AVX1-NEXT: vzeroupper
376 ; X64-AVX1-NEXT: retq
378 ; X64-AVX2-LABEL: length32_eq_const:
380 ; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
381 ; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
382 ; X64-AVX2-NEXT: vptest %ymm0, %ymm0
383 ; X64-AVX2-NEXT: setne %al
384 ; X64-AVX2-NEXT: vzeroupper
385 ; X64-AVX2-NEXT: retq
386 %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 32) nounwind
387 %c = icmp ne i32 %m, 0
; 64-byte memcmp in an i32-returning minsize function.
; The checks expect the length to be loaded into %rdx with a pushq $64 / popq
; pair, followed by a tail jump to memcmp.
391 define i32 @length64(i8* %X, i8* %Y) nounwind minsize {
392 ; X64-LABEL: length64:
394 ; X64-NEXT: pushq $64
395 ; X64-NEXT: popq %rdx
396 ; X64-NEXT: jmp memcmp # TAILCALL
397 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 64) nounwind
; 64-byte memcmp whose result is tested with icmp ne against zero.
; Under the common X64 prefix (all three run lines) the checks expect a callq
; to memcmp with the length loaded via pushq $64 / popq %rdx, then
; testl/setne on the returned value.
401 define i1 @length64_eq(i8* %x, i8* %y) nounwind minsize {
402 ; X64-LABEL: length64_eq:
404 ; X64-NEXT: pushq %rax
405 ; X64-NEXT: pushq $64
406 ; X64-NEXT: popq %rdx
407 ; X64-NEXT: callq memcmp
408 ; X64-NEXT: testl %eax, %eax
409 ; X64-NEXT: setne %al
410 ; X64-NEXT: popq %rcx
412 %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind
413 %cmp = icmp ne i32 %call, 0
; 64-byte memcmp against the first 64 bytes of @.str, tested with icmp eq.
; Under the common X64 prefix (all three run lines) the checks expect a callq
; to memcmp with the address of .L.str loaded into %esi and the length loaded
; via pushq $64 / popq %rdx, then a testl of the returned value.
417 define i1 @length64_eq_const(i8* %X) nounwind minsize {
418 ; X64-LABEL: length64_eq_const:
420 ; X64-NEXT: pushq %rax
421 ; X64-NEXT: pushq $64
422 ; X64-NEXT: popq %rdx
423 ; X64-NEXT: movl $.L.str, %esi
424 ; X64-NEXT: callq memcmp
425 ; X64-NEXT: testl %eax, %eax
427 ; X64-NEXT: popq %rcx
429 %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 64) nounwind
430 %c = icmp eq i32 %m, 0