1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s
4 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
5 target triple = "x86_64-unknown-unknown"
7 ; Stack reload folding tests.
9 ; By including a nop call with side effects we can force a partial register spill of the
10 ; relevant registers and check that the reload is correctly folded into the instruction.
; ANDN, 32-bit operands: both arguments are spilled ahead of the inline asm. The
; assertions expect one to be reloaded into %eax and the other to be read by
; andnl straight from its stack slot (4-byte Folded Reload).
12 define i32 @stack_fold_andn_u32(i32 %a0, i32 %a1) {
13 ; CHECK-LABEL: stack_fold_andn_u32:
15 ; CHECK-NEXT: pushq %rbp
16 ; CHECK-NEXT: .cfi_def_cfa_offset 16
17 ; CHECK-NEXT: pushq %r15
18 ; CHECK-NEXT: .cfi_def_cfa_offset 24
19 ; CHECK-NEXT: pushq %r14
20 ; CHECK-NEXT: .cfi_def_cfa_offset 32
21 ; CHECK-NEXT: pushq %r13
22 ; CHECK-NEXT: .cfi_def_cfa_offset 40
23 ; CHECK-NEXT: pushq %r12
24 ; CHECK-NEXT: .cfi_def_cfa_offset 48
25 ; CHECK-NEXT: pushq %rbx
26 ; CHECK-NEXT: .cfi_def_cfa_offset 56
27 ; CHECK-NEXT: .cfi_offset %rbx, -56
28 ; CHECK-NEXT: .cfi_offset %r12, -48
29 ; CHECK-NEXT: .cfi_offset %r13, -40
30 ; CHECK-NEXT: .cfi_offset %r14, -32
31 ; CHECK-NEXT: .cfi_offset %r15, -24
32 ; CHECK-NEXT: .cfi_offset %rbp, -16
33 ; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
34 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
38 ; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
39 ; CHECK-NEXT: andnl {{[-0-9]+}}(%r{{[sb]}}p), %eax, %eax # 4-byte Folded Reload
40 ; CHECK-NEXT: popq %rbx
41 ; CHECK-NEXT: .cfi_def_cfa_offset 48
42 ; CHECK-NEXT: popq %r12
43 ; CHECK-NEXT: .cfi_def_cfa_offset 40
44 ; CHECK-NEXT: popq %r13
45 ; CHECK-NEXT: .cfi_def_cfa_offset 32
46 ; CHECK-NEXT: popq %r14
47 ; CHECK-NEXT: .cfi_def_cfa_offset 24
48 ; CHECK-NEXT: popq %r15
49 ; CHECK-NEXT: .cfi_def_cfa_offset 16
50 ; CHECK-NEXT: popq %rbp
51 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; The clobber list names every general-purpose register except rsp, so neither
; argument can stay in a register across the asm.
53 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; ANDN, 64-bit operands: both arguments are spilled ahead of the inline asm. The
; assertions expect one to be reloaded into %rax and the other to be read by
; andnq straight from its stack slot (8-byte Folded Reload).
59 define i64 @stack_fold_andn_u64(i64 %a0, i64 %a1) {
60 ; CHECK-LABEL: stack_fold_andn_u64:
62 ; CHECK-NEXT: pushq %rbp
63 ; CHECK-NEXT: .cfi_def_cfa_offset 16
64 ; CHECK-NEXT: pushq %r15
65 ; CHECK-NEXT: .cfi_def_cfa_offset 24
66 ; CHECK-NEXT: pushq %r14
67 ; CHECK-NEXT: .cfi_def_cfa_offset 32
68 ; CHECK-NEXT: pushq %r13
69 ; CHECK-NEXT: .cfi_def_cfa_offset 40
70 ; CHECK-NEXT: pushq %r12
71 ; CHECK-NEXT: .cfi_def_cfa_offset 48
72 ; CHECK-NEXT: pushq %rbx
73 ; CHECK-NEXT: .cfi_def_cfa_offset 56
74 ; CHECK-NEXT: .cfi_offset %rbx, -56
75 ; CHECK-NEXT: .cfi_offset %r12, -48
76 ; CHECK-NEXT: .cfi_offset %r13, -40
77 ; CHECK-NEXT: .cfi_offset %r14, -32
78 ; CHECK-NEXT: .cfi_offset %r15, -24
79 ; CHECK-NEXT: .cfi_offset %rbp, -16
80 ; CHECK-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
81 ; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
85 ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
86 ; CHECK-NEXT: andnq {{[-0-9]+}}(%r{{[sb]}}p), %rax, %rax # 8-byte Folded Reload
87 ; CHECK-NEXT: popq %rbx
88 ; CHECK-NEXT: .cfi_def_cfa_offset 48
89 ; CHECK-NEXT: popq %r12
90 ; CHECK-NEXT: .cfi_def_cfa_offset 40
91 ; CHECK-NEXT: popq %r13
92 ; CHECK-NEXT: .cfi_def_cfa_offset 32
93 ; CHECK-NEXT: popq %r14
94 ; CHECK-NEXT: .cfi_def_cfa_offset 24
95 ; CHECK-NEXT: popq %r15
96 ; CHECK-NEXT: .cfi_def_cfa_offset 16
97 ; CHECK-NEXT: popq %rbp
98 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; The clobber list names every general-purpose register except rsp, so neither
; argument can stay in a register across the asm.
100 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; BEXTR, 32-bit operands, generated from the llvm.x86.bmi.bextr.32 intrinsic.
; Both arguments are spilled ahead of the inline asm; the assertions expect one
; to be reloaded into %eax and the other to be read by bextrl straight from its
; stack slot (4-byte Folded Reload).
106 define i32 @stack_fold_bextr_u32(i32 %a0, i32 %a1) {
107 ; CHECK-LABEL: stack_fold_bextr_u32:
109 ; CHECK-NEXT: pushq %rbp
110 ; CHECK-NEXT: .cfi_def_cfa_offset 16
111 ; CHECK-NEXT: pushq %r15
112 ; CHECK-NEXT: .cfi_def_cfa_offset 24
113 ; CHECK-NEXT: pushq %r14
114 ; CHECK-NEXT: .cfi_def_cfa_offset 32
115 ; CHECK-NEXT: pushq %r13
116 ; CHECK-NEXT: .cfi_def_cfa_offset 40
117 ; CHECK-NEXT: pushq %r12
118 ; CHECK-NEXT: .cfi_def_cfa_offset 48
119 ; CHECK-NEXT: pushq %rbx
120 ; CHECK-NEXT: .cfi_def_cfa_offset 56
121 ; CHECK-NEXT: .cfi_offset %rbx, -56
122 ; CHECK-NEXT: .cfi_offset %r12, -48
123 ; CHECK-NEXT: .cfi_offset %r13, -40
124 ; CHECK-NEXT: .cfi_offset %r14, -32
125 ; CHECK-NEXT: .cfi_offset %r15, -24
126 ; CHECK-NEXT: .cfi_offset %rbp, -16
127 ; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
128 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
131 ; CHECK-NEXT: #NO_APP
132 ; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
133 ; CHECK-NEXT: bextrl %eax, {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
134 ; CHECK-NEXT: popq %rbx
135 ; CHECK-NEXT: .cfi_def_cfa_offset 48
136 ; CHECK-NEXT: popq %r12
137 ; CHECK-NEXT: .cfi_def_cfa_offset 40
138 ; CHECK-NEXT: popq %r13
139 ; CHECK-NEXT: .cfi_def_cfa_offset 32
140 ; CHECK-NEXT: popq %r14
141 ; CHECK-NEXT: .cfi_def_cfa_offset 24
142 ; CHECK-NEXT: popq %r15
143 ; CHECK-NEXT: .cfi_def_cfa_offset 16
144 ; CHECK-NEXT: popq %rbp
145 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; The clobber list names every general-purpose register except rsp, so neither
; argument can stay in a register across the asm.
147 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
148 %2 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %a0, i32 %a1)
; 32-bit BMI BEXTR intrinsic called by stack_fold_bextr_u32.
151 declare i32 @llvm.x86.bmi.bextr.32(i32, i32)
; BEXTR, 64-bit operands, generated from the llvm.x86.bmi.bextr.64 intrinsic.
; Both arguments are spilled ahead of the inline asm; the assertions expect one
; to be reloaded into %rax and the other to be read by bextrq straight from its
; stack slot (8-byte Folded Reload).
153 define i64 @stack_fold_bextr_u64(i64 %a0, i64 %a1) {
154 ; CHECK-LABEL: stack_fold_bextr_u64:
156 ; CHECK-NEXT: pushq %rbp
157 ; CHECK-NEXT: .cfi_def_cfa_offset 16
158 ; CHECK-NEXT: pushq %r15
159 ; CHECK-NEXT: .cfi_def_cfa_offset 24
160 ; CHECK-NEXT: pushq %r14
161 ; CHECK-NEXT: .cfi_def_cfa_offset 32
162 ; CHECK-NEXT: pushq %r13
163 ; CHECK-NEXT: .cfi_def_cfa_offset 40
164 ; CHECK-NEXT: pushq %r12
165 ; CHECK-NEXT: .cfi_def_cfa_offset 48
166 ; CHECK-NEXT: pushq %rbx
167 ; CHECK-NEXT: .cfi_def_cfa_offset 56
168 ; CHECK-NEXT: .cfi_offset %rbx, -56
169 ; CHECK-NEXT: .cfi_offset %r12, -48
170 ; CHECK-NEXT: .cfi_offset %r13, -40
171 ; CHECK-NEXT: .cfi_offset %r14, -32
172 ; CHECK-NEXT: .cfi_offset %r15, -24
173 ; CHECK-NEXT: .cfi_offset %rbp, -16
174 ; CHECK-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
175 ; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
178 ; CHECK-NEXT: #NO_APP
179 ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
180 ; CHECK-NEXT: bextrq %rax, {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
181 ; CHECK-NEXT: popq %rbx
182 ; CHECK-NEXT: .cfi_def_cfa_offset 48
183 ; CHECK-NEXT: popq %r12
184 ; CHECK-NEXT: .cfi_def_cfa_offset 40
185 ; CHECK-NEXT: popq %r13
186 ; CHECK-NEXT: .cfi_def_cfa_offset 32
187 ; CHECK-NEXT: popq %r14
188 ; CHECK-NEXT: .cfi_def_cfa_offset 24
189 ; CHECK-NEXT: popq %r15
190 ; CHECK-NEXT: .cfi_def_cfa_offset 16
191 ; CHECK-NEXT: popq %rbp
192 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; The clobber list names every general-purpose register except rsp, so neither
; argument can stay in a register across the asm.
194 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
195 %2 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %a0, i64 %a1)
; 64-bit BMI BEXTR intrinsic called by stack_fold_bextr_u64.
198 declare i64 @llvm.x86.bmi.bextr.64(i64, i64)
; BLSI, 32-bit operand: %edi is spilled ahead of the inline asm, and the
; assertions expect blsil to read it straight from the stack slot (4-byte Folded
; Reload) with no separate reload instruction.
200 define i32 @stack_fold_blsi_u32(i32 %a0) {
201 ; CHECK-LABEL: stack_fold_blsi_u32:
203 ; CHECK-NEXT: pushq %rbp
204 ; CHECK-NEXT: .cfi_def_cfa_offset 16
205 ; CHECK-NEXT: pushq %r15
206 ; CHECK-NEXT: .cfi_def_cfa_offset 24
207 ; CHECK-NEXT: pushq %r14
208 ; CHECK-NEXT: .cfi_def_cfa_offset 32
209 ; CHECK-NEXT: pushq %r13
210 ; CHECK-NEXT: .cfi_def_cfa_offset 40
211 ; CHECK-NEXT: pushq %r12
212 ; CHECK-NEXT: .cfi_def_cfa_offset 48
213 ; CHECK-NEXT: pushq %rbx
214 ; CHECK-NEXT: .cfi_def_cfa_offset 56
215 ; CHECK-NEXT: .cfi_offset %rbx, -56
216 ; CHECK-NEXT: .cfi_offset %r12, -48
217 ; CHECK-NEXT: .cfi_offset %r13, -40
218 ; CHECK-NEXT: .cfi_offset %r14, -32
219 ; CHECK-NEXT: .cfi_offset %r15, -24
220 ; CHECK-NEXT: .cfi_offset %rbp, -16
221 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
224 ; CHECK-NEXT: #NO_APP
225 ; CHECK-NEXT: blsil {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
226 ; CHECK-NEXT: popq %rbx
227 ; CHECK-NEXT: .cfi_def_cfa_offset 48
228 ; CHECK-NEXT: popq %r12
229 ; CHECK-NEXT: .cfi_def_cfa_offset 40
230 ; CHECK-NEXT: popq %r13
231 ; CHECK-NEXT: .cfi_def_cfa_offset 32
232 ; CHECK-NEXT: popq %r14
233 ; CHECK-NEXT: .cfi_def_cfa_offset 24
234 ; CHECK-NEXT: popq %r15
235 ; CHECK-NEXT: .cfi_def_cfa_offset 16
236 ; CHECK-NEXT: popq %rbp
237 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; The clobber list names every general-purpose register except rsp, so the
; argument cannot stay in a register across the asm.
239 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; BLSI, 64-bit operand: %rdi is spilled ahead of the inline asm, and the
; assertions expect blsiq to read it straight from the stack slot (8-byte Folded
; Reload) with no separate reload instruction.
245 define i64 @stack_fold_blsi_u64(i64 %a0) {
246 ; CHECK-LABEL: stack_fold_blsi_u64:
248 ; CHECK-NEXT: pushq %rbp
249 ; CHECK-NEXT: .cfi_def_cfa_offset 16
250 ; CHECK-NEXT: pushq %r15
251 ; CHECK-NEXT: .cfi_def_cfa_offset 24
252 ; CHECK-NEXT: pushq %r14
253 ; CHECK-NEXT: .cfi_def_cfa_offset 32
254 ; CHECK-NEXT: pushq %r13
255 ; CHECK-NEXT: .cfi_def_cfa_offset 40
256 ; CHECK-NEXT: pushq %r12
257 ; CHECK-NEXT: .cfi_def_cfa_offset 48
258 ; CHECK-NEXT: pushq %rbx
259 ; CHECK-NEXT: .cfi_def_cfa_offset 56
260 ; CHECK-NEXT: .cfi_offset %rbx, -56
261 ; CHECK-NEXT: .cfi_offset %r12, -48
262 ; CHECK-NEXT: .cfi_offset %r13, -40
263 ; CHECK-NEXT: .cfi_offset %r14, -32
264 ; CHECK-NEXT: .cfi_offset %r15, -24
265 ; CHECK-NEXT: .cfi_offset %rbp, -16
266 ; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
269 ; CHECK-NEXT: #NO_APP
270 ; CHECK-NEXT: blsiq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
271 ; CHECK-NEXT: popq %rbx
272 ; CHECK-NEXT: .cfi_def_cfa_offset 48
273 ; CHECK-NEXT: popq %r12
274 ; CHECK-NEXT: .cfi_def_cfa_offset 40
275 ; CHECK-NEXT: popq %r13
276 ; CHECK-NEXT: .cfi_def_cfa_offset 32
277 ; CHECK-NEXT: popq %r14
278 ; CHECK-NEXT: .cfi_def_cfa_offset 24
279 ; CHECK-NEXT: popq %r15
280 ; CHECK-NEXT: .cfi_def_cfa_offset 16
281 ; CHECK-NEXT: popq %rbp
282 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; The clobber list names every general-purpose register except rsp, so the
; argument cannot stay in a register across the asm.
284 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; BLSMSK, 32-bit operand: %edi is spilled ahead of the inline asm, and the
; assertions expect blsmskl to read it straight from the stack slot (4-byte
; Folded Reload) with no separate reload instruction.
290 define i32 @stack_fold_blsmsk_u32(i32 %a0) {
291 ; CHECK-LABEL: stack_fold_blsmsk_u32:
293 ; CHECK-NEXT: pushq %rbp
294 ; CHECK-NEXT: .cfi_def_cfa_offset 16
295 ; CHECK-NEXT: pushq %r15
296 ; CHECK-NEXT: .cfi_def_cfa_offset 24
297 ; CHECK-NEXT: pushq %r14
298 ; CHECK-NEXT: .cfi_def_cfa_offset 32
299 ; CHECK-NEXT: pushq %r13
300 ; CHECK-NEXT: .cfi_def_cfa_offset 40
301 ; CHECK-NEXT: pushq %r12
302 ; CHECK-NEXT: .cfi_def_cfa_offset 48
303 ; CHECK-NEXT: pushq %rbx
304 ; CHECK-NEXT: .cfi_def_cfa_offset 56
305 ; CHECK-NEXT: .cfi_offset %rbx, -56
306 ; CHECK-NEXT: .cfi_offset %r12, -48
307 ; CHECK-NEXT: .cfi_offset %r13, -40
308 ; CHECK-NEXT: .cfi_offset %r14, -32
309 ; CHECK-NEXT: .cfi_offset %r15, -24
310 ; CHECK-NEXT: .cfi_offset %rbp, -16
311 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
314 ; CHECK-NEXT: #NO_APP
315 ; CHECK-NEXT: blsmskl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
316 ; CHECK-NEXT: popq %rbx
317 ; CHECK-NEXT: .cfi_def_cfa_offset 48
318 ; CHECK-NEXT: popq %r12
319 ; CHECK-NEXT: .cfi_def_cfa_offset 40
320 ; CHECK-NEXT: popq %r13
321 ; CHECK-NEXT: .cfi_def_cfa_offset 32
322 ; CHECK-NEXT: popq %r14
323 ; CHECK-NEXT: .cfi_def_cfa_offset 24
324 ; CHECK-NEXT: popq %r15
325 ; CHECK-NEXT: .cfi_def_cfa_offset 16
326 ; CHECK-NEXT: popq %rbp
327 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; The clobber list names every general-purpose register except rsp, so the
; argument cannot stay in a register across the asm.
329 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; BLSMSK, 64-bit operand: %rdi is spilled ahead of the inline asm, and the
; assertions expect blsmskq to read it straight from the stack slot (8-byte
; Folded Reload) with no separate reload instruction.
335 define i64 @stack_fold_blsmsk_u64(i64 %a0) {
336 ; CHECK-LABEL: stack_fold_blsmsk_u64:
338 ; CHECK-NEXT: pushq %rbp
339 ; CHECK-NEXT: .cfi_def_cfa_offset 16
340 ; CHECK-NEXT: pushq %r15
341 ; CHECK-NEXT: .cfi_def_cfa_offset 24
342 ; CHECK-NEXT: pushq %r14
343 ; CHECK-NEXT: .cfi_def_cfa_offset 32
344 ; CHECK-NEXT: pushq %r13
345 ; CHECK-NEXT: .cfi_def_cfa_offset 40
346 ; CHECK-NEXT: pushq %r12
347 ; CHECK-NEXT: .cfi_def_cfa_offset 48
348 ; CHECK-NEXT: pushq %rbx
349 ; CHECK-NEXT: .cfi_def_cfa_offset 56
350 ; CHECK-NEXT: .cfi_offset %rbx, -56
351 ; CHECK-NEXT: .cfi_offset %r12, -48
352 ; CHECK-NEXT: .cfi_offset %r13, -40
353 ; CHECK-NEXT: .cfi_offset %r14, -32
354 ; CHECK-NEXT: .cfi_offset %r15, -24
355 ; CHECK-NEXT: .cfi_offset %rbp, -16
356 ; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
359 ; CHECK-NEXT: #NO_APP
360 ; CHECK-NEXT: blsmskq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
361 ; CHECK-NEXT: popq %rbx
362 ; CHECK-NEXT: .cfi_def_cfa_offset 48
363 ; CHECK-NEXT: popq %r12
364 ; CHECK-NEXT: .cfi_def_cfa_offset 40
365 ; CHECK-NEXT: popq %r13
366 ; CHECK-NEXT: .cfi_def_cfa_offset 32
367 ; CHECK-NEXT: popq %r14
368 ; CHECK-NEXT: .cfi_def_cfa_offset 24
369 ; CHECK-NEXT: popq %r15
370 ; CHECK-NEXT: .cfi_def_cfa_offset 16
371 ; CHECK-NEXT: popq %rbp
372 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; The clobber list names every general-purpose register except rsp, so the
; argument cannot stay in a register across the asm.
374 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; BLSR, 32-bit operand: %edi is spilled ahead of the inline asm, and the
; assertions expect blsrl to read it straight from the stack slot (4-byte Folded
; Reload) with no separate reload instruction.
380 define i32 @stack_fold_blsr_u32(i32 %a0) {
381 ; CHECK-LABEL: stack_fold_blsr_u32:
383 ; CHECK-NEXT: pushq %rbp
384 ; CHECK-NEXT: .cfi_def_cfa_offset 16
385 ; CHECK-NEXT: pushq %r15
386 ; CHECK-NEXT: .cfi_def_cfa_offset 24
387 ; CHECK-NEXT: pushq %r14
388 ; CHECK-NEXT: .cfi_def_cfa_offset 32
389 ; CHECK-NEXT: pushq %r13
390 ; CHECK-NEXT: .cfi_def_cfa_offset 40
391 ; CHECK-NEXT: pushq %r12
392 ; CHECK-NEXT: .cfi_def_cfa_offset 48
393 ; CHECK-NEXT: pushq %rbx
394 ; CHECK-NEXT: .cfi_def_cfa_offset 56
395 ; CHECK-NEXT: .cfi_offset %rbx, -56
396 ; CHECK-NEXT: .cfi_offset %r12, -48
397 ; CHECK-NEXT: .cfi_offset %r13, -40
398 ; CHECK-NEXT: .cfi_offset %r14, -32
399 ; CHECK-NEXT: .cfi_offset %r15, -24
400 ; CHECK-NEXT: .cfi_offset %rbp, -16
401 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
404 ; CHECK-NEXT: #NO_APP
405 ; CHECK-NEXT: blsrl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
406 ; CHECK-NEXT: popq %rbx
407 ; CHECK-NEXT: .cfi_def_cfa_offset 48
408 ; CHECK-NEXT: popq %r12
409 ; CHECK-NEXT: .cfi_def_cfa_offset 40
410 ; CHECK-NEXT: popq %r13
411 ; CHECK-NEXT: .cfi_def_cfa_offset 32
412 ; CHECK-NEXT: popq %r14
413 ; CHECK-NEXT: .cfi_def_cfa_offset 24
414 ; CHECK-NEXT: popq %r15
415 ; CHECK-NEXT: .cfi_def_cfa_offset 16
416 ; CHECK-NEXT: popq %rbp
417 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; The clobber list names every general-purpose register except rsp, so the
; argument cannot stay in a register across the asm.
419 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; BLSR, 64-bit operand: %rdi is spilled ahead of the inline asm, and the
; assertions expect blsrq to read it straight from the stack slot (8-byte Folded
; Reload) with no separate reload instruction.
425 define i64 @stack_fold_blsr_u64(i64 %a0) {
426 ; CHECK-LABEL: stack_fold_blsr_u64:
428 ; CHECK-NEXT: pushq %rbp
429 ; CHECK-NEXT: .cfi_def_cfa_offset 16
430 ; CHECK-NEXT: pushq %r15
431 ; CHECK-NEXT: .cfi_def_cfa_offset 24
432 ; CHECK-NEXT: pushq %r14
433 ; CHECK-NEXT: .cfi_def_cfa_offset 32
434 ; CHECK-NEXT: pushq %r13
435 ; CHECK-NEXT: .cfi_def_cfa_offset 40
436 ; CHECK-NEXT: pushq %r12
437 ; CHECK-NEXT: .cfi_def_cfa_offset 48
438 ; CHECK-NEXT: pushq %rbx
439 ; CHECK-NEXT: .cfi_def_cfa_offset 56
440 ; CHECK-NEXT: .cfi_offset %rbx, -56
441 ; CHECK-NEXT: .cfi_offset %r12, -48
442 ; CHECK-NEXT: .cfi_offset %r13, -40
443 ; CHECK-NEXT: .cfi_offset %r14, -32
444 ; CHECK-NEXT: .cfi_offset %r15, -24
445 ; CHECK-NEXT: .cfi_offset %rbp, -16
446 ; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
449 ; CHECK-NEXT: #NO_APP
450 ; CHECK-NEXT: blsrq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
451 ; CHECK-NEXT: popq %rbx
452 ; CHECK-NEXT: .cfi_def_cfa_offset 48
453 ; CHECK-NEXT: popq %r12
454 ; CHECK-NEXT: .cfi_def_cfa_offset 40
455 ; CHECK-NEXT: popq %r13
456 ; CHECK-NEXT: .cfi_def_cfa_offset 32
457 ; CHECK-NEXT: popq %r14
458 ; CHECK-NEXT: .cfi_def_cfa_offset 24
459 ; CHECK-NEXT: popq %r15
460 ; CHECK-NEXT: .cfi_def_cfa_offset 16
461 ; CHECK-NEXT: popq %rbp
462 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; The clobber list names every general-purpose register except rsp, so the
; argument cannot stay in a register across the asm.
464 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
470 ; TODO: add stack_fold_tzcnt_u16 (only the 32-bit and 64-bit tzcnt cases are tested below).
; TZCNT, 32-bit operand, generated from llvm.cttz.i32 with its i1 flag set to 0.
; %edi is spilled ahead of the inline asm, and the assertions expect tzcntl to
; read it straight from the stack slot (4-byte Folded Reload) with no separate
; reload instruction.
472 define i32 @stack_fold_tzcnt_u32(i32 %a0) {
473 ; CHECK-LABEL: stack_fold_tzcnt_u32:
475 ; CHECK-NEXT: pushq %rbp
476 ; CHECK-NEXT: .cfi_def_cfa_offset 16
477 ; CHECK-NEXT: pushq %r15
478 ; CHECK-NEXT: .cfi_def_cfa_offset 24
479 ; CHECK-NEXT: pushq %r14
480 ; CHECK-NEXT: .cfi_def_cfa_offset 32
481 ; CHECK-NEXT: pushq %r13
482 ; CHECK-NEXT: .cfi_def_cfa_offset 40
483 ; CHECK-NEXT: pushq %r12
484 ; CHECK-NEXT: .cfi_def_cfa_offset 48
485 ; CHECK-NEXT: pushq %rbx
486 ; CHECK-NEXT: .cfi_def_cfa_offset 56
487 ; CHECK-NEXT: .cfi_offset %rbx, -56
488 ; CHECK-NEXT: .cfi_offset %r12, -48
489 ; CHECK-NEXT: .cfi_offset %r13, -40
490 ; CHECK-NEXT: .cfi_offset %r14, -32
491 ; CHECK-NEXT: .cfi_offset %r15, -24
492 ; CHECK-NEXT: .cfi_offset %rbp, -16
493 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
496 ; CHECK-NEXT: #NO_APP
497 ; CHECK-NEXT: tzcntl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
498 ; CHECK-NEXT: popq %rbx
499 ; CHECK-NEXT: .cfi_def_cfa_offset 48
500 ; CHECK-NEXT: popq %r12
501 ; CHECK-NEXT: .cfi_def_cfa_offset 40
502 ; CHECK-NEXT: popq %r13
503 ; CHECK-NEXT: .cfi_def_cfa_offset 32
504 ; CHECK-NEXT: popq %r14
505 ; CHECK-NEXT: .cfi_def_cfa_offset 24
506 ; CHECK-NEXT: popq %r15
507 ; CHECK-NEXT: .cfi_def_cfa_offset 16
508 ; CHECK-NEXT: popq %rbp
509 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; The clobber list names every general-purpose register except rsp, so the
; argument cannot stay in a register across the asm.
511 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
512 %2 = tail call i32 @llvm.cttz.i32(i32 %a0, i1 0)
; i32 count-trailing-zeros intrinsic called by stack_fold_tzcnt_u32.
515 declare i32 @llvm.cttz.i32(i32, i1)
; TZCNT, 64-bit operand, generated from llvm.cttz.i64 with its i1 flag set to 0.
; %rdi is spilled ahead of the inline asm, and the assertions expect tzcntq to
; read it straight from the stack slot (8-byte Folded Reload) with no separate
; reload instruction.
517 define i64 @stack_fold_tzcnt_u64(i64 %a0) {
518 ; CHECK-LABEL: stack_fold_tzcnt_u64:
520 ; CHECK-NEXT: pushq %rbp
521 ; CHECK-NEXT: .cfi_def_cfa_offset 16
522 ; CHECK-NEXT: pushq %r15
523 ; CHECK-NEXT: .cfi_def_cfa_offset 24
524 ; CHECK-NEXT: pushq %r14
525 ; CHECK-NEXT: .cfi_def_cfa_offset 32
526 ; CHECK-NEXT: pushq %r13
527 ; CHECK-NEXT: .cfi_def_cfa_offset 40
528 ; CHECK-NEXT: pushq %r12
529 ; CHECK-NEXT: .cfi_def_cfa_offset 48
530 ; CHECK-NEXT: pushq %rbx
531 ; CHECK-NEXT: .cfi_def_cfa_offset 56
532 ; CHECK-NEXT: .cfi_offset %rbx, -56
533 ; CHECK-NEXT: .cfi_offset %r12, -48
534 ; CHECK-NEXT: .cfi_offset %r13, -40
535 ; CHECK-NEXT: .cfi_offset %r14, -32
536 ; CHECK-NEXT: .cfi_offset %r15, -24
537 ; CHECK-NEXT: .cfi_offset %rbp, -16
538 ; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
541 ; CHECK-NEXT: #NO_APP
542 ; CHECK-NEXT: tzcntq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
543 ; CHECK-NEXT: popq %rbx
544 ; CHECK-NEXT: .cfi_def_cfa_offset 48
545 ; CHECK-NEXT: popq %r12
546 ; CHECK-NEXT: .cfi_def_cfa_offset 40
547 ; CHECK-NEXT: popq %r13
548 ; CHECK-NEXT: .cfi_def_cfa_offset 32
549 ; CHECK-NEXT: popq %r14
550 ; CHECK-NEXT: .cfi_def_cfa_offset 24
551 ; CHECK-NEXT: popq %r15
552 ; CHECK-NEXT: .cfi_def_cfa_offset 16
553 ; CHECK-NEXT: popq %rbp
554 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; The clobber list names every general-purpose register except rsp, so the
; argument cannot stay in a register across the asm.
556 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
557 %2 = tail call i64 @llvm.cttz.i64(i64 %a0, i1 0)
; i64 count-trailing-zeros intrinsic called by stack_fold_tzcnt_u64.
560 declare i64 @llvm.cttz.i64(i64, i1)