1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+bmi,+tbm < %s | FileCheck %s
4 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
5 target triple = "x86_64-unknown-unknown"
7 ; Stack reload folding tests.
9 ; By including a nop inline-asm call with side effects that clobbers the general-purpose registers,
10 ; we can force a partial register spill of the relevant registers and check that the reload is correctly folded into the instruction.
; Test: %a0 (i32) is spilled to the stack (4-byte Spill) because the inline-asm nop below
; clobbers rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15; the CHECK lines require the reload
; to be folded into the memory operand of bextrl with immediate 3841 (0xF01).
12 define i32 @stack_fold_bextri_u32(i32 %a0) {
13 ; CHECK-LABEL: stack_fold_bextri_u32:
15 ; CHECK-NEXT: pushq %rbp
16 ; CHECK-NEXT: .cfi_def_cfa_offset 16
17 ; CHECK-NEXT: pushq %r15
18 ; CHECK-NEXT: .cfi_def_cfa_offset 24
19 ; CHECK-NEXT: pushq %r14
20 ; CHECK-NEXT: .cfi_def_cfa_offset 32
21 ; CHECK-NEXT: pushq %r13
22 ; CHECK-NEXT: .cfi_def_cfa_offset 40
23 ; CHECK-NEXT: pushq %r12
24 ; CHECK-NEXT: .cfi_def_cfa_offset 48
25 ; CHECK-NEXT: pushq %rbx
26 ; CHECK-NEXT: .cfi_def_cfa_offset 56
27 ; CHECK-NEXT: .cfi_offset %rbx, -56
28 ; CHECK-NEXT: .cfi_offset %r12, -48
29 ; CHECK-NEXT: .cfi_offset %r13, -40
30 ; CHECK-NEXT: .cfi_offset %r14, -32
31 ; CHECK-NEXT: .cfi_offset %r15, -24
32 ; CHECK-NEXT: .cfi_offset %rbp, -16
33 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
37 ; CHECK-NEXT: bextrl $3841, {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
38 ; CHECK-NEXT: # imm = 0xF01
39 ; CHECK-NEXT: popq %rbx
40 ; CHECK-NEXT: .cfi_def_cfa_offset 48
41 ; CHECK-NEXT: popq %r12
42 ; CHECK-NEXT: .cfi_def_cfa_offset 40
43 ; CHECK-NEXT: popq %r13
44 ; CHECK-NEXT: .cfi_def_cfa_offset 32
45 ; CHECK-NEXT: popq %r14
46 ; CHECK-NEXT: .cfi_def_cfa_offset 24
47 ; CHECK-NEXT: popq %r15
48 ; CHECK-NEXT: .cfi_def_cfa_offset 16
49 ; CHECK-NEXT: popq %rbp
50 ; CHECK-NEXT: .cfi_def_cfa_offset 8
52 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
53 %2 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 %a0, i32 3841)
; Declaration of the intrinsic called by @stack_fold_bextri_u32.
56 declare i32 @llvm.x86.tbm.bextri.u32(i32, i32)
; Test: %a0 (i64) is spilled to the stack (8-byte Spill) because the inline-asm nop below
; clobbers rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15; the CHECK lines require the reload
; to be folded into the memory operand of bextrq with immediate 3841 (0xF01).
58 define i64 @stack_fold_bextri_u64(i64 %a0) {
59 ; CHECK-LABEL: stack_fold_bextri_u64:
61 ; CHECK-NEXT: pushq %rbp
62 ; CHECK-NEXT: .cfi_def_cfa_offset 16
63 ; CHECK-NEXT: pushq %r15
64 ; CHECK-NEXT: .cfi_def_cfa_offset 24
65 ; CHECK-NEXT: pushq %r14
66 ; CHECK-NEXT: .cfi_def_cfa_offset 32
67 ; CHECK-NEXT: pushq %r13
68 ; CHECK-NEXT: .cfi_def_cfa_offset 40
69 ; CHECK-NEXT: pushq %r12
70 ; CHECK-NEXT: .cfi_def_cfa_offset 48
71 ; CHECK-NEXT: pushq %rbx
72 ; CHECK-NEXT: .cfi_def_cfa_offset 56
73 ; CHECK-NEXT: .cfi_offset %rbx, -56
74 ; CHECK-NEXT: .cfi_offset %r12, -48
75 ; CHECK-NEXT: .cfi_offset %r13, -40
76 ; CHECK-NEXT: .cfi_offset %r14, -32
77 ; CHECK-NEXT: .cfi_offset %r15, -24
78 ; CHECK-NEXT: .cfi_offset %rbp, -16
79 ; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
83 ; CHECK-NEXT: bextrq $3841, {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
84 ; CHECK-NEXT: # imm = 0xF01
85 ; CHECK-NEXT: popq %rbx
86 ; CHECK-NEXT: .cfi_def_cfa_offset 48
87 ; CHECK-NEXT: popq %r12
88 ; CHECK-NEXT: .cfi_def_cfa_offset 40
89 ; CHECK-NEXT: popq %r13
90 ; CHECK-NEXT: .cfi_def_cfa_offset 32
91 ; CHECK-NEXT: popq %r14
92 ; CHECK-NEXT: .cfi_def_cfa_offset 24
93 ; CHECK-NEXT: popq %r15
94 ; CHECK-NEXT: .cfi_def_cfa_offset 16
95 ; CHECK-NEXT: popq %rbp
96 ; CHECK-NEXT: .cfi_def_cfa_offset 8
98 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
99 %2 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %a0, i64 3841)
; Declaration of the intrinsic called by @stack_fold_bextri_u64.
102 declare i64 @llvm.x86.tbm.bextri.u64(i64, i64)
; Test: %a0 (i32) is spilled to the stack (4-byte Spill) because the inline-asm nop below
; clobbers rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15; the CHECK lines require the reload
; to be folded into the memory operand of blcfilll (4-byte Folded Reload).
104 define i32 @stack_fold_blcfill_u32(i32 %a0) {
105 ; CHECK-LABEL: stack_fold_blcfill_u32:
107 ; CHECK-NEXT: pushq %rbp
108 ; CHECK-NEXT: .cfi_def_cfa_offset 16
109 ; CHECK-NEXT: pushq %r15
110 ; CHECK-NEXT: .cfi_def_cfa_offset 24
111 ; CHECK-NEXT: pushq %r14
112 ; CHECK-NEXT: .cfi_def_cfa_offset 32
113 ; CHECK-NEXT: pushq %r13
114 ; CHECK-NEXT: .cfi_def_cfa_offset 40
115 ; CHECK-NEXT: pushq %r12
116 ; CHECK-NEXT: .cfi_def_cfa_offset 48
117 ; CHECK-NEXT: pushq %rbx
118 ; CHECK-NEXT: .cfi_def_cfa_offset 56
119 ; CHECK-NEXT: .cfi_offset %rbx, -56
120 ; CHECK-NEXT: .cfi_offset %r12, -48
121 ; CHECK-NEXT: .cfi_offset %r13, -40
122 ; CHECK-NEXT: .cfi_offset %r14, -32
123 ; CHECK-NEXT: .cfi_offset %r15, -24
124 ; CHECK-NEXT: .cfi_offset %rbp, -16
125 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
128 ; CHECK-NEXT: #NO_APP
129 ; CHECK-NEXT: blcfilll {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
130 ; CHECK-NEXT: popq %rbx
131 ; CHECK-NEXT: .cfi_def_cfa_offset 48
132 ; CHECK-NEXT: popq %r12
133 ; CHECK-NEXT: .cfi_def_cfa_offset 40
134 ; CHECK-NEXT: popq %r13
135 ; CHECK-NEXT: .cfi_def_cfa_offset 32
136 ; CHECK-NEXT: popq %r14
137 ; CHECK-NEXT: .cfi_def_cfa_offset 24
138 ; CHECK-NEXT: popq %r15
139 ; CHECK-NEXT: .cfi_def_cfa_offset 16
140 ; CHECK-NEXT: popq %rbp
141 ; CHECK-NEXT: .cfi_def_cfa_offset 8
143 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Test: %a0 (i64) is spilled to the stack (8-byte Spill) because the inline-asm nop below
; clobbers rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15; the CHECK lines require the reload
; to be folded into the memory operand of blcfillq (8-byte Folded Reload).
149 define i64 @stack_fold_blcfill_u64(i64 %a0) {
150 ; CHECK-LABEL: stack_fold_blcfill_u64:
152 ; CHECK-NEXT: pushq %rbp
153 ; CHECK-NEXT: .cfi_def_cfa_offset 16
154 ; CHECK-NEXT: pushq %r15
155 ; CHECK-NEXT: .cfi_def_cfa_offset 24
156 ; CHECK-NEXT: pushq %r14
157 ; CHECK-NEXT: .cfi_def_cfa_offset 32
158 ; CHECK-NEXT: pushq %r13
159 ; CHECK-NEXT: .cfi_def_cfa_offset 40
160 ; CHECK-NEXT: pushq %r12
161 ; CHECK-NEXT: .cfi_def_cfa_offset 48
162 ; CHECK-NEXT: pushq %rbx
163 ; CHECK-NEXT: .cfi_def_cfa_offset 56
164 ; CHECK-NEXT: .cfi_offset %rbx, -56
165 ; CHECK-NEXT: .cfi_offset %r12, -48
166 ; CHECK-NEXT: .cfi_offset %r13, -40
167 ; CHECK-NEXT: .cfi_offset %r14, -32
168 ; CHECK-NEXT: .cfi_offset %r15, -24
169 ; CHECK-NEXT: .cfi_offset %rbp, -16
170 ; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
173 ; CHECK-NEXT: #NO_APP
174 ; CHECK-NEXT: blcfillq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
175 ; CHECK-NEXT: popq %rbx
176 ; CHECK-NEXT: .cfi_def_cfa_offset 48
177 ; CHECK-NEXT: popq %r12
178 ; CHECK-NEXT: .cfi_def_cfa_offset 40
179 ; CHECK-NEXT: popq %r13
180 ; CHECK-NEXT: .cfi_def_cfa_offset 32
181 ; CHECK-NEXT: popq %r14
182 ; CHECK-NEXT: .cfi_def_cfa_offset 24
183 ; CHECK-NEXT: popq %r15
184 ; CHECK-NEXT: .cfi_def_cfa_offset 16
185 ; CHECK-NEXT: popq %rbp
186 ; CHECK-NEXT: .cfi_def_cfa_offset 8
188 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Test: %a0 (i32) is spilled to the stack (4-byte Spill) because the inline-asm nop below
; clobbers rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15; the CHECK lines require the reload
; to be folded into the memory operand of blcil (4-byte Folded Reload).
194 define i32 @stack_fold_blci_u32(i32 %a0) {
195 ; CHECK-LABEL: stack_fold_blci_u32:
197 ; CHECK-NEXT: pushq %rbp
198 ; CHECK-NEXT: .cfi_def_cfa_offset 16
199 ; CHECK-NEXT: pushq %r15
200 ; CHECK-NEXT: .cfi_def_cfa_offset 24
201 ; CHECK-NEXT: pushq %r14
202 ; CHECK-NEXT: .cfi_def_cfa_offset 32
203 ; CHECK-NEXT: pushq %r13
204 ; CHECK-NEXT: .cfi_def_cfa_offset 40
205 ; CHECK-NEXT: pushq %r12
206 ; CHECK-NEXT: .cfi_def_cfa_offset 48
207 ; CHECK-NEXT: pushq %rbx
208 ; CHECK-NEXT: .cfi_def_cfa_offset 56
209 ; CHECK-NEXT: .cfi_offset %rbx, -56
210 ; CHECK-NEXT: .cfi_offset %r12, -48
211 ; CHECK-NEXT: .cfi_offset %r13, -40
212 ; CHECK-NEXT: .cfi_offset %r14, -32
213 ; CHECK-NEXT: .cfi_offset %r15, -24
214 ; CHECK-NEXT: .cfi_offset %rbp, -16
215 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
218 ; CHECK-NEXT: #NO_APP
219 ; CHECK-NEXT: blcil {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
220 ; CHECK-NEXT: popq %rbx
221 ; CHECK-NEXT: .cfi_def_cfa_offset 48
222 ; CHECK-NEXT: popq %r12
223 ; CHECK-NEXT: .cfi_def_cfa_offset 40
224 ; CHECK-NEXT: popq %r13
225 ; CHECK-NEXT: .cfi_def_cfa_offset 32
226 ; CHECK-NEXT: popq %r14
227 ; CHECK-NEXT: .cfi_def_cfa_offset 24
228 ; CHECK-NEXT: popq %r15
229 ; CHECK-NEXT: .cfi_def_cfa_offset 16
230 ; CHECK-NEXT: popq %rbp
231 ; CHECK-NEXT: .cfi_def_cfa_offset 8
233 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Test: %a0 (i64) is spilled to the stack (8-byte Spill) because the inline-asm nop below
; clobbers rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15; the CHECK lines require the reload
; to be folded into the memory operand of blciq (8-byte Folded Reload).
240 define i64 @stack_fold_blci_u64(i64 %a0) {
241 ; CHECK-LABEL: stack_fold_blci_u64:
243 ; CHECK-NEXT: pushq %rbp
244 ; CHECK-NEXT: .cfi_def_cfa_offset 16
245 ; CHECK-NEXT: pushq %r15
246 ; CHECK-NEXT: .cfi_def_cfa_offset 24
247 ; CHECK-NEXT: pushq %r14
248 ; CHECK-NEXT: .cfi_def_cfa_offset 32
249 ; CHECK-NEXT: pushq %r13
250 ; CHECK-NEXT: .cfi_def_cfa_offset 40
251 ; CHECK-NEXT: pushq %r12
252 ; CHECK-NEXT: .cfi_def_cfa_offset 48
253 ; CHECK-NEXT: pushq %rbx
254 ; CHECK-NEXT: .cfi_def_cfa_offset 56
255 ; CHECK-NEXT: .cfi_offset %rbx, -56
256 ; CHECK-NEXT: .cfi_offset %r12, -48
257 ; CHECK-NEXT: .cfi_offset %r13, -40
258 ; CHECK-NEXT: .cfi_offset %r14, -32
259 ; CHECK-NEXT: .cfi_offset %r15, -24
260 ; CHECK-NEXT: .cfi_offset %rbp, -16
261 ; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
264 ; CHECK-NEXT: #NO_APP
265 ; CHECK-NEXT: blciq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
266 ; CHECK-NEXT: popq %rbx
267 ; CHECK-NEXT: .cfi_def_cfa_offset 48
268 ; CHECK-NEXT: popq %r12
269 ; CHECK-NEXT: .cfi_def_cfa_offset 40
270 ; CHECK-NEXT: popq %r13
271 ; CHECK-NEXT: .cfi_def_cfa_offset 32
272 ; CHECK-NEXT: popq %r14
273 ; CHECK-NEXT: .cfi_def_cfa_offset 24
274 ; CHECK-NEXT: popq %r15
275 ; CHECK-NEXT: .cfi_def_cfa_offset 16
276 ; CHECK-NEXT: popq %rbp
277 ; CHECK-NEXT: .cfi_def_cfa_offset 8
279 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Test: %a0 (i32) is spilled to the stack (4-byte Spill) because the inline-asm nop below
; clobbers rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15; the CHECK lines require the reload
; to be folded into the memory operand of blcicl (4-byte Folded Reload).
286 define i32 @stack_fold_blcic_u32(i32 %a0) {
287 ; CHECK-LABEL: stack_fold_blcic_u32:
289 ; CHECK-NEXT: pushq %rbp
290 ; CHECK-NEXT: .cfi_def_cfa_offset 16
291 ; CHECK-NEXT: pushq %r15
292 ; CHECK-NEXT: .cfi_def_cfa_offset 24
293 ; CHECK-NEXT: pushq %r14
294 ; CHECK-NEXT: .cfi_def_cfa_offset 32
295 ; CHECK-NEXT: pushq %r13
296 ; CHECK-NEXT: .cfi_def_cfa_offset 40
297 ; CHECK-NEXT: pushq %r12
298 ; CHECK-NEXT: .cfi_def_cfa_offset 48
299 ; CHECK-NEXT: pushq %rbx
300 ; CHECK-NEXT: .cfi_def_cfa_offset 56
301 ; CHECK-NEXT: .cfi_offset %rbx, -56
302 ; CHECK-NEXT: .cfi_offset %r12, -48
303 ; CHECK-NEXT: .cfi_offset %r13, -40
304 ; CHECK-NEXT: .cfi_offset %r14, -32
305 ; CHECK-NEXT: .cfi_offset %r15, -24
306 ; CHECK-NEXT: .cfi_offset %rbp, -16
307 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
310 ; CHECK-NEXT: #NO_APP
311 ; CHECK-NEXT: blcicl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
312 ; CHECK-NEXT: popq %rbx
313 ; CHECK-NEXT: .cfi_def_cfa_offset 48
314 ; CHECK-NEXT: popq %r12
315 ; CHECK-NEXT: .cfi_def_cfa_offset 40
316 ; CHECK-NEXT: popq %r13
317 ; CHECK-NEXT: .cfi_def_cfa_offset 32
318 ; CHECK-NEXT: popq %r14
319 ; CHECK-NEXT: .cfi_def_cfa_offset 24
320 ; CHECK-NEXT: popq %r15
321 ; CHECK-NEXT: .cfi_def_cfa_offset 16
322 ; CHECK-NEXT: popq %rbp
323 ; CHECK-NEXT: .cfi_def_cfa_offset 8
325 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Test: %a0 (i64) is spilled to the stack (8-byte Spill) because the inline-asm nop below
; clobbers rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15; the CHECK lines require the reload
; to be folded into the memory operand of blcicq (8-byte Folded Reload).
332 define i64 @stack_fold_blcic_u64(i64 %a0) {
333 ; CHECK-LABEL: stack_fold_blcic_u64:
335 ; CHECK-NEXT: pushq %rbp
336 ; CHECK-NEXT: .cfi_def_cfa_offset 16
337 ; CHECK-NEXT: pushq %r15
338 ; CHECK-NEXT: .cfi_def_cfa_offset 24
339 ; CHECK-NEXT: pushq %r14
340 ; CHECK-NEXT: .cfi_def_cfa_offset 32
341 ; CHECK-NEXT: pushq %r13
342 ; CHECK-NEXT: .cfi_def_cfa_offset 40
343 ; CHECK-NEXT: pushq %r12
344 ; CHECK-NEXT: .cfi_def_cfa_offset 48
345 ; CHECK-NEXT: pushq %rbx
346 ; CHECK-NEXT: .cfi_def_cfa_offset 56
347 ; CHECK-NEXT: .cfi_offset %rbx, -56
348 ; CHECK-NEXT: .cfi_offset %r12, -48
349 ; CHECK-NEXT: .cfi_offset %r13, -40
350 ; CHECK-NEXT: .cfi_offset %r14, -32
351 ; CHECK-NEXT: .cfi_offset %r15, -24
352 ; CHECK-NEXT: .cfi_offset %rbp, -16
353 ; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
356 ; CHECK-NEXT: #NO_APP
357 ; CHECK-NEXT: blcicq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
358 ; CHECK-NEXT: popq %rbx
359 ; CHECK-NEXT: .cfi_def_cfa_offset 48
360 ; CHECK-NEXT: popq %r12
361 ; CHECK-NEXT: .cfi_def_cfa_offset 40
362 ; CHECK-NEXT: popq %r13
363 ; CHECK-NEXT: .cfi_def_cfa_offset 32
364 ; CHECK-NEXT: popq %r14
365 ; CHECK-NEXT: .cfi_def_cfa_offset 24
366 ; CHECK-NEXT: popq %r15
367 ; CHECK-NEXT: .cfi_def_cfa_offset 16
368 ; CHECK-NEXT: popq %rbp
369 ; CHECK-NEXT: .cfi_def_cfa_offset 8
371 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Test: %a0 (i32) is spilled to the stack (4-byte Spill) because the inline-asm nop below
; clobbers rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15; the CHECK lines require the reload
; to be folded into the memory operand of blcmskl (4-byte Folded Reload).
378 define i32 @stack_fold_blcmsk_u32(i32 %a0) {
379 ; CHECK-LABEL: stack_fold_blcmsk_u32:
381 ; CHECK-NEXT: pushq %rbp
382 ; CHECK-NEXT: .cfi_def_cfa_offset 16
383 ; CHECK-NEXT: pushq %r15
384 ; CHECK-NEXT: .cfi_def_cfa_offset 24
385 ; CHECK-NEXT: pushq %r14
386 ; CHECK-NEXT: .cfi_def_cfa_offset 32
387 ; CHECK-NEXT: pushq %r13
388 ; CHECK-NEXT: .cfi_def_cfa_offset 40
389 ; CHECK-NEXT: pushq %r12
390 ; CHECK-NEXT: .cfi_def_cfa_offset 48
391 ; CHECK-NEXT: pushq %rbx
392 ; CHECK-NEXT: .cfi_def_cfa_offset 56
393 ; CHECK-NEXT: .cfi_offset %rbx, -56
394 ; CHECK-NEXT: .cfi_offset %r12, -48
395 ; CHECK-NEXT: .cfi_offset %r13, -40
396 ; CHECK-NEXT: .cfi_offset %r14, -32
397 ; CHECK-NEXT: .cfi_offset %r15, -24
398 ; CHECK-NEXT: .cfi_offset %rbp, -16
399 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
402 ; CHECK-NEXT: #NO_APP
403 ; CHECK-NEXT: blcmskl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
404 ; CHECK-NEXT: popq %rbx
405 ; CHECK-NEXT: .cfi_def_cfa_offset 48
406 ; CHECK-NEXT: popq %r12
407 ; CHECK-NEXT: .cfi_def_cfa_offset 40
408 ; CHECK-NEXT: popq %r13
409 ; CHECK-NEXT: .cfi_def_cfa_offset 32
410 ; CHECK-NEXT: popq %r14
411 ; CHECK-NEXT: .cfi_def_cfa_offset 24
412 ; CHECK-NEXT: popq %r15
413 ; CHECK-NEXT: .cfi_def_cfa_offset 16
414 ; CHECK-NEXT: popq %rbp
415 ; CHECK-NEXT: .cfi_def_cfa_offset 8
417 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Test: %a0 (i64) is spilled to the stack (8-byte Spill) because the inline-asm nop below
; clobbers rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15; the CHECK lines require the reload
; to be folded into the memory operand of blcmskq (8-byte Folded Reload).
423 define i64 @stack_fold_blcmsk_u64(i64 %a0) {
424 ; CHECK-LABEL: stack_fold_blcmsk_u64:
426 ; CHECK-NEXT: pushq %rbp
427 ; CHECK-NEXT: .cfi_def_cfa_offset 16
428 ; CHECK-NEXT: pushq %r15
429 ; CHECK-NEXT: .cfi_def_cfa_offset 24
430 ; CHECK-NEXT: pushq %r14
431 ; CHECK-NEXT: .cfi_def_cfa_offset 32
432 ; CHECK-NEXT: pushq %r13
433 ; CHECK-NEXT: .cfi_def_cfa_offset 40
434 ; CHECK-NEXT: pushq %r12
435 ; CHECK-NEXT: .cfi_def_cfa_offset 48
436 ; CHECK-NEXT: pushq %rbx
437 ; CHECK-NEXT: .cfi_def_cfa_offset 56
438 ; CHECK-NEXT: .cfi_offset %rbx, -56
439 ; CHECK-NEXT: .cfi_offset %r12, -48
440 ; CHECK-NEXT: .cfi_offset %r13, -40
441 ; CHECK-NEXT: .cfi_offset %r14, -32
442 ; CHECK-NEXT: .cfi_offset %r15, -24
443 ; CHECK-NEXT: .cfi_offset %rbp, -16
444 ; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
447 ; CHECK-NEXT: #NO_APP
448 ; CHECK-NEXT: blcmskq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
449 ; CHECK-NEXT: popq %rbx
450 ; CHECK-NEXT: .cfi_def_cfa_offset 48
451 ; CHECK-NEXT: popq %r12
452 ; CHECK-NEXT: .cfi_def_cfa_offset 40
453 ; CHECK-NEXT: popq %r13
454 ; CHECK-NEXT: .cfi_def_cfa_offset 32
455 ; CHECK-NEXT: popq %r14
456 ; CHECK-NEXT: .cfi_def_cfa_offset 24
457 ; CHECK-NEXT: popq %r15
458 ; CHECK-NEXT: .cfi_def_cfa_offset 16
459 ; CHECK-NEXT: popq %rbp
460 ; CHECK-NEXT: .cfi_def_cfa_offset 8
462 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Test: %a0 (i32) is spilled to the stack (4-byte Spill) because the inline-asm nop below
; clobbers rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15; the CHECK lines require the reload
; to be folded into the memory operand of blcsl (4-byte Folded Reload).
468 define i32 @stack_fold_blcs_u32(i32 %a0) {
469 ; CHECK-LABEL: stack_fold_blcs_u32:
471 ; CHECK-NEXT: pushq %rbp
472 ; CHECK-NEXT: .cfi_def_cfa_offset 16
473 ; CHECK-NEXT: pushq %r15
474 ; CHECK-NEXT: .cfi_def_cfa_offset 24
475 ; CHECK-NEXT: pushq %r14
476 ; CHECK-NEXT: .cfi_def_cfa_offset 32
477 ; CHECK-NEXT: pushq %r13
478 ; CHECK-NEXT: .cfi_def_cfa_offset 40
479 ; CHECK-NEXT: pushq %r12
480 ; CHECK-NEXT: .cfi_def_cfa_offset 48
481 ; CHECK-NEXT: pushq %rbx
482 ; CHECK-NEXT: .cfi_def_cfa_offset 56
483 ; CHECK-NEXT: .cfi_offset %rbx, -56
484 ; CHECK-NEXT: .cfi_offset %r12, -48
485 ; CHECK-NEXT: .cfi_offset %r13, -40
486 ; CHECK-NEXT: .cfi_offset %r14, -32
487 ; CHECK-NEXT: .cfi_offset %r15, -24
488 ; CHECK-NEXT: .cfi_offset %rbp, -16
489 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
492 ; CHECK-NEXT: #NO_APP
493 ; CHECK-NEXT: blcsl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
494 ; CHECK-NEXT: popq %rbx
495 ; CHECK-NEXT: .cfi_def_cfa_offset 48
496 ; CHECK-NEXT: popq %r12
497 ; CHECK-NEXT: .cfi_def_cfa_offset 40
498 ; CHECK-NEXT: popq %r13
499 ; CHECK-NEXT: .cfi_def_cfa_offset 32
500 ; CHECK-NEXT: popq %r14
501 ; CHECK-NEXT: .cfi_def_cfa_offset 24
502 ; CHECK-NEXT: popq %r15
503 ; CHECK-NEXT: .cfi_def_cfa_offset 16
504 ; CHECK-NEXT: popq %rbp
505 ; CHECK-NEXT: .cfi_def_cfa_offset 8
507 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Test: %a0 (i64) is spilled to the stack (8-byte Spill) because the inline-asm nop below
; clobbers rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15; the CHECK lines require the reload
; to be folded into the memory operand of blcsq (8-byte Folded Reload).
513 define i64 @stack_fold_blcs_u64(i64 %a0) {
514 ; CHECK-LABEL: stack_fold_blcs_u64:
516 ; CHECK-NEXT: pushq %rbp
517 ; CHECK-NEXT: .cfi_def_cfa_offset 16
518 ; CHECK-NEXT: pushq %r15
519 ; CHECK-NEXT: .cfi_def_cfa_offset 24
520 ; CHECK-NEXT: pushq %r14
521 ; CHECK-NEXT: .cfi_def_cfa_offset 32
522 ; CHECK-NEXT: pushq %r13
523 ; CHECK-NEXT: .cfi_def_cfa_offset 40
524 ; CHECK-NEXT: pushq %r12
525 ; CHECK-NEXT: .cfi_def_cfa_offset 48
526 ; CHECK-NEXT: pushq %rbx
527 ; CHECK-NEXT: .cfi_def_cfa_offset 56
528 ; CHECK-NEXT: .cfi_offset %rbx, -56
529 ; CHECK-NEXT: .cfi_offset %r12, -48
530 ; CHECK-NEXT: .cfi_offset %r13, -40
531 ; CHECK-NEXT: .cfi_offset %r14, -32
532 ; CHECK-NEXT: .cfi_offset %r15, -24
533 ; CHECK-NEXT: .cfi_offset %rbp, -16
534 ; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
537 ; CHECK-NEXT: #NO_APP
538 ; CHECK-NEXT: blcsq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
539 ; CHECK-NEXT: popq %rbx
540 ; CHECK-NEXT: .cfi_def_cfa_offset 48
541 ; CHECK-NEXT: popq %r12
542 ; CHECK-NEXT: .cfi_def_cfa_offset 40
543 ; CHECK-NEXT: popq %r13
544 ; CHECK-NEXT: .cfi_def_cfa_offset 32
545 ; CHECK-NEXT: popq %r14
546 ; CHECK-NEXT: .cfi_def_cfa_offset 24
547 ; CHECK-NEXT: popq %r15
548 ; CHECK-NEXT: .cfi_def_cfa_offset 16
549 ; CHECK-NEXT: popq %rbp
550 ; CHECK-NEXT: .cfi_def_cfa_offset 8
552 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Test: %a0 (i32) is spilled to the stack (4-byte Spill) because the inline-asm nop below
; clobbers rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15; the CHECK lines require the reload
; to be folded into the memory operand of blsfilll (4-byte Folded Reload).
558 define i32 @stack_fold_blsfill_u32(i32 %a0) {
559 ; CHECK-LABEL: stack_fold_blsfill_u32:
561 ; CHECK-NEXT: pushq %rbp
562 ; CHECK-NEXT: .cfi_def_cfa_offset 16
563 ; CHECK-NEXT: pushq %r15
564 ; CHECK-NEXT: .cfi_def_cfa_offset 24
565 ; CHECK-NEXT: pushq %r14
566 ; CHECK-NEXT: .cfi_def_cfa_offset 32
567 ; CHECK-NEXT: pushq %r13
568 ; CHECK-NEXT: .cfi_def_cfa_offset 40
569 ; CHECK-NEXT: pushq %r12
570 ; CHECK-NEXT: .cfi_def_cfa_offset 48
571 ; CHECK-NEXT: pushq %rbx
572 ; CHECK-NEXT: .cfi_def_cfa_offset 56
573 ; CHECK-NEXT: .cfi_offset %rbx, -56
574 ; CHECK-NEXT: .cfi_offset %r12, -48
575 ; CHECK-NEXT: .cfi_offset %r13, -40
576 ; CHECK-NEXT: .cfi_offset %r14, -32
577 ; CHECK-NEXT: .cfi_offset %r15, -24
578 ; CHECK-NEXT: .cfi_offset %rbp, -16
579 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
582 ; CHECK-NEXT: #NO_APP
583 ; CHECK-NEXT: blsfilll {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
584 ; CHECK-NEXT: popq %rbx
585 ; CHECK-NEXT: .cfi_def_cfa_offset 48
586 ; CHECK-NEXT: popq %r12
587 ; CHECK-NEXT: .cfi_def_cfa_offset 40
588 ; CHECK-NEXT: popq %r13
589 ; CHECK-NEXT: .cfi_def_cfa_offset 32
590 ; CHECK-NEXT: popq %r14
591 ; CHECK-NEXT: .cfi_def_cfa_offset 24
592 ; CHECK-NEXT: popq %r15
593 ; CHECK-NEXT: .cfi_def_cfa_offset 16
594 ; CHECK-NEXT: popq %rbp
595 ; CHECK-NEXT: .cfi_def_cfa_offset 8
597 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Test: %a0 (i64) is spilled to the stack (8-byte Spill) because the inline-asm nop below
; clobbers rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15; the CHECK lines require the reload
; to be folded into the memory operand of blsfillq (8-byte Folded Reload).
603 define i64 @stack_fold_blsfill_u64(i64 %a0) {
604 ; CHECK-LABEL: stack_fold_blsfill_u64:
606 ; CHECK-NEXT: pushq %rbp
607 ; CHECK-NEXT: .cfi_def_cfa_offset 16
608 ; CHECK-NEXT: pushq %r15
609 ; CHECK-NEXT: .cfi_def_cfa_offset 24
610 ; CHECK-NEXT: pushq %r14
611 ; CHECK-NEXT: .cfi_def_cfa_offset 32
612 ; CHECK-NEXT: pushq %r13
613 ; CHECK-NEXT: .cfi_def_cfa_offset 40
614 ; CHECK-NEXT: pushq %r12
615 ; CHECK-NEXT: .cfi_def_cfa_offset 48
616 ; CHECK-NEXT: pushq %rbx
617 ; CHECK-NEXT: .cfi_def_cfa_offset 56
618 ; CHECK-NEXT: .cfi_offset %rbx, -56
619 ; CHECK-NEXT: .cfi_offset %r12, -48
620 ; CHECK-NEXT: .cfi_offset %r13, -40
621 ; CHECK-NEXT: .cfi_offset %r14, -32
622 ; CHECK-NEXT: .cfi_offset %r15, -24
623 ; CHECK-NEXT: .cfi_offset %rbp, -16
624 ; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
627 ; CHECK-NEXT: #NO_APP
628 ; CHECK-NEXT: blsfillq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
629 ; CHECK-NEXT: popq %rbx
630 ; CHECK-NEXT: .cfi_def_cfa_offset 48
631 ; CHECK-NEXT: popq %r12
632 ; CHECK-NEXT: .cfi_def_cfa_offset 40
633 ; CHECK-NEXT: popq %r13
634 ; CHECK-NEXT: .cfi_def_cfa_offset 32
635 ; CHECK-NEXT: popq %r14
636 ; CHECK-NEXT: .cfi_def_cfa_offset 24
637 ; CHECK-NEXT: popq %r15
638 ; CHECK-NEXT: .cfi_def_cfa_offset 16
639 ; CHECK-NEXT: popq %rbp
640 ; CHECK-NEXT: .cfi_def_cfa_offset 8
642 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Test: %a0 (i32) is spilled to the stack (4-byte Spill) because the inline-asm nop below
; clobbers rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15; the CHECK lines require the reload
; to be folded into the memory operand of blsicl (4-byte Folded Reload).
648 define i32 @stack_fold_blsic_u32(i32 %a0) {
649 ; CHECK-LABEL: stack_fold_blsic_u32:
651 ; CHECK-NEXT: pushq %rbp
652 ; CHECK-NEXT: .cfi_def_cfa_offset 16
653 ; CHECK-NEXT: pushq %r15
654 ; CHECK-NEXT: .cfi_def_cfa_offset 24
655 ; CHECK-NEXT: pushq %r14
656 ; CHECK-NEXT: .cfi_def_cfa_offset 32
657 ; CHECK-NEXT: pushq %r13
658 ; CHECK-NEXT: .cfi_def_cfa_offset 40
659 ; CHECK-NEXT: pushq %r12
660 ; CHECK-NEXT: .cfi_def_cfa_offset 48
661 ; CHECK-NEXT: pushq %rbx
662 ; CHECK-NEXT: .cfi_def_cfa_offset 56
663 ; CHECK-NEXT: .cfi_offset %rbx, -56
664 ; CHECK-NEXT: .cfi_offset %r12, -48
665 ; CHECK-NEXT: .cfi_offset %r13, -40
666 ; CHECK-NEXT: .cfi_offset %r14, -32
667 ; CHECK-NEXT: .cfi_offset %r15, -24
668 ; CHECK-NEXT: .cfi_offset %rbp, -16
669 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
672 ; CHECK-NEXT: #NO_APP
673 ; CHECK-NEXT: blsicl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
674 ; CHECK-NEXT: popq %rbx
675 ; CHECK-NEXT: .cfi_def_cfa_offset 48
676 ; CHECK-NEXT: popq %r12
677 ; CHECK-NEXT: .cfi_def_cfa_offset 40
678 ; CHECK-NEXT: popq %r13
679 ; CHECK-NEXT: .cfi_def_cfa_offset 32
680 ; CHECK-NEXT: popq %r14
681 ; CHECK-NEXT: .cfi_def_cfa_offset 24
682 ; CHECK-NEXT: popq %r15
683 ; CHECK-NEXT: .cfi_def_cfa_offset 16
684 ; CHECK-NEXT: popq %rbp
685 ; CHECK-NEXT: .cfi_def_cfa_offset 8
687 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Test: %a0 (i64) is spilled to the stack (8-byte Spill) because the inline-asm nop below
; clobbers rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15; the CHECK lines require the reload
; to be folded into the memory operand of blsicq (8-byte Folded Reload).
694 define i64 @stack_fold_blsic_u64(i64 %a0) {
695 ; CHECK-LABEL: stack_fold_blsic_u64:
697 ; CHECK-NEXT: pushq %rbp
698 ; CHECK-NEXT: .cfi_def_cfa_offset 16
699 ; CHECK-NEXT: pushq %r15
700 ; CHECK-NEXT: .cfi_def_cfa_offset 24
701 ; CHECK-NEXT: pushq %r14
702 ; CHECK-NEXT: .cfi_def_cfa_offset 32
703 ; CHECK-NEXT: pushq %r13
704 ; CHECK-NEXT: .cfi_def_cfa_offset 40
705 ; CHECK-NEXT: pushq %r12
706 ; CHECK-NEXT: .cfi_def_cfa_offset 48
707 ; CHECK-NEXT: pushq %rbx
708 ; CHECK-NEXT: .cfi_def_cfa_offset 56
709 ; CHECK-NEXT: .cfi_offset %rbx, -56
710 ; CHECK-NEXT: .cfi_offset %r12, -48
711 ; CHECK-NEXT: .cfi_offset %r13, -40
712 ; CHECK-NEXT: .cfi_offset %r14, -32
713 ; CHECK-NEXT: .cfi_offset %r15, -24
714 ; CHECK-NEXT: .cfi_offset %rbp, -16
715 ; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
718 ; CHECK-NEXT: #NO_APP
719 ; CHECK-NEXT: blsicq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
720 ; CHECK-NEXT: popq %rbx
721 ; CHECK-NEXT: .cfi_def_cfa_offset 48
722 ; CHECK-NEXT: popq %r12
723 ; CHECK-NEXT: .cfi_def_cfa_offset 40
724 ; CHECK-NEXT: popq %r13
725 ; CHECK-NEXT: .cfi_def_cfa_offset 32
726 ; CHECK-NEXT: popq %r14
727 ; CHECK-NEXT: .cfi_def_cfa_offset 24
728 ; CHECK-NEXT: popq %r15
729 ; CHECK-NEXT: .cfi_def_cfa_offset 16
730 ; CHECK-NEXT: popq %rbp
731 ; CHECK-NEXT: .cfi_def_cfa_offset 8
733 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Test: %a0 (i32) is spilled to the stack (4-byte Spill) because the inline-asm nop below
; clobbers rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15; the CHECK lines require the reload
; to be folded into the memory operand of t1mskcl (4-byte Folded Reload).
740 define i32 @stack_fold_t1mskc_u32(i32 %a0) {
741 ; CHECK-LABEL: stack_fold_t1mskc_u32:
743 ; CHECK-NEXT: pushq %rbp
744 ; CHECK-NEXT: .cfi_def_cfa_offset 16
745 ; CHECK-NEXT: pushq %r15
746 ; CHECK-NEXT: .cfi_def_cfa_offset 24
747 ; CHECK-NEXT: pushq %r14
748 ; CHECK-NEXT: .cfi_def_cfa_offset 32
749 ; CHECK-NEXT: pushq %r13
750 ; CHECK-NEXT: .cfi_def_cfa_offset 40
751 ; CHECK-NEXT: pushq %r12
752 ; CHECK-NEXT: .cfi_def_cfa_offset 48
753 ; CHECK-NEXT: pushq %rbx
754 ; CHECK-NEXT: .cfi_def_cfa_offset 56
755 ; CHECK-NEXT: .cfi_offset %rbx, -56
756 ; CHECK-NEXT: .cfi_offset %r12, -48
757 ; CHECK-NEXT: .cfi_offset %r13, -40
758 ; CHECK-NEXT: .cfi_offset %r14, -32
759 ; CHECK-NEXT: .cfi_offset %r15, -24
760 ; CHECK-NEXT: .cfi_offset %rbp, -16
761 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
764 ; CHECK-NEXT: #NO_APP
765 ; CHECK-NEXT: t1mskcl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
766 ; CHECK-NEXT: popq %rbx
767 ; CHECK-NEXT: .cfi_def_cfa_offset 48
768 ; CHECK-NEXT: popq %r12
769 ; CHECK-NEXT: .cfi_def_cfa_offset 40
770 ; CHECK-NEXT: popq %r13
771 ; CHECK-NEXT: .cfi_def_cfa_offset 32
772 ; CHECK-NEXT: popq %r14
773 ; CHECK-NEXT: .cfi_def_cfa_offset 24
774 ; CHECK-NEXT: popq %r15
775 ; CHECK-NEXT: .cfi_def_cfa_offset 16
776 ; CHECK-NEXT: popq %rbp
777 ; CHECK-NEXT: .cfi_def_cfa_offset 8
779 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Test: %a0 (i64) is spilled to the stack (8-byte Spill) because the inline-asm nop below
; clobbers rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15; the CHECK lines require the reload
; to be folded into the memory operand of t1mskcq (8-byte Folded Reload).
786 define i64 @stack_fold_t1mskc_u64(i64 %a0) {
787 ; CHECK-LABEL: stack_fold_t1mskc_u64:
789 ; CHECK-NEXT: pushq %rbp
790 ; CHECK-NEXT: .cfi_def_cfa_offset 16
791 ; CHECK-NEXT: pushq %r15
792 ; CHECK-NEXT: .cfi_def_cfa_offset 24
793 ; CHECK-NEXT: pushq %r14
794 ; CHECK-NEXT: .cfi_def_cfa_offset 32
795 ; CHECK-NEXT: pushq %r13
796 ; CHECK-NEXT: .cfi_def_cfa_offset 40
797 ; CHECK-NEXT: pushq %r12
798 ; CHECK-NEXT: .cfi_def_cfa_offset 48
799 ; CHECK-NEXT: pushq %rbx
800 ; CHECK-NEXT: .cfi_def_cfa_offset 56
801 ; CHECK-NEXT: .cfi_offset %rbx, -56
802 ; CHECK-NEXT: .cfi_offset %r12, -48
803 ; CHECK-NEXT: .cfi_offset %r13, -40
804 ; CHECK-NEXT: .cfi_offset %r14, -32
805 ; CHECK-NEXT: .cfi_offset %r15, -24
806 ; CHECK-NEXT: .cfi_offset %rbp, -16
807 ; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
810 ; CHECK-NEXT: #NO_APP
811 ; CHECK-NEXT: t1mskcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
812 ; CHECK-NEXT: popq %rbx
813 ; CHECK-NEXT: .cfi_def_cfa_offset 48
814 ; CHECK-NEXT: popq %r12
815 ; CHECK-NEXT: .cfi_def_cfa_offset 40
816 ; CHECK-NEXT: popq %r13
817 ; CHECK-NEXT: .cfi_def_cfa_offset 32
818 ; CHECK-NEXT: popq %r14
819 ; CHECK-NEXT: .cfi_def_cfa_offset 24
820 ; CHECK-NEXT: popq %r15
821 ; CHECK-NEXT: .cfi_def_cfa_offset 16
822 ; CHECK-NEXT: popq %rbp
823 ; CHECK-NEXT: .cfi_def_cfa_offset 8
825 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Test: %a0 (i32) is spilled to the stack (4-byte Spill) because the inline-asm nop below
; clobbers rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15; the CHECK lines require the reload
; to be folded into the memory operand of tzmskl (4-byte Folded Reload).
832 define i32 @stack_fold_tzmsk_u32(i32 %a0) {
833 ; CHECK-LABEL: stack_fold_tzmsk_u32:
835 ; CHECK-NEXT: pushq %rbp
836 ; CHECK-NEXT: .cfi_def_cfa_offset 16
837 ; CHECK-NEXT: pushq %r15
838 ; CHECK-NEXT: .cfi_def_cfa_offset 24
839 ; CHECK-NEXT: pushq %r14
840 ; CHECK-NEXT: .cfi_def_cfa_offset 32
841 ; CHECK-NEXT: pushq %r13
842 ; CHECK-NEXT: .cfi_def_cfa_offset 40
843 ; CHECK-NEXT: pushq %r12
844 ; CHECK-NEXT: .cfi_def_cfa_offset 48
845 ; CHECK-NEXT: pushq %rbx
846 ; CHECK-NEXT: .cfi_def_cfa_offset 56
847 ; CHECK-NEXT: .cfi_offset %rbx, -56
848 ; CHECK-NEXT: .cfi_offset %r12, -48
849 ; CHECK-NEXT: .cfi_offset %r13, -40
850 ; CHECK-NEXT: .cfi_offset %r14, -32
851 ; CHECK-NEXT: .cfi_offset %r15, -24
852 ; CHECK-NEXT: .cfi_offset %rbp, -16
853 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
856 ; CHECK-NEXT: #NO_APP
857 ; CHECK-NEXT: tzmskl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
858 ; CHECK-NEXT: popq %rbx
859 ; CHECK-NEXT: .cfi_def_cfa_offset 48
860 ; CHECK-NEXT: popq %r12
861 ; CHECK-NEXT: .cfi_def_cfa_offset 40
862 ; CHECK-NEXT: popq %r13
863 ; CHECK-NEXT: .cfi_def_cfa_offset 32
864 ; CHECK-NEXT: popq %r14
865 ; CHECK-NEXT: .cfi_def_cfa_offset 24
866 ; CHECK-NEXT: popq %r15
867 ; CHECK-NEXT: .cfi_def_cfa_offset 16
868 ; CHECK-NEXT: popq %rbp
869 ; CHECK-NEXT: .cfi_def_cfa_offset 8
871 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Test: %a0 (i64) is spilled to the stack (8-byte Spill) because the inline-asm nop below
; clobbers rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15; the CHECK lines require the reload
; to be folded into the memory operand of tzmskq (8-byte Folded Reload).
878 define i64 @stack_fold_tzmsk_u64(i64 %a0) {
879 ; CHECK-LABEL: stack_fold_tzmsk_u64:
881 ; CHECK-NEXT: pushq %rbp
882 ; CHECK-NEXT: .cfi_def_cfa_offset 16
883 ; CHECK-NEXT: pushq %r15
884 ; CHECK-NEXT: .cfi_def_cfa_offset 24
885 ; CHECK-NEXT: pushq %r14
886 ; CHECK-NEXT: .cfi_def_cfa_offset 32
887 ; CHECK-NEXT: pushq %r13
888 ; CHECK-NEXT: .cfi_def_cfa_offset 40
889 ; CHECK-NEXT: pushq %r12
890 ; CHECK-NEXT: .cfi_def_cfa_offset 48
891 ; CHECK-NEXT: pushq %rbx
892 ; CHECK-NEXT: .cfi_def_cfa_offset 56
893 ; CHECK-NEXT: .cfi_offset %rbx, -56
894 ; CHECK-NEXT: .cfi_offset %r12, -48
895 ; CHECK-NEXT: .cfi_offset %r13, -40
896 ; CHECK-NEXT: .cfi_offset %r14, -32
897 ; CHECK-NEXT: .cfi_offset %r15, -24
898 ; CHECK-NEXT: .cfi_offset %rbp, -16
899 ; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
902 ; CHECK-NEXT: #NO_APP
903 ; CHECK-NEXT: tzmskq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
904 ; CHECK-NEXT: popq %rbx
905 ; CHECK-NEXT: .cfi_def_cfa_offset 48
906 ; CHECK-NEXT: popq %r12
907 ; CHECK-NEXT: .cfi_def_cfa_offset 40
908 ; CHECK-NEXT: popq %r13
909 ; CHECK-NEXT: .cfi_def_cfa_offset 32
910 ; CHECK-NEXT: popq %r14
911 ; CHECK-NEXT: .cfi_def_cfa_offset 24
912 ; CHECK-NEXT: popq %r15
913 ; CHECK-NEXT: .cfi_def_cfa_offset 16
914 ; CHECK-NEXT: popq %rbp
915 ; CHECK-NEXT: .cfi_def_cfa_offset 8
917 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()