1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+adx < %s | FileCheck %s --check-prefix=CHECK
3 ; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=-adx < %s | FileCheck %s --check-prefix=CHECK
5 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
6 target triple = "x86_64-unknown-unknown"
8 ; Stack reload folding tests.
10 ; By including a nop call with sideeffects we can force a partial register spill of the
11 ; relevant registers and check that the reload is correctly folded into the instruction.
; addcarry.32: the incoming carry byte is rematerialized with 'addb $-1, %al'
; and the second addend must be folded straight from its stack slot into the
; adcl (the "4-byte Folded Reload" line) rather than reloaded into a register.
13 define i8 @stack_fold_addcarry_u32(i8 %a0, i32 %a1, i32 %a2, ptr %a3) {
14 ; CHECK-LABEL: stack_fold_addcarry_u32:
16 ; CHECK-NEXT: pushq %rbp
17 ; CHECK-NEXT: .cfi_def_cfa_offset 16
18 ; CHECK-NEXT: pushq %r15
19 ; CHECK-NEXT: .cfi_def_cfa_offset 24
20 ; CHECK-NEXT: pushq %r14
21 ; CHECK-NEXT: .cfi_def_cfa_offset 32
22 ; CHECK-NEXT: pushq %r13
23 ; CHECK-NEXT: .cfi_def_cfa_offset 40
24 ; CHECK-NEXT: pushq %r12
25 ; CHECK-NEXT: .cfi_def_cfa_offset 48
26 ; CHECK-NEXT: pushq %rbx
27 ; CHECK-NEXT: .cfi_def_cfa_offset 56
28 ; CHECK-NEXT: .cfi_offset %rbx, -56
29 ; CHECK-NEXT: .cfi_offset %r12, -48
30 ; CHECK-NEXT: .cfi_offset %r13, -40
31 ; CHECK-NEXT: .cfi_offset %r14, -32
32 ; CHECK-NEXT: .cfi_offset %r15, -24
33 ; CHECK-NEXT: .cfi_offset %rbp, -16
34 ; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
35 ; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
36 ; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
37 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
41 ; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
42 ; CHECK-NEXT: addb $-1, %al
43 ; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
44 ; CHECK-NEXT: adcl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload
45 ; CHECK-NEXT: setb %al
46 ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
47 ; CHECK-NEXT: movl %edx, (%rcx)
48 ; CHECK-NEXT: popq %rbx
49 ; CHECK-NEXT: .cfi_def_cfa_offset 48
50 ; CHECK-NEXT: popq %r12
51 ; CHECK-NEXT: .cfi_def_cfa_offset 40
52 ; CHECK-NEXT: popq %r13
53 ; CHECK-NEXT: .cfi_def_cfa_offset 32
54 ; CHECK-NEXT: popq %r14
55 ; CHECK-NEXT: .cfi_def_cfa_offset 24
56 ; CHECK-NEXT: popq %r15
57 ; CHECK-NEXT: .cfi_def_cfa_offset 16
58 ; CHECK-NEXT: popq %rbp
59 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; The asm clobbers every GPR, forcing %a0-%a3 into stack slots across it.
61 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
62 %2 = call { i8, i32 } @llvm.x86.addcarry.32(i8 %a0, i32 %a1, i32 %a2)
63 %3 = extractvalue { i8, i32 } %2, 1
64 store i32 %3, ptr %a3, align 1
65 %4 = extractvalue { i8, i32 } %2, 0
; 64-bit variant of the test above: the second addend folds from its slot as
; 'adcq mem, %rdx' (the "8-byte Folded Reload" line).
69 define i8 @stack_fold_addcarry_u64(i8 %a0, i64 %a1, i64 %a2, ptr %a3) {
70 ; CHECK-LABEL: stack_fold_addcarry_u64:
72 ; CHECK-NEXT: pushq %rbp
73 ; CHECK-NEXT: .cfi_def_cfa_offset 16
74 ; CHECK-NEXT: pushq %r15
75 ; CHECK-NEXT: .cfi_def_cfa_offset 24
76 ; CHECK-NEXT: pushq %r14
77 ; CHECK-NEXT: .cfi_def_cfa_offset 32
78 ; CHECK-NEXT: pushq %r13
79 ; CHECK-NEXT: .cfi_def_cfa_offset 40
80 ; CHECK-NEXT: pushq %r12
81 ; CHECK-NEXT: .cfi_def_cfa_offset 48
82 ; CHECK-NEXT: pushq %rbx
83 ; CHECK-NEXT: .cfi_def_cfa_offset 56
84 ; CHECK-NEXT: .cfi_offset %rbx, -56
85 ; CHECK-NEXT: .cfi_offset %r12, -48
86 ; CHECK-NEXT: .cfi_offset %r13, -40
87 ; CHECK-NEXT: .cfi_offset %r14, -32
88 ; CHECK-NEXT: .cfi_offset %r15, -24
89 ; CHECK-NEXT: .cfi_offset %rbp, -16
90 ; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
91 ; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
92 ; CHECK-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
93 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
97 ; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
98 ; CHECK-NEXT: addb $-1, %al
99 ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
100 ; CHECK-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
101 ; CHECK-NEXT: setb %al
102 ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
103 ; CHECK-NEXT: movq %rdx, (%rcx)
104 ; CHECK-NEXT: popq %rbx
105 ; CHECK-NEXT: .cfi_def_cfa_offset 48
106 ; CHECK-NEXT: popq %r12
107 ; CHECK-NEXT: .cfi_def_cfa_offset 40
108 ; CHECK-NEXT: popq %r13
109 ; CHECK-NEXT: .cfi_def_cfa_offset 32
110 ; CHECK-NEXT: popq %r14
111 ; CHECK-NEXT: .cfi_def_cfa_offset 24
112 ; CHECK-NEXT: popq %r15
113 ; CHECK-NEXT: .cfi_def_cfa_offset 16
114 ; CHECK-NEXT: popq %rbp
115 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; The asm clobbers every GPR, forcing %a0-%a3 into stack slots across it.
117 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
118 %2 = call { i8, i64 } @llvm.x86.addcarry.64(i8 %a0, i64 %a1, i64 %a2)
119 %3 = extractvalue { i8, i64 } %2, 1
120 store i64 %3, ptr %a3, align 1
121 %4 = extractvalue { i8, i64 } %2, 0
; "addcarryx" test: the IR still calls @llvm.x86.addcarry.32, and both RUN
; lines (+adx and -adx) share the single CHECK prefix, so the expected
; lowering is the same plain ADC sequence as stack_fold_addcarry_u32.
125 define i8 @stack_fold_addcarryx_u32(i8 %a0, i32 %a1, i32 %a2, ptr %a3) {
126 ; CHECK-LABEL: stack_fold_addcarryx_u32:
128 ; CHECK-NEXT: pushq %rbp
129 ; CHECK-NEXT: .cfi_def_cfa_offset 16
130 ; CHECK-NEXT: pushq %r15
131 ; CHECK-NEXT: .cfi_def_cfa_offset 24
132 ; CHECK-NEXT: pushq %r14
133 ; CHECK-NEXT: .cfi_def_cfa_offset 32
134 ; CHECK-NEXT: pushq %r13
135 ; CHECK-NEXT: .cfi_def_cfa_offset 40
136 ; CHECK-NEXT: pushq %r12
137 ; CHECK-NEXT: .cfi_def_cfa_offset 48
138 ; CHECK-NEXT: pushq %rbx
139 ; CHECK-NEXT: .cfi_def_cfa_offset 56
140 ; CHECK-NEXT: .cfi_offset %rbx, -56
141 ; CHECK-NEXT: .cfi_offset %r12, -48
142 ; CHECK-NEXT: .cfi_offset %r13, -40
143 ; CHECK-NEXT: .cfi_offset %r14, -32
144 ; CHECK-NEXT: .cfi_offset %r15, -24
145 ; CHECK-NEXT: .cfi_offset %rbp, -16
146 ; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
147 ; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
148 ; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
149 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
152 ; CHECK-NEXT: #NO_APP
153 ; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
154 ; CHECK-NEXT: addb $-1, %al
155 ; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
156 ; CHECK-NEXT: adcl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload
157 ; CHECK-NEXT: setb %al
158 ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
159 ; CHECK-NEXT: movl %edx, (%rcx)
160 ; CHECK-NEXT: popq %rbx
161 ; CHECK-NEXT: .cfi_def_cfa_offset 48
162 ; CHECK-NEXT: popq %r12
163 ; CHECK-NEXT: .cfi_def_cfa_offset 40
164 ; CHECK-NEXT: popq %r13
165 ; CHECK-NEXT: .cfi_def_cfa_offset 32
166 ; CHECK-NEXT: popq %r14
167 ; CHECK-NEXT: .cfi_def_cfa_offset 24
168 ; CHECK-NEXT: popq %r15
169 ; CHECK-NEXT: .cfi_def_cfa_offset 16
170 ; CHECK-NEXT: popq %rbp
171 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; The asm clobbers every GPR, forcing %a0-%a3 into stack slots across it.
173 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
174 %2 = call { i8, i32 } @llvm.x86.addcarry.32(i8 %a0, i32 %a1, i32 %a2)
175 %3 = extractvalue { i8, i32 } %2, 1
176 store i32 %3, ptr %a3, align 1
177 %4 = extractvalue { i8, i32 } %2, 0
; 64-bit "addcarryx" test: the IR calls @llvm.x86.addcarry.64, and with one
; CHECK prefix for both +adx and -adx RUN lines the expected lowering is the
; same adcq folded-reload sequence as stack_fold_addcarry_u64.
181 define i8 @stack_fold_addcarryx_u64(i8 %a0, i64 %a1, i64 %a2, ptr %a3) {
182 ; CHECK-LABEL: stack_fold_addcarryx_u64:
184 ; CHECK-NEXT: pushq %rbp
185 ; CHECK-NEXT: .cfi_def_cfa_offset 16
186 ; CHECK-NEXT: pushq %r15
187 ; CHECK-NEXT: .cfi_def_cfa_offset 24
188 ; CHECK-NEXT: pushq %r14
189 ; CHECK-NEXT: .cfi_def_cfa_offset 32
190 ; CHECK-NEXT: pushq %r13
191 ; CHECK-NEXT: .cfi_def_cfa_offset 40
192 ; CHECK-NEXT: pushq %r12
193 ; CHECK-NEXT: .cfi_def_cfa_offset 48
194 ; CHECK-NEXT: pushq %rbx
195 ; CHECK-NEXT: .cfi_def_cfa_offset 56
196 ; CHECK-NEXT: .cfi_offset %rbx, -56
197 ; CHECK-NEXT: .cfi_offset %r12, -48
198 ; CHECK-NEXT: .cfi_offset %r13, -40
199 ; CHECK-NEXT: .cfi_offset %r14, -32
200 ; CHECK-NEXT: .cfi_offset %r15, -24
201 ; CHECK-NEXT: .cfi_offset %rbp, -16
202 ; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
203 ; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
204 ; CHECK-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
205 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
208 ; CHECK-NEXT: #NO_APP
209 ; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
210 ; CHECK-NEXT: addb $-1, %al
211 ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
212 ; CHECK-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
213 ; CHECK-NEXT: setb %al
214 ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
215 ; CHECK-NEXT: movq %rdx, (%rcx)
216 ; CHECK-NEXT: popq %rbx
217 ; CHECK-NEXT: .cfi_def_cfa_offset 48
218 ; CHECK-NEXT: popq %r12
219 ; CHECK-NEXT: .cfi_def_cfa_offset 40
220 ; CHECK-NEXT: popq %r13
221 ; CHECK-NEXT: .cfi_def_cfa_offset 32
222 ; CHECK-NEXT: popq %r14
223 ; CHECK-NEXT: .cfi_def_cfa_offset 24
224 ; CHECK-NEXT: popq %r15
225 ; CHECK-NEXT: .cfi_def_cfa_offset 16
226 ; CHECK-NEXT: popq %rbp
227 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; The asm clobbers every GPR, forcing %a0-%a3 into stack slots across it.
229 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
230 %2 = call { i8, i64 } @llvm.x86.addcarry.64(i8 %a0, i64 %a1, i64 %a2)
231 %3 = extractvalue { i8, i64 } %2, 1
232 store i64 %3, ptr %a3, align 1
233 %4 = extractvalue { i8, i64 } %2, 0
; subborrow.32: same pattern as the addcarry tests, but the subtrahend must
; fold from its stack slot into 'sbbl mem, %edx' ("4-byte Folded Reload").
237 define i8 @stack_fold_subborrow_u32(i8 %a0, i32 %a1, i32 %a2, ptr %a3) {
238 ; CHECK-LABEL: stack_fold_subborrow_u32:
240 ; CHECK-NEXT: pushq %rbp
241 ; CHECK-NEXT: .cfi_def_cfa_offset 16
242 ; CHECK-NEXT: pushq %r15
243 ; CHECK-NEXT: .cfi_def_cfa_offset 24
244 ; CHECK-NEXT: pushq %r14
245 ; CHECK-NEXT: .cfi_def_cfa_offset 32
246 ; CHECK-NEXT: pushq %r13
247 ; CHECK-NEXT: .cfi_def_cfa_offset 40
248 ; CHECK-NEXT: pushq %r12
249 ; CHECK-NEXT: .cfi_def_cfa_offset 48
250 ; CHECK-NEXT: pushq %rbx
251 ; CHECK-NEXT: .cfi_def_cfa_offset 56
252 ; CHECK-NEXT: .cfi_offset %rbx, -56
253 ; CHECK-NEXT: .cfi_offset %r12, -48
254 ; CHECK-NEXT: .cfi_offset %r13, -40
255 ; CHECK-NEXT: .cfi_offset %r14, -32
256 ; CHECK-NEXT: .cfi_offset %r15, -24
257 ; CHECK-NEXT: .cfi_offset %rbp, -16
258 ; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
259 ; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
260 ; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
261 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
264 ; CHECK-NEXT: #NO_APP
265 ; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
266 ; CHECK-NEXT: addb $-1, %al
267 ; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
268 ; CHECK-NEXT: sbbl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload
269 ; CHECK-NEXT: setb %al
270 ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
271 ; CHECK-NEXT: movl %edx, (%rcx)
272 ; CHECK-NEXT: popq %rbx
273 ; CHECK-NEXT: .cfi_def_cfa_offset 48
274 ; CHECK-NEXT: popq %r12
275 ; CHECK-NEXT: .cfi_def_cfa_offset 40
276 ; CHECK-NEXT: popq %r13
277 ; CHECK-NEXT: .cfi_def_cfa_offset 32
278 ; CHECK-NEXT: popq %r14
279 ; CHECK-NEXT: .cfi_def_cfa_offset 24
280 ; CHECK-NEXT: popq %r15
281 ; CHECK-NEXT: .cfi_def_cfa_offset 16
282 ; CHECK-NEXT: popq %rbp
283 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; The asm clobbers every GPR, forcing %a0-%a3 into stack slots across it.
285 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
286 %2 = call { i8, i32 } @llvm.x86.subborrow.32(i8 %a0, i32 %a1, i32 %a2)
287 %3 = extractvalue { i8, i32 } %2, 1
288 store i32 %3, ptr %a3, align 1
289 %4 = extractvalue { i8, i32 } %2, 0
; 64-bit subborrow test: the subtrahend folds from its stack slot as
; 'sbbq mem, %rdx' ("8-byte Folded Reload").
293 define i8 @stack_fold_subborrow_u64(i8 %a0, i64 %a1, i64 %a2, ptr %a3) {
294 ; CHECK-LABEL: stack_fold_subborrow_u64:
296 ; CHECK-NEXT: pushq %rbp
297 ; CHECK-NEXT: .cfi_def_cfa_offset 16
298 ; CHECK-NEXT: pushq %r15
299 ; CHECK-NEXT: .cfi_def_cfa_offset 24
300 ; CHECK-NEXT: pushq %r14
301 ; CHECK-NEXT: .cfi_def_cfa_offset 32
302 ; CHECK-NEXT: pushq %r13
303 ; CHECK-NEXT: .cfi_def_cfa_offset 40
304 ; CHECK-NEXT: pushq %r12
305 ; CHECK-NEXT: .cfi_def_cfa_offset 48
306 ; CHECK-NEXT: pushq %rbx
307 ; CHECK-NEXT: .cfi_def_cfa_offset 56
308 ; CHECK-NEXT: .cfi_offset %rbx, -56
309 ; CHECK-NEXT: .cfi_offset %r12, -48
310 ; CHECK-NEXT: .cfi_offset %r13, -40
311 ; CHECK-NEXT: .cfi_offset %r14, -32
312 ; CHECK-NEXT: .cfi_offset %r15, -24
313 ; CHECK-NEXT: .cfi_offset %rbp, -16
314 ; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
315 ; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
316 ; CHECK-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
317 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
320 ; CHECK-NEXT: #NO_APP
321 ; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
322 ; CHECK-NEXT: addb $-1, %al
323 ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
324 ; CHECK-NEXT: sbbq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
325 ; CHECK-NEXT: setb %al
326 ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
327 ; CHECK-NEXT: movq %rdx, (%rcx)
328 ; CHECK-NEXT: popq %rbx
329 ; CHECK-NEXT: .cfi_def_cfa_offset 48
330 ; CHECK-NEXT: popq %r12
331 ; CHECK-NEXT: .cfi_def_cfa_offset 40
332 ; CHECK-NEXT: popq %r13
333 ; CHECK-NEXT: .cfi_def_cfa_offset 32
334 ; CHECK-NEXT: popq %r14
335 ; CHECK-NEXT: .cfi_def_cfa_offset 24
336 ; CHECK-NEXT: popq %r15
337 ; CHECK-NEXT: .cfi_def_cfa_offset 16
338 ; CHECK-NEXT: popq %rbp
339 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; The asm clobbers every GPR, forcing %a0-%a3 into stack slots across it.
341 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
342 %2 = call { i8, i64 } @llvm.x86.subborrow.64(i8 %a0, i64 %a1, i64 %a2)
343 %3 = extractvalue { i8, i64 } %2, 1
344 store i64 %3, ptr %a3, align 1
345 %4 = extractvalue { i8, i64 } %2, 0
; Target intrinsics under test: each returns { carry-out i8, result }.
349 declare { i8, i32 } @llvm.x86.addcarry.32(i8, i32, i32)
350 declare { i8, i64 } @llvm.x86.addcarry.64(i8, i64, i64)
351 declare { i8, i32 } @llvm.x86.subborrow.32(i8, i32, i32)
352 declare { i8, i64 } @llvm.x86.subborrow.64(i8, i64, i64)