1 ; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64
2 ; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64FAST
4 ; RUN: llc -verify-machineinstrs -mtriple=i686-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86
5 ; RUN: llc -verify-machineinstrs -mtriple=i686-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86FAST
9 ; Test a simple indirect call and tail call.
;
; The body alternates direct calls to @bar with indirect calls through %fp,
; always passing %x, and every call is marked `tail`. The function uses
; attribute group #0 (+retpoline-indirect-calls), so each indirect call is
; expected to be routed through an __llvm_retpoline_<reg> thunk. The command
; lines at the top of the file additionally reject any `call *` / `jmp *` in
; the output.
10 define void @icall_reg(ptr %fp, i32 %x) #0 {
12 tail call void @bar(i32 %x)
13 tail call void %fp(i32 %x)
14 tail call void @bar(i32 %x)
15 tail call void %fp(i32 %x)
; x86-64, optimized. %fp and %x are first copied out of their incoming
; argument registers (captured as [[fp]] and [[x]]). Before each indirect call
; the argument is put back in %edi and the target in %r11; the first indirect
; call is a callq to the r11 thunk and the last one is a jmp (tail call).
19 ; X64-LABEL: icall_reg:
20 ; X64-DAG: movq %rdi, %[[fp:[^ ]*]]
21 ; X64-DAG: movl %esi, %[[x:[^ ]*]]
22 ; X64: movl %esi, %edi
24 ; X64-DAG: movl %[[x]], %edi
25 ; X64-DAG: movq %[[fp]], %r11
26 ; X64: callq __llvm_retpoline_r11
27 ; X64: movl %[[x]], %edi
29 ; X64-DAG: movl %[[x]], %edi
30 ; X64-DAG: movq %[[fp]], %r11
31 ; X64: jmp __llvm_retpoline_r11 # TAILCALL
; x86-64 at -O0. Only the thunk call and the thunk tail call are checked here.
33 ; X64FAST-LABEL: icall_reg:
35 ; X64FAST: callq __llvm_retpoline_r11
37 ; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL
; i686, optimized. Both arguments are loaded from the stack, the target is
; moved into %eax, and both indirect calls are expected as calll to the eax
; thunk (no jmp form is checked for this target).
39 ; X86-LABEL: icall_reg:
40 ; X86-DAG: movl 12(%esp), %[[fp:[^ ]*]]
41 ; X86-DAG: movl 16(%esp), %[[x:[^ ]*]]
44 ; X86: movl %[[fp]], %eax
46 ; X86: calll __llvm_retpoline_eax
49 ; X86: movl %[[fp]], %eax
51 ; X86: calll __llvm_retpoline_eax
; i686 at -O0. Both indirect calls go through the eax thunk via calll.
54 ; X86FAST-LABEL: icall_reg:
56 ; X86FAST: calll __llvm_retpoline_eax
58 ; X86FAST: calll __llvm_retpoline_eax
; Externally defined, dso_local pointer slot that the test below loads its
; call targets from.
61 @global_fp = external dso_local global ptr
63 ; Test an indirect call through a global variable.
;
; @global_fp is loaded twice: the first loaded pointer is invoked with a plain
; call, the second with a `tail` call. %fpp is not used by the visible body.
64 define void @icall_global_fp(i32 %x, ptr %fpp) #0 {
65 %fp1 = load ptr, ptr @global_fp
66 call void %fp1(i32 %x)
67 %fp2 = load ptr, ptr @global_fp
68 tail call void %fp2(i32 %x)
; x86-64, optimized. %x is saved in another register (captured as [[x]]) and
; the target is loaded RIP-relative straight into %r11. The first call is a
; callq to the r11 thunk; the second reloads the global and is a jmp tail call.
72 ; X64-LABEL: icall_global_fp:
73 ; X64-DAG: movl %edi, %[[x:[^ ]*]]
74 ; X64-DAG: movq global_fp(%rip), %r11
75 ; X64: callq __llvm_retpoline_r11
76 ; X64-DAG: movl %[[x]], %edi
77 ; X64-DAG: movq global_fp(%rip), %r11
78 ; X64: jmp __llvm_retpoline_r11 # TAILCALL
; x86-64 at -O0. Same load / thunk-call / load / thunk-tail-call sequence.
80 ; X64FAST-LABEL: icall_global_fp:
81 ; X64FAST: movq global_fp(%rip), %r11
82 ; X64FAST: callq __llvm_retpoline_r11
83 ; X64FAST: movq global_fp(%rip), %r11
84 ; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL
; i686, optimized. The target is loaded into %eax before each transfer; the
; second transfer is expected to be a tail call through the eax thunk.
86 ; X86-LABEL: icall_global_fp:
87 ; X86: movl global_fp, %eax
89 ; X86: calll __llvm_retpoline_eax
91 ; X86: movl global_fp, %eax
92 ; X86: jmp __llvm_retpoline_eax # TAILCALL
; i686 at -O0. Thunk call followed by thunk tail call.
94 ; X86FAST-LABEL: icall_global_fp:
95 ; X86FAST: calll __llvm_retpoline_eax
96 ; X86FAST: jmp __llvm_retpoline_eax # TAILCALL
; Single-pointer struct type; it is not referenced by the lines visible below.
99 %struct.Foo = type { ptr }
101 ; Test an indirect call through a vtable.
;
; The first pointer stored in %obj is treated as a table of pointers. The
; function loads the entry at index 1 from that table and calls it twice with
; %obj as the argument, both times marked `tail`.
102 define void @vcall(ptr %obj) #0 {
103 %vptr = load ptr, ptr %obj
104 %vslot = getelementptr ptr, ptr %vptr, i32 1
105 %fp = load ptr, ptr %vslot
106 tail call void %fp(ptr %obj)
107 tail call void %fp(ptr %obj)
; NOTE(review): no -LABEL directive for the optimized x86-64 or i686 prefixes
; is visible for this function in this view, unlike the other tests - confirm
; against the full file.
;
; x86-64, optimized. %obj is saved (captured as [[obj]]), the table pointer is
; loaded from (%rdi), and slot 1 is read at byte offset 8. The first call is a
; callq to the r11 thunk; the second restores %rdi and is a jmp tail call.
112 ; X64: movq %rdi, %[[obj:[^ ]*]]
113 ; X64: movq (%rdi), %[[vptr:[^ ]*]]
114 ; X64: movq 8(%[[vptr]]), %[[fp:[^ ]*]]
115 ; X64: movq %[[fp]], %r11
116 ; X64: callq __llvm_retpoline_r11
117 ; X64-DAG: movq %[[obj]], %rdi
118 ; X64-DAG: movq %[[fp]], %r11
119 ; X64: jmp __llvm_retpoline_r11 # TAILCALL
; x86-64 at -O0. Thunk call followed by thunk tail call.
121 ; X64FAST-LABEL: vcall:
122 ; X64FAST: callq __llvm_retpoline_r11
123 ; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL
; i686, optimized. %obj comes from the stack and slot 1 is read at byte
; offset 4. The first call pushes %obj and uses calll to the eax thunk; the
; second is a jmp tail call through the same thunk.
126 ; X86: movl 8(%esp), %[[obj:[^ ]*]]
127 ; X86: movl (%[[obj]]), %[[vptr:[^ ]*]]
128 ; X86: movl 4(%[[vptr]]), %[[fp:[^ ]*]]
129 ; X86: movl %[[fp]], %eax
130 ; X86: pushl %[[obj]]
131 ; X86: calll __llvm_retpoline_eax
133 ; X86: movl %[[fp]], %eax
134 ; X86: jmp __llvm_retpoline_eax # TAILCALL
; i686 at -O0. Thunk call followed by thunk tail call.
136 ; X86FAST-LABEL: vcall:
137 ; X86FAST: calll __llvm_retpoline_eax
138 ; X86FAST: jmp __llvm_retpoline_eax # TAILCALL
; Callee for the direct tail-call test below; declared only, defined elsewhere.
141 declare void @direct_callee()
; Test that a direct tail call is left alone: although the caller uses
; attribute group #0, every configuration expects a plain `jmp` to the callee
; through the PLT rather than a transfer through a retpoline thunk.
143 define void @direct_tail() #0 {
144 tail call void @direct_callee()
148 ; X64-LABEL: direct_tail:
149 ; X64: jmp direct_callee@PLT # TAILCALL
150 ; X64FAST-LABEL: direct_tail:
151 ; X64FAST: jmp direct_callee@PLT # TAILCALL
152 ; X86-LABEL: direct_tail:
153 ; X86: jmp direct_callee@PLT # TAILCALL
154 ; X86FAST-LABEL: direct_tail:
155 ; X86FAST: jmp direct_callee@PLT # TAILCALL
; Callee declared with attribute group #2 (nonlazybind).
158 declare void @nonlazybind_callee() #2
; Test calls to a nonlazybind function from a caller that has retpolines
; enabled for indirect calls. The callee is invoked once with a plain call
; and once with a `tail` call.
160 define void @nonlazybind_caller() #0 {
161 call void @nonlazybind_callee()
162 tail call void @nonlazybind_callee()
; x86-64, optimized. The callee address is loaded from the GOT into a register
; (captured as [[REG]]), copied into %r11, and both transfers go through the
; r11 thunk: callq first, then a jmp tail call.
166 ; X64-LABEL: nonlazybind_caller:
167 ; X64: movq nonlazybind_callee@GOTPCREL(%rip), %[[REG:.*]]
168 ; X64: movq %[[REG]], %r11
169 ; X64: callq __llvm_retpoline_r11
170 ; X64: movq %[[REG]], %r11
171 ; X64: jmp __llvm_retpoline_r11 # TAILCALL
; x86-64 at -O0. The GOT entry is loaded directly into %r11 before each
; transfer through the thunk.
172 ; X64FAST-LABEL: nonlazybind_caller:
173 ; X64FAST: movq nonlazybind_callee@GOTPCREL(%rip), %r11
174 ; X64FAST: callq __llvm_retpoline_r11
175 ; X64FAST: movq nonlazybind_callee@GOTPCREL(%rip), %r11
176 ; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL
; i686, both optimization levels. The calls stay direct through the PLT and
; no thunk is expected.
177 ; X86-LABEL: nonlazybind_caller:
178 ; X86: calll nonlazybind_callee@PLT
179 ; X86: jmp nonlazybind_callee@PLT # TAILCALL
180 ; X86FAST-LABEL: nonlazybind_caller:
181 ; X86FAST: calll nonlazybind_callee@PLT
182 ; X86FAST: jmp nonlazybind_callee@PLT # TAILCALL
185 ; Check that a switch gets lowered using a jump table when retpolines are only
; enabled for calls.
;
; The function uses attribute group #0, which turns on retpolines for
; indirect calls but not for indirect branches.
187 define void @switch_jumptable(ptr %ptr, ptr %sink) #0 {
188 ; X64-LABEL: switch_jumptable:
190 ; X86-LABEL: switch_jumptable:
; The switch operand is a volatile load from %ptr and the default destination
; is %bb0. The case list and the block labels are not visible in this view.
196 %i = load volatile i32, ptr %ptr
197 switch i32 %i, label %bb0 [
; Ten volatile stores follow, each writing a distinct constant (0 through 9)
; to %sink.
210 store volatile i64 0, ptr %sink
214 store volatile i64 1, ptr %sink
218 store volatile i64 2, ptr %sink
222 store volatile i64 3, ptr %sink
226 store volatile i64 4, ptr %sink
230 store volatile i64 5, ptr %sink
234 store volatile i64 6, ptr %sink
238 store volatile i64 7, ptr %sink
242 store volatile i64 8, ptr %sink
246 store volatile i64 9, ptr %sink
; Constant table holding the addresses of blocks %bb0 through %bb9 of
; @indirectbr_preserved; the function below indexes it to pick branch targets.
251 @indirectbr_preserved.targets = constant [10 x ptr] [ptr blockaddress(@indirectbr_preserved, %bb0),
252 ptr blockaddress(@indirectbr_preserved, %bb1),
253 ptr blockaddress(@indirectbr_preserved, %bb2),
254 ptr blockaddress(@indirectbr_preserved, %bb3),
255 ptr blockaddress(@indirectbr_preserved, %bb4),
256 ptr blockaddress(@indirectbr_preserved, %bb5),
257 ptr blockaddress(@indirectbr_preserved, %bb6),
258 ptr blockaddress(@indirectbr_preserved, %bb7),
259 ptr blockaddress(@indirectbr_preserved, %bb8),
260 ptr blockaddress(@indirectbr_preserved, %bb9)]
262 ; Check that we preserve indirectbr when only calls are retpolined.
;
; The function uses attribute group #0 (retpolines for indirect calls only).
263 define void @indirectbr_preserved(ptr readonly %p, ptr %sink) #0 {
264 ; X64-LABEL: indirectbr_preserved:
266 ; X86-LABEL: indirectbr_preserved:
; First dispatch: read an index from %p, fetch that entry of the targets
; table, and branch to it. Only %bb1 and %bb3 are listed as possible
; destinations here.
269 %i0 = load i64, ptr %p
270 %target.i0 = getelementptr [10 x ptr], ptr @indirectbr_preserved.targets, i64 0, i64 %i0
271 %target0 = load ptr, ptr %target.i0
272 indirectbr ptr %target0, [label %bb1, label %bb3]
; Ten volatile stores follow, each writing a distinct constant (0 through 9)
; to %sink. The block labels between them are not visible in this view.
275 store volatile i64 0, ptr %sink
279 store volatile i64 1, ptr %sink
283 store volatile i64 2, ptr %sink
287 store volatile i64 3, ptr %sink
291 store volatile i64 4, ptr %sink
295 store volatile i64 5, ptr %sink
299 store volatile i64 6, ptr %sink
303 store volatile i64 7, ptr %sink
307 store volatile i64 8, ptr %sink
311 store volatile i64 9, ptr %sink
; Second dispatch: same table lookup with a freshly loaded index.
315 %i.next = load i64, ptr %p
316 %target.i.next = getelementptr [10 x ptr], ptr @indirectbr_preserved.targets, i64 0, i64 %i.next
317 %target.next = load ptr, ptr %target.i.next
318 ; Potentially hit a full 10 successors here so that even if we rewrite as
319 ; a switch it will try to be lowered with a jump table.
320 indirectbr ptr %target.next, [label %bb0,
; Constant table holding the addresses of blocks %bb0 through %bb9 of
; @indirectbr_rewrite; the function below indexes it to pick branch targets.
332 @indirectbr_rewrite.targets = constant [10 x ptr] [ptr blockaddress(@indirectbr_rewrite, %bb0),
333 ptr blockaddress(@indirectbr_rewrite, %bb1),
334 ptr blockaddress(@indirectbr_rewrite, %bb2),
335 ptr blockaddress(@indirectbr_rewrite, %bb3),
336 ptr blockaddress(@indirectbr_rewrite, %bb4),
337 ptr blockaddress(@indirectbr_rewrite, %bb5),
338 ptr blockaddress(@indirectbr_rewrite, %bb6),
339 ptr blockaddress(@indirectbr_rewrite, %bb7),
340 ptr blockaddress(@indirectbr_rewrite, %bb8),
341 ptr blockaddress(@indirectbr_rewrite, %bb9)]
343 ; Check that when retpolines are enabled for indirect branches the indirectbr
344 ; instruction gets rewritten to use switch, and that in turn doesn't get lowered
; as a jump table.
;
; The function uses attribute group #1, which enables retpolines for both
; indirect calls and indirect branches. Its body mirrors
; @indirectbr_preserved, which uses group #0 instead.
346 define void @indirectbr_rewrite(ptr readonly %p, ptr %sink) #1 {
347 ; X64-LABEL: indirectbr_rewrite:
349 ; X86-LABEL: indirectbr_rewrite:
; First dispatch: read an index from %p, fetch that entry of the targets
; table, and branch to it. Only %bb1 and %bb3 are listed as possible
; destinations here.
352 %i0 = load i64, ptr %p
353 %target.i0 = getelementptr [10 x ptr], ptr @indirectbr_rewrite.targets, i64 0, i64 %i0
354 %target0 = load ptr, ptr %target.i0
355 indirectbr ptr %target0, [label %bb1, label %bb3]
; Ten volatile stores follow, each writing a distinct constant (0 through 9)
; to %sink. The block labels between them are not visible in this view.
358 store volatile i64 0, ptr %sink
362 store volatile i64 1, ptr %sink
366 store volatile i64 2, ptr %sink
370 store volatile i64 3, ptr %sink
374 store volatile i64 4, ptr %sink
378 store volatile i64 5, ptr %sink
382 store volatile i64 6, ptr %sink
386 store volatile i64 7, ptr %sink
390 store volatile i64 8, ptr %sink
394 store volatile i64 9, ptr %sink
; Second dispatch: same table lookup with a freshly loaded index.
398 %i.next = load i64, ptr %p
399 %target.i.next = getelementptr [10 x ptr], ptr @indirectbr_rewrite.targets, i64 0, i64 %i.next
400 %target.next = load ptr, ptr %target.i.next
401 ; Potentially hit a full 10 successors here so that even if we rewrite as
402 ; a switch it will try to be lowered with a jump table.
403 indirectbr ptr %target.next, [label %bb0,
415 ; Lastly check that the necessary thunks were emitted.
;
; Expected shape of the x86-64 thunk, as far as this view shows it: the thunk
; lives in its own comdat section and is both hidden and weak. Its entry block
; issues a callq to a local label (captured as CALL_TARGET). The code after
; that callq is a loop whose jmp goes back to its own label (captured as
; CAPTURE_SPEC). The CALL_TARGET block, aligned with .p2align 4, stores %r11
; to (%rsp), i.e. over the return address pushed by the callq.
417 ; X64-LABEL: .section .text.__llvm_retpoline_r11,{{.*}},__llvm_retpoline_r11,comdat
418 ; X64-NEXT: .hidden __llvm_retpoline_r11
419 ; X64-NEXT: .weak __llvm_retpoline_r11
420 ; X64: __llvm_retpoline_r11:
421 ; X64-NEXT: # {{.*}} # %entry
422 ; X64-NEXT: callq [[CALL_TARGET:.*]]
423 ; X64-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
425 ; X64-NEXT: # =>This Inner Loop Header: Depth=1
428 ; X64-NEXT: jmp [[CAPTURE_SPEC]]
429 ; X64-NEXT: .p2align 4, 0x90
430 ; X64-NEXT: {{.*}} # Block address taken
432 ; X64-NEXT: [[CALL_TARGET]]:
433 ; X64-NEXT: movq %r11, (%rsp)
; i686 thunks. Four thunks are expected, one each for %eax, %ecx, %edx and
; %edi, and all four follow the same pattern: own comdat section, hidden and
; weak symbol, a calll to a local label (CALL_TARGET), a loop that jumps back
; to its own label (CAPTURE_SPEC), and a CALL_TARGET block that stores the
; thunk's register to (%esp). Only the eax thunk is named by the function
; checks visible in this view; presumably the other three are required by
; code or checks not shown here - confirm against the full file.
436 ; X86-LABEL: .section .text.__llvm_retpoline_eax,{{.*}},__llvm_retpoline_eax,comdat
437 ; X86-NEXT: .hidden __llvm_retpoline_eax
438 ; X86-NEXT: .weak __llvm_retpoline_eax
439 ; X86: __llvm_retpoline_eax:
440 ; X86-NEXT: # {{.*}} # %entry
441 ; X86-NEXT: calll [[CALL_TARGET:.*]]
442 ; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
444 ; X86-NEXT: # =>This Inner Loop Header: Depth=1
447 ; X86-NEXT: jmp [[CAPTURE_SPEC]]
448 ; X86-NEXT: .p2align 4, 0x90
449 ; X86-NEXT: {{.*}} # Block address taken
451 ; X86-NEXT: [[CALL_TARGET]]:
452 ; X86-NEXT: movl %eax, (%esp)
; Thunk for %ecx.
455 ; X86-LABEL: .section .text.__llvm_retpoline_ecx,{{.*}},__llvm_retpoline_ecx,comdat
456 ; X86-NEXT: .hidden __llvm_retpoline_ecx
457 ; X86-NEXT: .weak __llvm_retpoline_ecx
458 ; X86: __llvm_retpoline_ecx:
459 ; X86-NEXT: # {{.*}} # %entry
460 ; X86-NEXT: calll [[CALL_TARGET:.*]]
461 ; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
463 ; X86-NEXT: # =>This Inner Loop Header: Depth=1
466 ; X86-NEXT: jmp [[CAPTURE_SPEC]]
467 ; X86-NEXT: .p2align 4, 0x90
468 ; X86-NEXT: {{.*}} # Block address taken
470 ; X86-NEXT: [[CALL_TARGET]]:
471 ; X86-NEXT: movl %ecx, (%esp)
; Thunk for %edx.
474 ; X86-LABEL: .section .text.__llvm_retpoline_edx,{{.*}},__llvm_retpoline_edx,comdat
475 ; X86-NEXT: .hidden __llvm_retpoline_edx
476 ; X86-NEXT: .weak __llvm_retpoline_edx
477 ; X86: __llvm_retpoline_edx:
478 ; X86-NEXT: # {{.*}} # %entry
479 ; X86-NEXT: calll [[CALL_TARGET:.*]]
480 ; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
482 ; X86-NEXT: # =>This Inner Loop Header: Depth=1
485 ; X86-NEXT: jmp [[CAPTURE_SPEC]]
486 ; X86-NEXT: .p2align 4, 0x90
487 ; X86-NEXT: {{.*}} # Block address taken
489 ; X86-NEXT: [[CALL_TARGET]]:
490 ; X86-NEXT: movl %edx, (%esp)
; Thunk for %edi.
493 ; X86-LABEL: .section .text.__llvm_retpoline_edi,{{.*}},__llvm_retpoline_edi,comdat
494 ; X86-NEXT: .hidden __llvm_retpoline_edi
495 ; X86-NEXT: .weak __llvm_retpoline_edi
496 ; X86: __llvm_retpoline_edi:
497 ; X86-NEXT: # {{.*}} # %entry
498 ; X86-NEXT: calll [[CALL_TARGET:.*]]
499 ; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
501 ; X86-NEXT: # =>This Inner Loop Header: Depth=1
504 ; X86-NEXT: jmp [[CAPTURE_SPEC]]
505 ; X86-NEXT: .p2align 4, 0x90
506 ; X86-NEXT: {{.*}} # Block address taken
508 ; X86-NEXT: [[CALL_TARGET]]:
509 ; X86-NEXT: movl %edi, (%esp)
; Attribute groups referenced by the definitions and declarations in this file.
; Group #0 enables retpolines for indirect calls only.
513 attributes #0 = { "target-features"="+retpoline-indirect-calls" }
; Group #1 enables retpolines for both indirect calls and indirect branches;
; in the visible lines it is used only by @indirectbr_rewrite.
514 attributes #1 = { "target-features"="+retpoline-indirect-calls,+retpoline-indirect-branches" }
; Group #2 marks a function as nonlazybind; it is applied to the
; @nonlazybind_callee declaration.
515 attributes #2 = { nonlazybind }