[ARM] Adjust how NEON shifts are lowered
[llvm-core.git] / test / CodeGen / X86 / retpoline.ll
blob9a1673e8a5672ccd1785e60a8cfcc1da575d7119
1 ; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64
2 ; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64FAST
4 ; RUN: llc -verify-machineinstrs -mtriple=i686-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86
5 ; RUN: llc -verify-machineinstrs -mtriple=i686-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86FAST
7 declare void @bar(i32)
; @bar is declared but not defined in this file; it is the target of the
; direct calls in @icall_reg below.
9 ; Test a simple indirect call and tail call.
; @icall_reg interleaves direct calls to @bar with calls through the function
; pointer %fp, passing %x every time; all four calls carry the `tail` marker.
; The function uses attribute group #0. The check lines that follow the body
; expect each call through %fp to become a call or jump to an
; __llvm_retpoline_* thunk, while the calls to @bar stay direct.
10 define void @icall_reg(void (i32)* %fp, i32 %x) #0 {
11 entry:
12   tail call void @bar(i32 %x)
13   tail call void %fp(i32 %x)
14   tail call void @bar(i32 %x)
15   tail call void %fp(i32 %x)
16   ret void
; NOTE(review): no closing `}` is visible for this body in this copy (the
; embedded numbering jumps from 16 to 19) -- confirm against upstream.
19 ; X64-LABEL: icall_reg:
20 ; X64-DAG:   movq %rdi, %[[fp:[^ ]*]]
21 ; X64-DAG:   movl %esi, %[[x:[^ ]*]]
22 ; X64:       movl %esi, %edi
23 ; X64:       callq bar
24 ; X64-DAG:   movl %[[x]], %edi
25 ; X64-DAG:   movq %[[fp]], %r11
26 ; X64:       callq __llvm_retpoline_r11
27 ; X64:       movl %[[x]], %edi
28 ; X64:       callq bar
29 ; X64-DAG:   movl %[[x]], %edi
30 ; X64-DAG:   movq %[[fp]], %r11
31 ; X64:       jmp __llvm_retpoline_r11 # TAILCALL
33 ; X64FAST-LABEL: icall_reg:
34 ; X64FAST:       callq bar
35 ; X64FAST:       callq __llvm_retpoline_r11
36 ; X64FAST:       callq bar
37 ; X64FAST:       jmp __llvm_retpoline_r11 # TAILCALL
39 ; X86-LABEL: icall_reg:
40 ; X86-DAG:   movl 12(%esp), %[[fp:[^ ]*]]
41 ; X86-DAG:   movl 16(%esp), %[[x:[^ ]*]]
42 ; X86:       pushl %[[x]]
43 ; X86:       calll bar
44 ; X86:       movl %[[fp]], %eax
45 ; X86:       pushl %[[x]]
46 ; X86:       calll __llvm_retpoline_eax
47 ; X86:       pushl %[[x]]
48 ; X86:       calll bar
49 ; X86:       movl %[[fp]], %eax
50 ; X86:       pushl %[[x]]
51 ; X86:       calll __llvm_retpoline_eax
52 ; X86-NOT:   # TAILCALL
54 ; X86FAST-LABEL: icall_reg:
55 ; X86FAST:       calll bar
56 ; X86FAST:       calll __llvm_retpoline_eax
57 ; X86FAST:       calll bar
58 ; X86FAST:       calll __llvm_retpoline_eax
61 @global_fp = external global void (i32)*
; @global_fp is an external global that holds a pointer to a void(i32)
; function; @icall_global_fp is the only visible user.
63 ; Test an indirect call through a global variable.
; The body loads @global_fp twice: the first loaded pointer is used for a
; plain call, the second for a `tail` call, both passing %x. The %fpp
; parameter is not referenced anywhere in the body. The check lines after the
; body expect the pointer to be loaded from global_fp directly into the thunk
; register (r11 on x86_64, eax on i686).
64 define void @icall_global_fp(i32 %x, void (i32)** %fpp) #0 {
65   %fp1 = load void (i32)*, void (i32)** @global_fp
66   call void %fp1(i32 %x)
67   %fp2 = load void (i32)*, void (i32)** @global_fp
68   tail call void %fp2(i32 %x)
69   ret void
; NOTE(review): no closing `}` is visible for this body in this copy (the
; embedded numbering jumps from 69 to 72) -- confirm against upstream.
72 ; X64-LABEL: icall_global_fp:
73 ; X64-DAG:   movl %edi, %[[x:[^ ]*]]
74 ; X64-DAG:   movq global_fp(%rip), %r11
75 ; X64:       callq __llvm_retpoline_r11
76 ; X64-DAG:   movl %[[x]], %edi
77 ; X64-DAG:   movq global_fp(%rip), %r11
78 ; X64:       jmp __llvm_retpoline_r11 # TAILCALL
80 ; X64FAST-LABEL: icall_global_fp:
81 ; X64FAST:       movq global_fp(%rip), %r11
82 ; X64FAST:       callq __llvm_retpoline_r11
83 ; X64FAST:       movq global_fp(%rip), %r11
84 ; X64FAST:       jmp __llvm_retpoline_r11 # TAILCALL
86 ; X86-LABEL: icall_global_fp:
87 ; X86:       movl global_fp, %eax
88 ; X86:       pushl 4(%esp)
89 ; X86:       calll __llvm_retpoline_eax
90 ; X86:       addl $4, %esp
91 ; X86:       movl global_fp, %eax
92 ; X86:       jmp __llvm_retpoline_eax # TAILCALL
94 ; X86FAST-LABEL: icall_global_fp:
95 ; X86FAST:       calll __llvm_retpoline_eax
96 ; X86FAST:       jmp __llvm_retpoline_eax # TAILCALL
99 %struct.Foo = type { void (%struct.Foo*)** }
; %struct.Foo has a single field: a pointer to an array of function pointers
; that each take a %struct.Foo*. @vcall uses that field as a vtable pointer.
101 ; Test an indirect call through a vtable.
; The body reads field 0 of %obj to get the table pointer, steps to slot
; index 1, loads the function pointer stored there, and calls it twice with
; %obj as the argument (both calls are marked `tail`). The check lines after
; the body expect the slot at byte offset 8 on x86_64 and 4 on i686.
102 define void @vcall(%struct.Foo* %obj) #0 {
103   %vptr_field = getelementptr %struct.Foo, %struct.Foo* %obj, i32 0, i32 0
104   %vptr = load void (%struct.Foo*)**, void (%struct.Foo*)*** %vptr_field
105   %vslot = getelementptr void(%struct.Foo*)*, void(%struct.Foo*)** %vptr, i32 1
106   %fp = load void(%struct.Foo*)*, void(%struct.Foo*)** %vslot
107   tail call void %fp(%struct.Foo* %obj)
108   tail call void %fp(%struct.Foo* %obj)
109   ret void
; NOTE(review): no closing `}` is visible for this body in this copy (the
; embedded numbering jumps from 109 to 112) -- confirm against upstream.
112 ; X64-LABEL: vcall:
113 ; X64:       movq %rdi, %[[obj:[^ ]*]]
114 ; X64:       movq (%rdi), %[[vptr:[^ ]*]]
115 ; X64:       movq 8(%[[vptr]]), %[[fp:[^ ]*]]
116 ; X64:       movq %[[fp]], %r11
117 ; X64:       callq __llvm_retpoline_r11
118 ; X64-DAG:   movq %[[obj]], %rdi
119 ; X64-DAG:   movq %[[fp]], %r11
120 ; X64:       jmp __llvm_retpoline_r11 # TAILCALL
122 ; X64FAST-LABEL: vcall:
123 ; X64FAST:       callq __llvm_retpoline_r11
124 ; X64FAST:       jmp __llvm_retpoline_r11 # TAILCALL
126 ; X86-LABEL: vcall:
127 ; X86:       movl 8(%esp), %[[obj:[^ ]*]]
128 ; X86:       movl (%[[obj]]), %[[vptr:[^ ]*]]
129 ; X86:       movl 4(%[[vptr]]), %[[fp:[^ ]*]]
130 ; X86:       movl %[[fp]], %eax
131 ; X86:       pushl %[[obj]]
132 ; X86:       calll __llvm_retpoline_eax
133 ; X86:       addl $4, %esp
134 ; X86:       movl %[[fp]], %eax
135 ; X86:       jmp __llvm_retpoline_eax # TAILCALL
137 ; X86FAST-LABEL: vcall:
138 ; X86FAST:       calll __llvm_retpoline_eax
139 ; X86FAST:       jmp __llvm_retpoline_eax # TAILCALL
142 declare void @direct_callee()
; @direct_tail makes one direct `tail` call to the declared-only function
; @direct_callee. The check lines after the body expect a plain
; `jmp direct_callee # TAILCALL` for all four check prefixes, i.e. no
; retpoline thunk is involved even though attribute group #0 is applied.
144 define void @direct_tail() #0 {
145   tail call void @direct_callee()
146   ret void
; NOTE(review): no closing `}` is visible for this body in this copy (the
; embedded numbering jumps from 146 to 149) -- confirm against upstream.
149 ; X64-LABEL: direct_tail:
150 ; X64:       jmp direct_callee # TAILCALL
151 ; X64FAST-LABEL: direct_tail:
152 ; X64FAST:   jmp direct_callee # TAILCALL
153 ; X86-LABEL: direct_tail:
154 ; X86:       jmp direct_callee # TAILCALL
155 ; X86FAST-LABEL: direct_tail:
156 ; X86FAST:   jmp direct_callee # TAILCALL
159 declare void @nonlazybind_callee() #2
; @nonlazybind_callee is declared with attribute group #2 (nonlazybind).
; @nonlazybind_caller calls it once with a plain call and once with a `tail`
; call. The check lines after the body expect the x86_64 runs to load the
; callee address via nonlazybind_callee@GOTPCREL and dispatch through
; __llvm_retpoline_r11, and the i686 runs to call/jump to
; nonlazybind_callee@PLT directly.
161 define void @nonlazybind_caller() #0 {
162   call void @nonlazybind_callee()
163   tail call void @nonlazybind_callee()
164   ret void
; NOTE(review): no closing `}` is visible for this body in this copy (the
; embedded numbering jumps from 164 to 167) -- confirm against upstream.
167 ; X64-LABEL: nonlazybind_caller:
168 ; X64:       movq nonlazybind_callee@GOTPCREL(%rip), %[[REG:.*]]
169 ; X64:       movq %[[REG]], %r11
170 ; X64:       callq __llvm_retpoline_r11
171 ; X64:       movq %[[REG]], %r11
172 ; X64:       jmp __llvm_retpoline_r11 # TAILCALL
173 ; X64FAST-LABEL: nonlazybind_caller:
174 ; X64FAST:   movq nonlazybind_callee@GOTPCREL(%rip), %r11
175 ; X64FAST:   callq __llvm_retpoline_r11
176 ; X64FAST:   movq nonlazybind_callee@GOTPCREL(%rip), %r11
177 ; X64FAST:   jmp __llvm_retpoline_r11 # TAILCALL
178 ; X86-LABEL: nonlazybind_caller:
179 ; X86:       calll nonlazybind_callee@PLT
180 ; X86:       jmp nonlazybind_callee@PLT # TAILCALL
181 ; X86FAST-LABEL: nonlazybind_caller:
182 ; X86FAST:   calll nonlazybind_callee@PLT
183 ; X86FAST:   jmp nonlazybind_callee@PLT # TAILCALL
186 ; Check that a switch gets lowered using a jump table when retpolines are only
187 ; enabled for calls.
; Shape of the function: %header does a volatile load of an i32 from %ptr and
; switches on it, with cases 1 through 9 going to %bb1..%bb9 and every other
; value going to %bb0. Each %bbN does a volatile store of the constant N to
; %sink and branches back to %header, so no `ret` appears in the body.
; The function uses attribute group #0; the check lines placed just inside
; the body expect an indirect `jmpq *` (x86_64) / `jmpl *` (i686) to remain.
188 define void @switch_jumptable(i32* %ptr, i64* %sink) #0 {
189 ; X64-LABEL: switch_jumptable:
190 ; X64:         jmpq *
191 ; X86-LABEL: switch_jumptable:
192 ; X86:         jmpl *
193 entry:
194   br label %header
196 header:
197   %i = load volatile i32, i32* %ptr
198   switch i32 %i, label %bb0 [
199     i32 1, label %bb1
200     i32 2, label %bb2
201     i32 3, label %bb3
202     i32 4, label %bb4
203     i32 5, label %bb5
204     i32 6, label %bb6
205     i32 7, label %bb7
206     i32 8, label %bb8
207     i32 9, label %bb9
208   ]
210 bb0:
211   store volatile i64 0, i64* %sink
212   br label %header
214 bb1:
215   store volatile i64 1, i64* %sink
216   br label %header
218 bb2:
219   store volatile i64 2, i64* %sink
220   br label %header
222 bb3:
223   store volatile i64 3, i64* %sink
224   br label %header
226 bb4:
227   store volatile i64 4, i64* %sink
228   br label %header
230 bb5:
231   store volatile i64 5, i64* %sink
232   br label %header
234 bb6:
235   store volatile i64 6, i64* %sink
236   br label %header
238 bb7:
239   store volatile i64 7, i64* %sink
240   br label %header
242 bb8:
243   store volatile i64 8, i64* %sink
244   br label %header
246 bb9:
247   store volatile i64 9, i64* %sink
248   br label %header
; NOTE(review): no closing `}` is visible for this body in this copy (the
; embedded numbering jumps from 248 to 252) -- confirm against upstream.
252 @indirectbr_preserved.targets = constant [10 x i8*] [i8* blockaddress(@indirectbr_preserved, %bb0),
253                                                      i8* blockaddress(@indirectbr_preserved, %bb1),
254                                                      i8* blockaddress(@indirectbr_preserved, %bb2),
255                                                      i8* blockaddress(@indirectbr_preserved, %bb3),
256                                                      i8* blockaddress(@indirectbr_preserved, %bb4),
257                                                      i8* blockaddress(@indirectbr_preserved, %bb5),
258                                                      i8* blockaddress(@indirectbr_preserved, %bb6),
259                                                      i8* blockaddress(@indirectbr_preserved, %bb7),
260                                                      i8* blockaddress(@indirectbr_preserved, %bb8),
261                                                      i8* blockaddress(@indirectbr_preserved, %bb9)]
; The constant table above holds the block addresses of %bb0..%bb9 of
; @indirectbr_preserved, in order; the function indexes it with values loaded
; from %p.
263 ; Check that we preserve indirectbr when only calls are retpolined.
; Shape of the function: %entry loads an index from %p, fetches the matching
; table entry and does an indirectbr on it. Each %bbN does a volatile store of
; the constant N to %sink and branches to %latch, which reloads the index from
; %p and does a second indirectbr listing all ten blocks. No `ret` appears in
; the body. The function uses attribute group #0; the check lines placed just
; inside the body expect an indirect `jmpq *` (x86_64) / `jmpl *` (i686).
264 define void @indirectbr_preserved(i64* readonly %p, i64* %sink) #0 {
265 ; X64-LABEL: indirectbr_preserved:
266 ; X64:         jmpq *
267 ; X86-LABEL: indirectbr_preserved:
268 ; X86:         jmpl *
269 entry:
270   %i0 = load i64, i64* %p
271   %target.i0 = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_preserved.targets, i64 0, i64 %i0
272   %target0 = load i8*, i8** %target.i0
; NOTE(review): only %bb1 and %bb3 are listed as successors of this first
; indirectbr although the table holds all ten block addresses -- presumably
; intentional for the test; confirm.
273   indirectbr i8* %target0, [label %bb1, label %bb3]
275 bb0:
276   store volatile i64 0, i64* %sink
277   br label %latch
279 bb1:
280   store volatile i64 1, i64* %sink
281   br label %latch
283 bb2:
284   store volatile i64 2, i64* %sink
285   br label %latch
287 bb3:
288   store volatile i64 3, i64* %sink
289   br label %latch
291 bb4:
292   store volatile i64 4, i64* %sink
293   br label %latch
295 bb5:
296   store volatile i64 5, i64* %sink
297   br label %latch
299 bb6:
300   store volatile i64 6, i64* %sink
301   br label %latch
303 bb7:
304   store volatile i64 7, i64* %sink
305   br label %latch
307 bb8:
308   store volatile i64 8, i64* %sink
309   br label %latch
311 bb9:
312   store volatile i64 9, i64* %sink
313   br label %latch
315 latch:
316   %i.next = load i64, i64* %p
317   %target.i.next = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_preserved.targets, i64 0, i64 %i.next
318   %target.next = load i8*, i8** %target.i.next
319   ; Potentially hit a full 10 successors here so that even if we rewrite as
320   ; a switch it will try to be lowered with a jump table.
321   indirectbr i8* %target.next, [label %bb0,
322                                 label %bb1,
323                                 label %bb2,
324                                 label %bb3,
325                                 label %bb4,
326                                 label %bb5,
327                                 label %bb6,
328                                 label %bb7,
329                                 label %bb8,
330                                 label %bb9]
; NOTE(review): no closing `}` is visible for this body in this copy (the
; embedded numbering jumps from 330 to 333) -- confirm against upstream.
333 @indirectbr_rewrite.targets = constant [10 x i8*] [i8* blockaddress(@indirectbr_rewrite, %bb0),
334                                                    i8* blockaddress(@indirectbr_rewrite, %bb1),
335                                                    i8* blockaddress(@indirectbr_rewrite, %bb2),
336                                                    i8* blockaddress(@indirectbr_rewrite, %bb3),
337                                                    i8* blockaddress(@indirectbr_rewrite, %bb4),
338                                                    i8* blockaddress(@indirectbr_rewrite, %bb5),
339                                                    i8* blockaddress(@indirectbr_rewrite, %bb6),
340                                                    i8* blockaddress(@indirectbr_rewrite, %bb7),
341                                                    i8* blockaddress(@indirectbr_rewrite, %bb8),
342                                                    i8* blockaddress(@indirectbr_rewrite, %bb9)]
; The constant table above holds the block addresses of %bb0..%bb9 of
; @indirectbr_rewrite, in order; the function indexes it with values loaded
; from %p.
344 ; Check that when retpolines are enabled for indirect branches the indirectbr
345 ; instruction gets rewritten to use switch, and that in turn doesn't get lowered
346 ; as a jump table.
; Shape of the function: %entry loads an index from %p, fetches the matching
; table entry and does an indirectbr on it. Each %bbN does a volatile store of
; the constant N to %sink and branches to %latch, which reloads the index from
; %p and does a second indirectbr listing all ten blocks. No `ret` appears in
; the body. Unlike the other functions in this file it uses attribute group
; #1, which also enables +retpoline-indirect-branches; the check lines placed
; just inside the body require that no `jmpq` (x86_64) / `jmpl` (i686) shows
; up after the function label.
347 define void @indirectbr_rewrite(i64* readonly %p, i64* %sink) #1 {
348 ; X64-LABEL: indirectbr_rewrite:
349 ; X64-NOT:     jmpq
350 ; X86-LABEL: indirectbr_rewrite:
351 ; X86-NOT:     jmpl
352 entry:
353   %i0 = load i64, i64* %p
354   %target.i0 = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i0
355   %target0 = load i8*, i8** %target.i0
; NOTE(review): only %bb1 and %bb3 are listed as successors of this first
; indirectbr although the table holds all ten block addresses -- presumably
; intentional for the test; confirm.
356   indirectbr i8* %target0, [label %bb1, label %bb3]
358 bb0:
359   store volatile i64 0, i64* %sink
360   br label %latch
362 bb1:
363   store volatile i64 1, i64* %sink
364   br label %latch
366 bb2:
367   store volatile i64 2, i64* %sink
368   br label %latch
370 bb3:
371   store volatile i64 3, i64* %sink
372   br label %latch
374 bb4:
375   store volatile i64 4, i64* %sink
376   br label %latch
378 bb5:
379   store volatile i64 5, i64* %sink
380   br label %latch
382 bb6:
383   store volatile i64 6, i64* %sink
384   br label %latch
386 bb7:
387   store volatile i64 7, i64* %sink
388   br label %latch
390 bb8:
391   store volatile i64 8, i64* %sink
392   br label %latch
394 bb9:
395   store volatile i64 9, i64* %sink
396   br label %latch
398 latch:
399   %i.next = load i64, i64* %p
400   %target.i.next = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i.next
401   %target.next = load i8*, i8** %target.i.next
402   ; Potentially hit a full 10 successors here so that even if we rewrite as
403   ; a switch it will try to be lowered with a jump table.
404   indirectbr i8* %target.next, [label %bb0,
405                                 label %bb1,
406                                 label %bb2,
407                                 label %bb3,
408                                 label %bb4,
409                                 label %bb5,
410                                 label %bb6,
411                                 label %bb7,
412                                 label %bb8,
413                                 label %bb9]
; NOTE(review): no closing `}` is visible for this body in this copy (the
; embedded numbering jumps from 413 to 416) -- confirm against upstream.
416 ; Lastly check that the necessary thunks were emitted.
418 ; X64-LABEL:         .section        .text.__llvm_retpoline_r11,{{.*}},__llvm_retpoline_r11,comdat
419 ; X64-NEXT:          .hidden __llvm_retpoline_r11
420 ; X64-NEXT:          .weak   __llvm_retpoline_r11
421 ; X64:       __llvm_retpoline_r11:
422 ; X64-NEXT:  # {{.*}}                                # %entry
423 ; X64-NEXT:          callq   [[CALL_TARGET:.*]]
424 ; X64-NEXT:  [[CAPTURE_SPEC:.*]]:                    # Block address taken
425 ; X64-NEXT:                                          # %entry
426 ; X64-NEXT:                                          # =>This Inner Loop Header: Depth=1
427 ; X64-NEXT:          pause
428 ; X64-NEXT:          lfence
429 ; X64-NEXT:          jmp     [[CAPTURE_SPEC]]
430 ; X64-NEXT:          .p2align        4, 0x90
431 ; X64-NEXT:  {{.*}}                                  # Block address taken
432 ; X64-NEXT:                                          # %entry
433 ; X64-NEXT:  [[CALL_TARGET]]:
434 ; X64-NEXT:          movq    %r11, (%rsp)
435 ; X64-NEXT:          retq
437 ; X86-LABEL:         .section        .text.__llvm_retpoline_eax,{{.*}},__llvm_retpoline_eax,comdat
438 ; X86-NEXT:          .hidden __llvm_retpoline_eax
439 ; X86-NEXT:          .weak   __llvm_retpoline_eax
440 ; X86:       __llvm_retpoline_eax:
441 ; X86-NEXT:  # {{.*}}                                # %entry
442 ; X86-NEXT:          calll   [[CALL_TARGET:.*]]
443 ; X86-NEXT:  [[CAPTURE_SPEC:.*]]:                    # Block address taken
444 ; X86-NEXT:                                          # %entry
445 ; X86-NEXT:                                          # =>This Inner Loop Header: Depth=1
446 ; X86-NEXT:          pause
447 ; X86-NEXT:          lfence
448 ; X86-NEXT:          jmp     [[CAPTURE_SPEC]]
449 ; X86-NEXT:          .p2align        4, 0x90
450 ; X86-NEXT:  {{.*}}                                  # Block address taken
451 ; X86-NEXT:                                          # %entry
452 ; X86-NEXT:  [[CALL_TARGET]]:
453 ; X86-NEXT:          movl    %eax, (%esp)
454 ; X86-NEXT:          retl
456 ; X86-LABEL:         .section        .text.__llvm_retpoline_ecx,{{.*}},__llvm_retpoline_ecx,comdat
457 ; X86-NEXT:          .hidden __llvm_retpoline_ecx
458 ; X86-NEXT:          .weak   __llvm_retpoline_ecx
459 ; X86:       __llvm_retpoline_ecx:
460 ; X86-NEXT:  # {{.*}}                                # %entry
461 ; X86-NEXT:          calll   [[CALL_TARGET:.*]]
462 ; X86-NEXT:  [[CAPTURE_SPEC:.*]]:                    # Block address taken
463 ; X86-NEXT:                                          # %entry
464 ; X86-NEXT:                                          # =>This Inner Loop Header: Depth=1
465 ; X86-NEXT:          pause
466 ; X86-NEXT:          lfence
467 ; X86-NEXT:          jmp     [[CAPTURE_SPEC]]
468 ; X86-NEXT:          .p2align        4, 0x90
469 ; X86-NEXT:  {{.*}}                                  # Block address taken
470 ; X86-NEXT:                                          # %entry
471 ; X86-NEXT:  [[CALL_TARGET]]:
472 ; X86-NEXT:          movl    %ecx, (%esp)
473 ; X86-NEXT:          retl
475 ; X86-LABEL:         .section        .text.__llvm_retpoline_edx,{{.*}},__llvm_retpoline_edx,comdat
476 ; X86-NEXT:          .hidden __llvm_retpoline_edx
477 ; X86-NEXT:          .weak   __llvm_retpoline_edx
478 ; X86:       __llvm_retpoline_edx:
479 ; X86-NEXT:  # {{.*}}                                # %entry
480 ; X86-NEXT:          calll   [[CALL_TARGET:.*]]
481 ; X86-NEXT:  [[CAPTURE_SPEC:.*]]:                    # Block address taken
482 ; X86-NEXT:                                          # %entry
483 ; X86-NEXT:                                          # =>This Inner Loop Header: Depth=1
484 ; X86-NEXT:          pause
485 ; X86-NEXT:          lfence
486 ; X86-NEXT:          jmp     [[CAPTURE_SPEC]]
487 ; X86-NEXT:          .p2align        4, 0x90
488 ; X86-NEXT:  {{.*}}                                  # Block address taken
489 ; X86-NEXT:                                          # %entry
490 ; X86-NEXT:  [[CALL_TARGET]]:
491 ; X86-NEXT:          movl    %edx, (%esp)
492 ; X86-NEXT:          retl
494 ; X86-LABEL:         .section        .text.__llvm_retpoline_edi,{{.*}},__llvm_retpoline_edi,comdat
495 ; X86-NEXT:          .hidden __llvm_retpoline_edi
496 ; X86-NEXT:          .weak   __llvm_retpoline_edi
497 ; X86:       __llvm_retpoline_edi:
498 ; X86-NEXT:  # {{.*}}                                # %entry
499 ; X86-NEXT:          calll   [[CALL_TARGET:.*]]
500 ; X86-NEXT:  [[CAPTURE_SPEC:.*]]:                    # Block address taken
501 ; X86-NEXT:                                          # %entry
502 ; X86-NEXT:                                          # =>This Inner Loop Header: Depth=1
503 ; X86-NEXT:          pause
504 ; X86-NEXT:          lfence
505 ; X86-NEXT:          jmp     [[CAPTURE_SPEC]]
506 ; X86-NEXT:          .p2align        4, 0x90
507 ; X86-NEXT:  {{.*}}                                  # Block address taken
508 ; X86-NEXT:                                          # %entry
509 ; X86-NEXT:  [[CALL_TARGET]]:
510 ; X86-NEXT:          movl    %edi, (%esp)
511 ; X86-NEXT:          retl
; Attribute groups referenced by the definitions and declarations in this file:
;   #0 - enables the retpoline-indirect-calls target feature only; used by
;        every function defined here except @indirectbr_rewrite.
;   #1 - enables retpoline-indirect-calls and retpoline-indirect-branches;
;        used by @indirectbr_rewrite.
;   #2 - nonlazybind; applied to the @nonlazybind_callee declaration.
514 attributes #0 = { "target-features"="+retpoline-indirect-calls" }
515 attributes #1 = { "target-features"="+retpoline-indirect-calls,+retpoline-indirect-branches" }
516 attributes #2 = { nonlazybind }