1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -verify-machineinstrs -stack-symbol-ordering=0 -mcpu=nehalem -debug-only=stackmaps < %s | FileCheck %s
5 target triple = "x86_64-pc-linux-gnu"
7 ; Can we lower a single vector?
; @test takes one vector of two addrspace(1) pointers, passes it as the only
; GC operand of a statepoint wrapping a call to @do_safepoint, and returns the
; relocated vector. The expected assembly spills %xmm0 to (%rsp) before the
; call and reloads it from the same slot afterwards.
8 define <2 x i8 addrspace(1)*> @test(<2 x i8 addrspace(1)*> %obj) gc "statepoint-example" {
10 ; CHECK: # %bb.0: # %entry
11 ; CHECK-NEXT: subq $24, %rsp
12 ; CHECK-NEXT: .cfi_def_cfa_offset 32
13 ; CHECK-NEXT: movaps %xmm0, (%rsp)
14 ; CHECK-NEXT: callq do_safepoint
16 ; CHECK-NEXT: movaps (%rsp), %xmm0
17 ; CHECK-NEXT: addq $24, %rsp
18 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; The gc.relocate indices count the statepoint call's operands from zero:
; operands 0-6 are the fixed/count fields, so index 7 is %obj. Base and
; derived are the same value here.
21 %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> %obj)
22 %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 7) ; (%obj, %obj)
23 ret <2 x i8 addrspace(1)*> %obj.relocated
26 ; Can we lower the base, derived pairs if both are vectors?
; @test2 derives a second pointer vector by adding the scalar %offset to every
; lane of %obj, keeps both vectors live across the statepoint, and returns the
; relocated derived vector. The expected assembly broadcasts %rdi into %xmm1,
; adds it to %xmm0, spills the base (%xmm0) to a non-zero offset and the
; derived value (%xmm1) to (%rsp), then reloads only the derived slot.
27 define <2 x i8 addrspace(1)*> @test2(<2 x i8 addrspace(1)*> %obj, i64 %offset) gc "statepoint-example" {
29 ; CHECK: # %bb.0: # %entry
30 ; CHECK-NEXT: subq $40, %rsp
31 ; CHECK-NEXT: .cfi_def_cfa_offset 48
32 ; CHECK-NEXT: movq %rdi, %xmm1
33 ; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
34 ; CHECK-NEXT: paddq %xmm0, %xmm1
35 ; CHECK-NEXT: movdqa %xmm0, {{[0-9]+}}(%rsp)
36 ; CHECK-NEXT: movdqa %xmm1, (%rsp)
37 ; CHECK-NEXT: callq do_safepoint
39 ; CHECK-NEXT: movaps (%rsp), %xmm0
40 ; CHECK-NEXT: addq $40, %rsp
41 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; Vector GEP with a scalar index: the same %offset is applied to both lanes.
44 %derived = getelementptr i8, <2 x i8 addrspace(1)*> %obj, i64 %offset
; GC operands start at statepoint operand index 7: %obj is 7, %derived is 8.
45 %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> %obj, <2 x i8 addrspace(1)*> %derived)
46 %derived.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 8) ; (%obj, %derived)
47 ret <2 x i8 addrspace(1)*> %derived.relocated
50 ; Originally, this was just a variant of @test2 above, but it ends up
51 ; covering a bunch of interesting missed optimizations. Specifically:
52 ; - We waste a stack slot for a value that a backend transform pass
53 ; CSEd to another spilled one.
54 ; - We don't remove the testb even though it serves no purpose
55 ; - We could in principle reuse the argument memory (%rsi) and do away
56 ; with stack slots entirely.
; @test3 loads the same pointer vector from %ptr on both sides of a branch on
; %cnd, merges the two loads through a pair of identical phis (%obj.base and
; %obj), and keeps both phis live across the statepoint. The expected assembly
; shows a single load from (%rsi) stored to two stack slots, with the testb
; still present.
57 define <2 x i64 addrspace(1)*> @test3(i1 %cnd, <2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" {
59 ; CHECK: # %bb.0: # %entry
60 ; CHECK-NEXT: subq $40, %rsp
61 ; CHECK-NEXT: .cfi_def_cfa_offset 48
62 ; CHECK-NEXT: testb $1, %dil
63 ; CHECK-NEXT: movaps (%rsi), %xmm0
64 ; CHECK-NEXT: movaps %xmm0, (%rsp)
65 ; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
66 ; CHECK-NEXT: callq do_safepoint
68 ; CHECK-NEXT: movaps (%rsp), %xmm0
69 ; CHECK-NEXT: addq $40, %rsp
70 ; CHECK-NEXT: .cfi_def_cfa_offset 8
73 br i1 %cnd, label %taken, label %untaken
75 taken: ; preds = %entry
76 %obja = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
79 untaken: ; preds = %entry
80 %objb = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
83 merge: ; preds = %untaken, %taken
; Both phis select the same incoming values; they differ only in name.
84 %obj.base = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ]
85 %obj = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ]
; Note the GC operand order: %obj is statepoint operand 7, %obj.base is 8.
86 %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i64 addrspace(1)*> %obj, <2 x i64 addrspace(1)*> %obj.base)
87 %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 8, i32 7) ; (%obj.base, %obj)
88 %obj.relocated.casted = bitcast <2 x i8 addrspace(1)*> %obj.relocated to <2 x i64 addrspace(1)*>
; The relocated base and its cast are not used by the return below.
89 %obj.base.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 8, i32 8) ; (%obj.base, %obj.base)
90 %obj.base.relocated.casted = bitcast <2 x i8 addrspace(1)*> %obj.base.relocated to <2 x i64 addrspace(1)*>
91 ret <2 x i64 addrspace(1)*> %obj.relocated.casted
94 ; Can we handle vector constants? At the moment, we don't appear to actually
95 ; get selection dag nodes for these.
; @test4 takes no arguments: the only GC operand of the statepoint is the
; constant zeroinitializer vector. The expected assembly materializes the zero
; vector with xorps, spills it to (%rsp), and reloads it after the call.
96 define <2 x i8 addrspace(1)*> @test4() gc "statepoint-example" {
98 ; CHECK: # %bb.0: # %entry
99 ; CHECK-NEXT: subq $24, %rsp
100 ; CHECK-NEXT: .cfi_def_cfa_offset 32
101 ; CHECK-NEXT: xorps %xmm0, %xmm0
102 ; CHECK-NEXT: movaps %xmm0, (%rsp)
103 ; CHECK-NEXT: callq do_safepoint
104 ; CHECK-NEXT: .Ltmp3:
105 ; CHECK-NEXT: movaps (%rsp), %xmm0
106 ; CHECK-NEXT: addq $24, %rsp
107 ; CHECK-NEXT: .cfi_def_cfa_offset 8
110 %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> zeroinitializer)
111 %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 7) ; (zeroinitializer, zeroinitializer): operand 7 is the constant vector
112 ret <2 x i8 addrspace(1)*> %obj.relocated
115 ; Check that we can lower a constant typed as i128 correctly. Note that the
116 ; actual value is representable in 64 bits. We don't have a representation
117 ; of larger than 64 bit constant in the StackMap format.
; @test5 has no pointer operands and no gc.relocate: the statepoint carries
; only the constant i128 0. The expected assembly contains no spill or reload,
; just the pushq/popq pair around the call.
118 define void @test5() gc "statepoint-example" {
119 ; CHECK-LABEL: test5:
120 ; CHECK: # %bb.0: # %entry
121 ; CHECK-NEXT: pushq %rax
122 ; CHECK-NEXT: .cfi_def_cfa_offset 16
123 ; CHECK-NEXT: callq do_safepoint
124 ; CHECK-NEXT: .Ltmp4:
125 ; CHECK-NEXT: popq %rax
126 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; NOTE(review): the i32 1 directly before the i128 is presumably the
; deopt-operand count in the statepoint operand layout - confirm against the
; Statepoints documentation.
129 %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 1, i128 0)
133 ; CHECK: __LLVM_StackMaps:
136 ; Check for the two location records; both refer to the same spill slot (offset 0)
137 ; Stack Maps: Loc 3: Indirect 7+0 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
138 ; Stack Maps: Loc 4: Indirect 7+0 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
152 ; CHECK: .Ltmp1-test2
153 ; Check for the two spill slots
154 ; Stack Maps: Loc 3: Indirect 7+16 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
155 ; Stack Maps: Loc 4: Indirect 7+0 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
169 ; CHECK: .Ltmp2-test3
170 ; Check for the four location records; they cover two spill slots (offsets 16 and 0)
171 ; Stack Maps: Loc 3: Indirect 7+16 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
172 ; Stack Maps: Loc 4: Indirect 7+16 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
173 ; Stack Maps: Loc 5: Indirect 7+16 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
174 ; Stack Maps: Loc 6: Indirect 7+0 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
; External callee wrapped by every statepoint in this file.
200 declare void @do_safepoint()
; Statepoint intrinsic overload for a `void ()` call target; variadic so each
; test can append its own GC / constant operands.
202 declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
; Scalar-pointer relocate overload. NOTE(review): not referenced by any
; function visible in this section - confirm whether it is still needed.
203 declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32)
; Two-element pointer-vector relocate overload used by @test through @test4.
204 declare <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token, i32, i32)