1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
3 ; RUN: -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
4 ; RUN: < %s | FileCheck %s
6 ; On future CPU with PC Relative addressing enabled, it is possible for the
7 ; linker to optimize GOT indirect accesses. In order for the linker to do this,
8 ; the compiler needs to add a hint using the R_PPC64_PCREL_OPT relocation.
9 ; This test checks that the compiler adds the R_PPC64_PCREL_OPT relocation correctly.
; External globals read and written by the test functions below. They are all
; declared `external`, and the CHECK lines access them through the GOT
; (`<symbol>@got@pcrel`). @input32 and @ArrayIn are the only ones declared
; without local_unnamed_addr.
12 @input8 = external local_unnamed_addr global i8, align 1
13 @output8 = external local_unnamed_addr global i8, align 1
14 @input16 = external local_unnamed_addr global i16, align 2
15 @output16 = external local_unnamed_addr global i16, align 2
16 @input32 = external global i32, align 4
17 @output32 = external local_unnamed_addr global i32, align 4
18 @input64 = external local_unnamed_addr global i64, align 8
19 @output64 = external local_unnamed_addr global i64, align 8
20 @input128 = external local_unnamed_addr global i128, align 16
21 @output128 = external local_unnamed_addr global i128, align 16
22 @inputf32 = external local_unnamed_addr global float, align 4
23 @outputf32 = external local_unnamed_addr global float, align 4
24 @inputf64 = external local_unnamed_addr global double, align 8
25 @outputf64 = external local_unnamed_addr global double, align 8
26 @inputVi32 = external local_unnamed_addr global <4 x i32>, align 16
27 @outputVi32 = external local_unnamed_addr global <4 x i32>, align 16
28 @inputVi64 = external local_unnamed_addr global <2 x i64>, align 16
29 @outputVi64 = external local_unnamed_addr global <2 x i64>, align 16
30 @ArrayIn = external global [10 x i32], align 4
31 @ArrayOut = external local_unnamed_addr global [10 x i32], align 4
32 @IntPtrIn = external local_unnamed_addr global ptr, align 8
33 @IntPtrOut = external local_unnamed_addr global ptr, align 8
34 @FuncPtrIn = external local_unnamed_addr global ptr, align 8
35 @FuncPtrOut = external local_unnamed_addr global ptr, align 8
37 define dso_local void @ReadWrite8() local_unnamed_addr #0 {
; Copies one i8 from @input8 to @output8. The checks expect a single
; R_PPC64_PCREL_OPT hint, pairing the first pld (label .Lpcrel0) with the lbz.
38 ; The stb r3, 0(r4) cannot be optimized because it stores register r3, and
39 ; r3 is redefined by lbz r3, 0(r3), which sits between the second pld and
40 ; the stb.
41 ; CHECK-LABEL: ReadWrite8:
42 ; CHECK: # %bb.0: # %entry
43 ; CHECK-NEXT: pld r3, input8@got@pcrel(0), 1
44 ; CHECK-NEXT: .Lpcrel0:
45 ; CHECK-NEXT: pld r4, output8@got@pcrel(0), 1
46 ; CHECK-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8)
47 ; CHECK-NEXT: lbz r3, 0(r3)
48 ; CHECK-NEXT: stb r3, 0(r4)
51 %0 = load i8, ptr @input8, align 1
52 store i8 %0, ptr @output8, align 1
56 define dso_local void @ReadWrite16() local_unnamed_addr #0 {
; Copies one i16 from @input16 to @output16. The checks expect a single
; R_PPC64_PCREL_OPT hint, pairing the first pld (label .Lpcrel1) with the lhz.
57 ; The sth r3, 0(r4) cannot be optimized because it stores register r3, and
58 ; r3 is redefined by lhz r3, 0(r3), which sits between the second pld and
59 ; the sth.
60 ; CHECK-LABEL: ReadWrite16:
61 ; CHECK: # %bb.0: # %entry
62 ; CHECK-NEXT: pld r3, input16@got@pcrel(0), 1
63 ; CHECK-NEXT: .Lpcrel1:
64 ; CHECK-NEXT: pld r4, output16@got@pcrel(0), 1
65 ; CHECK-NEXT: .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
66 ; CHECK-NEXT: lhz r3, 0(r3)
67 ; CHECK-NEXT: sth r3, 0(r4)
70 %0 = load i16, ptr @input16, align 2
71 store i16 %0, ptr @output16, align 2
75 define dso_local void @ReadWrite32() local_unnamed_addr #0 {
; Copies one i32 from @input32 to @output32. The checks expect one
; R_PPC64_PCREL_OPT hint, pairing the first pld (label .Lpcrel2) with the lwz;
; the second pld (for @output32) has no label and no .reloc.
76 ; CHECK-LABEL: ReadWrite32:
77 ; CHECK: # %bb.0: # %entry
78 ; CHECK-NEXT: pld r3, input32@got@pcrel(0), 1
79 ; CHECK-NEXT: .Lpcrel2:
80 ; CHECK-NEXT: pld r4, output32@got@pcrel(0), 1
81 ; CHECK-NEXT: .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8)
82 ; CHECK-NEXT: lwz r3, 0(r3)
83 ; CHECK-NEXT: stw r3, 0(r4)
86 %0 = load i32, ptr @input32, align 4
87 store i32 %0, ptr @output32, align 4
91 define dso_local void @ReadWrite64() local_unnamed_addr #0 {
; Copies one i64 from @input64 to @output64. The checks expect one
; R_PPC64_PCREL_OPT hint, pairing the first pld (label .Lpcrel3) with the ld;
; the second pld (for @output64) has no label and no .reloc.
92 ; CHECK-LABEL: ReadWrite64:
93 ; CHECK: # %bb.0: # %entry
94 ; CHECK-NEXT: pld r3, input64@got@pcrel(0), 1
95 ; CHECK-NEXT: .Lpcrel3:
96 ; CHECK-NEXT: pld r4, output64@got@pcrel(0), 1
97 ; CHECK-NEXT: .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8)
98 ; CHECK-NEXT: ld r3, 0(r3)
99 ; CHECK-NEXT: std r3, 0(r4)
102 %0 = load i64, ptr @input64, align 8
103 store i64 %0, ptr @output64, align 8
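107 ; FIXME: we should always convert X-Form instructions that use
108 ; PPC::ZERO[8] to the corresponding D-Form so that the PCREL_OPT hint can be emitted.
; NOTE(review): the checks for ReadWrite128 below already show D-Form lxv/stxv,
; each with an R_PPC64_PCREL_OPT .reloc -- confirm whether this FIXME is still
; current.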
109 define dso_local void @ReadWrite128() local_unnamed_addr #0 {
; Copies one i128 from @input128 to @output128 using lxv/stxv. The checks
; expect two R_PPC64_PCREL_OPT hints: .Lpcrel4 pairs the first pld with the
; lxv, and .Lpcrel5 pairs the second pld with the stxv.
110 ; CHECK-LABEL: ReadWrite128:
111 ; CHECK: # %bb.0: # %entry
112 ; CHECK-NEXT: pld r3, input128@got@pcrel(0), 1
113 ; CHECK-NEXT: .Lpcrel4:
114 ; CHECK-NEXT: .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8)
115 ; CHECK-NEXT: lxv vs0, 0(r3)
116 ; CHECK-NEXT: pld r3, output128@got@pcrel(0), 1
117 ; CHECK-NEXT: .Lpcrel5:
118 ; CHECK-NEXT: .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
119 ; CHECK-NEXT: stxv vs0, 0(r3)
122 %0 = load i128, ptr @input128, align 16
123 store i128 %0, ptr @output128, align 16
127 define dso_local void @ReadWritef32() local_unnamed_addr #0 {
; Loads a float from @inputf32, adds a constant and stores the sum to
; @outputf32. The checks expect one R_PPC64_PCREL_OPT hint, pairing the first
; pld (label .Lpcrel6) with the lfs; the xxspltidp that materializes the
; constant sits between the label and the .reloc. The pld for @outputf32 has
; no label and no .reloc.
128 ; CHECK-LABEL: ReadWritef32:
129 ; CHECK: # %bb.0: # %entry
130 ; CHECK-NEXT: pld r3, inputf32@got@pcrel(0), 1
131 ; CHECK-NEXT: .Lpcrel6:
132 ; CHECK-NEXT: xxspltidp vs1, 1078103900
133 ; CHECK-NEXT: .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8)
134 ; CHECK-NEXT: lfs f0, 0(r3)
135 ; CHECK-NEXT: pld r3, outputf32@got@pcrel(0), 1
136 ; CHECK-NEXT: xsaddsp f0, f0, f1
137 ; CHECK-NEXT: stfs f0, 0(r3)
140 %0 = load float, ptr @inputf32, align 4
141 %add = fadd float %0, 0x400851EB80000000
142 store float %add, ptr @outputf32, align 4
146 define dso_local void @ReadWritef64() local_unnamed_addr #0 {
; Loads a double from @inputf64, adds 6.8 and stores the sum to @outputf64.
; The checks expect one R_PPC64_PCREL_OPT hint, pairing the first pld (label
; .Lpcrel7) with the lfd; the two xxsplti32dx instructions that build the
; constant sit between the label and the .reloc. The pld for @outputf64 has
; no label and no .reloc.
147 ; CHECK-LABEL: ReadWritef64:
148 ; CHECK: # %bb.0: # %entry
149 ; CHECK-NEXT: pld r3, inputf64@got@pcrel(0), 1
150 ; CHECK-NEXT: .Lpcrel7:
151 ; CHECK-NEXT: xxsplti32dx vs1, 0, 1075524403
152 ; CHECK-NEXT: xxsplti32dx vs1, 1, 858993459
153 ; CHECK-NEXT: .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8)
154 ; CHECK-NEXT: lfd f0, 0(r3)
155 ; CHECK-NEXT: pld r3, outputf64@got@pcrel(0), 1
156 ; CHECK-NEXT: xsadddp f0, f0, f1
157 ; CHECK-NEXT: stfd f0, 0(r3)
160 %0 = load double, ptr @inputf64, align 8
161 %add = fadd double %0, 6.800000e+00
162 store double %add, ptr @outputf64, align 8
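166 ; FIXME: we should always convert X-Form instructions that use
167 ; PPC::ZERO[8] to the corresponding D-Form so that the PCREL_OPT hint can be emitted.
; NOTE(review): the checks for ReadWriteVi32 below already show a D-Form lxv
; with an R_PPC64_PCREL_OPT .reloc for the load -- confirm whether this FIXME
; is still current.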
168 define dso_local void @ReadWriteVi32() local_unnamed_addr #0 {
; Loads a <4 x i32> from @inputVi32, replaces element 1 with 45 and stores the
; vector to @outputVi32. The checks expect one R_PPC64_PCREL_OPT hint, pairing
; the first pld (label .Lpcrel8) with the lxv; the li r4, 45 sits between the
; label and the .reloc. The pld for @outputVi32 has no label and no .reloc.
169 ; CHECK-LABEL: ReadWriteVi32:
170 ; CHECK: # %bb.0: # %entry
171 ; CHECK-NEXT: pld r3, inputVi32@got@pcrel(0), 1
172 ; CHECK-NEXT: .Lpcrel8:
173 ; CHECK-NEXT: li r4, 45
174 ; CHECK-NEXT: .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8)
175 ; CHECK-NEXT: lxv v2, 0(r3)
176 ; CHECK-NEXT: pld r3, outputVi32@got@pcrel(0), 1
177 ; CHECK-NEXT: vinsw v2, r4, 8
178 ; CHECK-NEXT: stxv v2, 0(r3)
181 %0 = load <4 x i32>, ptr @inputVi32, align 16
182 %vecins = insertelement <4 x i32> %0, i32 45, i32 1
183 store <4 x i32> %vecins, ptr @outputVi32, align 16
187 define dso_local void @ReadWriteVi64() local_unnamed_addr #0 {
; Copies a <2 x i64> from @inputVi64 to @outputVi64 using lxv/stxv. The checks
; expect two R_PPC64_PCREL_OPT hints: .Lpcrel9 pairs the first pld with the
; lxv, and .Lpcrel10 pairs the second pld with the stxv.
188 ; CHECK-LABEL: ReadWriteVi64:
189 ; CHECK: # %bb.0: # %entry
190 ; CHECK-NEXT: pld r3, inputVi64@got@pcrel(0), 1
191 ; CHECK-NEXT: .Lpcrel9:
192 ; CHECK-NEXT: .reloc .Lpcrel9-8,R_PPC64_PCREL_OPT,.-(.Lpcrel9-8)
193 ; CHECK-NEXT: lxv vs0, 0(r3)
194 ; CHECK-NEXT: pld r3, outputVi64@got@pcrel(0), 1
195 ; CHECK-NEXT: .Lpcrel10:
196 ; CHECK-NEXT: .reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8)
197 ; CHECK-NEXT: stxv vs0, 0(r3)
200 %0 = load <2 x i64>, ptr @inputVi64, align 16
201 store <2 x i64> %0, ptr @outputVi64, align 16
205 define dso_local void @ReadWriteArray() local_unnamed_addr #0 {
; Loads element 7 of @ArrayIn, adds 42 and stores the result to element 2 of
; @ArrayOut. The checks expect one R_PPC64_PCREL_OPT hint, pairing the first
; pld (label .Lpcrel11) with the lwz at a non-zero displacement (28); the pld
; for @ArrayOut has no label and no .reloc.
206 ; CHECK-LABEL: ReadWriteArray:
207 ; CHECK: # %bb.0: # %entry
208 ; CHECK-NEXT: pld r3, ArrayIn@got@pcrel(0), 1
209 ; CHECK-NEXT: .Lpcrel11:
210 ; CHECK-NEXT: pld r4, ArrayOut@got@pcrel(0), 1
211 ; CHECK-NEXT: .reloc .Lpcrel11-8,R_PPC64_PCREL_OPT,.-(.Lpcrel11-8)
212 ; CHECK-NEXT: lwz r3, 28(r3)
213 ; CHECK-NEXT: addi r3, r3, 42
214 ; CHECK-NEXT: stw r3, 8(r4)
217 %0 = load i32, ptr getelementptr inbounds ([10 x i32], ptr @ArrayIn, i64 0, i64 7), align 4
218 %add = add nsw i32 %0, 42
219 store i32 %add, ptr getelementptr inbounds ([10 x i32], ptr @ArrayOut, i64 0, i64 2), align 4
223 define dso_local void @ReadWriteSameArray() local_unnamed_addr #0 {
; Loads element 3 of @ArrayIn, adds 8 and stores the result to element 6 of
; the same array. The checks expect a single pld with no label and no
; R_PPC64_PCREL_OPT .reloc: the address in r3 is used by both the lwz and the
; stw.
224 ; CHECK-LABEL: ReadWriteSameArray:
225 ; CHECK: # %bb.0: # %entry
226 ; CHECK-NEXT: pld r3, ArrayIn@got@pcrel(0), 1
227 ; CHECK-NEXT: lwz r4, 12(r3)
228 ; CHECK-NEXT: addi r4, r4, 8
229 ; CHECK-NEXT: stw r4, 24(r3)
232 %0 = load i32, ptr getelementptr inbounds ([10 x i32], ptr @ArrayIn, i64 0, i64 3), align 4
233 %add = add nsw i32 %0, 8
234 store i32 %add, ptr getelementptr inbounds ([10 x i32], ptr @ArrayIn, i64 0, i64 6), align 4
238 define dso_local void @ReadWriteIntPtr() local_unnamed_addr #0 {
; Loads the pointer stored in @IntPtrIn, sums its i32 elements 54 and 12, and
; stores the sum to element 34 of the pointer loaded from @IntPtrOut. The
; checks expect two R_PPC64_PCREL_OPT hints: .Lpcrel12 pairs the first pld
; with ld r3, 0(r3), and .Lpcrel13 pairs the second pld with ld r4, 0(r4).
; The later lwz/stw go through the loaded pointers, not the GOT addresses.
239 ; CHECK-LABEL: ReadWriteIntPtr:
240 ; CHECK: # %bb.0: # %entry
241 ; CHECK-NEXT: pld r3, IntPtrIn@got@pcrel(0), 1
242 ; CHECK-NEXT: .Lpcrel12:
243 ; CHECK-NEXT: pld r4, IntPtrOut@got@pcrel(0), 1
244 ; CHECK-NEXT: .Lpcrel13:
245 ; CHECK-NEXT: .reloc .Lpcrel12-8,R_PPC64_PCREL_OPT,.-(.Lpcrel12-8)
246 ; CHECK-NEXT: ld r3, 0(r3)
247 ; CHECK-NEXT: .reloc .Lpcrel13-8,R_PPC64_PCREL_OPT,.-(.Lpcrel13-8)
248 ; CHECK-NEXT: ld r4, 0(r4)
249 ; CHECK-NEXT: lwz r5, 216(r3)
250 ; CHECK-NEXT: lwz r3, 48(r3)
251 ; CHECK-NEXT: add r3, r3, r5
252 ; CHECK-NEXT: stw r3, 136(r4)
255 %0 = load ptr, ptr @IntPtrIn, align 8
256 %arrayidx = getelementptr inbounds i32, ptr %0, i64 54
257 %1 = load i32, ptr %arrayidx, align 4
258 %arrayidx1 = getelementptr inbounds i32, ptr %0, i64 12
259 %2 = load i32, ptr %arrayidx1, align 4
260 %add = add nsw i32 %2, %1
261 %3 = load ptr, ptr @IntPtrOut, align 8
262 %arrayidx2 = getelementptr inbounds i32, ptr %3, i64 34
263 store i32 %add, ptr %arrayidx2, align 4
267 define dso_local void @ReadWriteFuncPtr() local_unnamed_addr #0 {
; Copies the 64-bit value held in @FuncPtrIn to @FuncPtrOut (the IR uses i64
; loads and stores). The checks expect one R_PPC64_PCREL_OPT hint, pairing the
; first pld (label .Lpcrel14) with the ld; the pld for @FuncPtrOut has no
; label and no .reloc.
268 ; CHECK-LABEL: ReadWriteFuncPtr:
269 ; CHECK: # %bb.0: # %entry
270 ; CHECK-NEXT: pld r3, FuncPtrIn@got@pcrel(0), 1
271 ; CHECK-NEXT: .Lpcrel14:
272 ; CHECK-NEXT: pld r4, FuncPtrOut@got@pcrel(0), 1
273 ; CHECK-NEXT: .reloc .Lpcrel14-8,R_PPC64_PCREL_OPT,.-(.Lpcrel14-8)
274 ; CHECK-NEXT: ld r3, 0(r3)
275 ; CHECK-NEXT: std r3, 0(r4)
278 %0 = load i64, ptr @FuncPtrIn, align 8
279 store i64 %0, ptr @FuncPtrOut, align 8
283 define dso_local void @FuncPtrCopy() local_unnamed_addr #0 {
; Stores the address of @Callee into @FuncPtrOut. The checks expect two plds
; and no R_PPC64_PCREL_OPT .reloc: the GOT value loaded for Callee is itself
; the data being stored (std r4), not an address that is dereferenced.
284 ; CHECK-LABEL: FuncPtrCopy:
285 ; CHECK: # %bb.0: # %entry
286 ; CHECK-NEXT: pld r3, FuncPtrOut@got@pcrel(0), 1
287 ; CHECK-NEXT: pld r4, Callee@got@pcrel(0), 1
288 ; CHECK-NEXT: std r4, 0(r3)
291 store ptr @Callee, ptr @FuncPtrOut, align 8
295 declare void @Callee(...)
297 define dso_local void @FuncPtrCall() local_unnamed_addr #0 {
; Loads the function pointer stored in @FuncPtrIn. The checks expect one
; R_PPC64_PCREL_OPT hint, pairing the pld (label .Lpcrel15) with ld r12,
; 0(r3); the loaded value is then moved to CTR (mtctr r12) ahead of the
; #TC_RETURNr8 line.
298 ; CHECK-LABEL: FuncPtrCall:
299 ; CHECK: # %bb.0: # %entry
300 ; CHECK-NEXT: pld r3, FuncPtrIn@got@pcrel(0), 1
301 ; CHECK-NEXT: .Lpcrel15:
302 ; CHECK-NEXT: .reloc .Lpcrel15-8,R_PPC64_PCREL_OPT,.-(.Lpcrel15-8)
303 ; CHECK-NEXT: ld r12, 0(r3)
304 ; CHECK-NEXT: mtctr r12
306 ; CHECK-NEXT: #TC_RETURNr8 ctr 0
308 %0 = load ptr, ptr @FuncPtrIn, align 8
313 define dso_local signext i32 @ReadVecElement() local_unnamed_addr #0 {
; Loads a <4 x i32> from @inputVi32 and extracts element 1. The checks expect
; the access to be a scalar lwa at displacement 4 rather than a full vector
; load, with one R_PPC64_PCREL_OPT hint pairing the pld (label .Lpcrel16) with
; that lwa.
314 ; CHECK-LABEL: ReadVecElement:
315 ; CHECK: # %bb.0: # %entry
316 ; CHECK-NEXT: pld r3, inputVi32@got@pcrel(0), 1
317 ; CHECK-NEXT: .Lpcrel16:
318 ; CHECK-NEXT: .reloc .Lpcrel16-8,R_PPC64_PCREL_OPT,.-(.Lpcrel16-8)
319 ; CHECK-NEXT: lwa r3, 4(r3)
322 %0 = load <4 x i32>, ptr @inputVi32, align 16
323 %vecext = extractelement <4 x i32> %0, i32 1
327 define dso_local signext i32 @VecMultiUse() local_unnamed_addr #0 {
; Loads @inputVi32 three times, with a call to @Callee between consecutive
; loads, and sums element 1 of the first load, element 2 of the second and
; element 0 of the third. The checks expect a single pld into r30 with no
; label and no R_PPC64_PCREL_OPT .reloc: the GOT address is kept in r30 across
; both calls and used by three separate lwz instructions.
328 ; CHECK-LABEL: VecMultiUse:
329 ; CHECK: # %bb.0: # %entry
330 ; CHECK-NEXT: mflr r0
331 ; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
332 ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
333 ; CHECK-NEXT: std r0, 16(r1)
334 ; CHECK-NEXT: stdu r1, -64(r1)
335 ; CHECK-NEXT: pld r30, inputVi32@got@pcrel(0), 1
336 ; CHECK-NEXT: lwz r29, 4(r30)
337 ; CHECK-NEXT: bl Callee@notoc
338 ; CHECK-NEXT: lwz r3, 8(r30)
339 ; CHECK-NEXT: add r29, r3, r29
340 ; CHECK-NEXT: bl Callee@notoc
341 ; CHECK-NEXT: lwz r3, 0(r30)
342 ; CHECK-NEXT: add r3, r29, r3
343 ; CHECK-NEXT: extsw r3, r3
344 ; CHECK-NEXT: addi r1, r1, 64
345 ; CHECK-NEXT: ld r0, 16(r1)
346 ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
347 ; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
348 ; CHECK-NEXT: mtlr r0
351 %0 = load <4 x i32>, ptr @inputVi32, align 16
352 tail call void @Callee()
353 %1 = load <4 x i32>, ptr @inputVi32, align 16
354 %2 = extractelement <4 x i32> %1, i32 2
355 %3 = extractelement <4 x i32> %0, i64 1
356 %4 = add nsw i32 %2, %3
357 tail call void @Callee()
358 %5 = load <4 x i32>, ptr @inputVi32, align 16
359 %vecext2 = extractelement <4 x i32> %5, i32 0
360 %add3 = add nsw i32 %4, %vecext2
364 define dso_local signext i32 @UseAddr(i32 signext %a) local_unnamed_addr #0 {
; Loads element 4 of @ArrayIn, adds %a, calls getAddr(@ArrayIn) and adds the
; call result. The checks expect a pld with no label and no R_PPC64_PCREL_OPT
; .reloc: the GOT address in r4 is used by the lwz and is also passed to
; getAddr as an argument (mr r3, r4).
365 ; CHECK-LABEL: UseAddr:
366 ; CHECK: # %bb.0: # %entry
367 ; CHECK-NEXT: mflr r0
368 ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
369 ; CHECK-NEXT: std r0, 16(r1)
370 ; CHECK-NEXT: stdu r1, -48(r1)
371 ; CHECK-NEXT: pld r4, ArrayIn@got@pcrel(0), 1
372 ; CHECK-NEXT: lwz r5, 16(r4)
373 ; CHECK-NEXT: add r30, r5, r3
374 ; CHECK-NEXT: mr r3, r4
375 ; CHECK-NEXT: bl getAddr@notoc
376 ; CHECK-NEXT: add r3, r30, r3
377 ; CHECK-NEXT: extsw r3, r3
378 ; CHECK-NEXT: addi r1, r1, 48
379 ; CHECK-NEXT: ld r0, 16(r1)
380 ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
381 ; CHECK-NEXT: mtlr r0
384 %0 = load i32, ptr getelementptr inbounds ([10 x i32], ptr @ArrayIn, i64 0, i64 4), align 4
385 %add = add nsw i32 %0, %a
386 %call = tail call signext i32 @getAddr(ptr @ArrayIn)
387 %add1 = add nsw i32 %add, %call
391 declare signext i32 @getAddr(ptr) local_unnamed_addr
393 define dso_local nonnull ptr @AddrTaken32() local_unnamed_addr #0 {
; Returns a nonnull ptr. The checks expect the address of @input32 to be
; loaded from the GOT with a pld that has no label and no R_PPC64_PCREL_OPT
; .reloc.
; NOTE(review): presumably the function returns @input32 itself, so the
; address is never dereferenced here -- confirm against the function body.
394 ; CHECK-LABEL: AddrTaken32:
395 ; CHECK: # %bb.0: # %entry
396 ; CHECK-NEXT: pld r3, input32@got@pcrel(0), 1
402 attributes #0 = { nounwind }