1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=arm64-eabi -verify-machineinstrs | FileCheck %s
; Two adjacent i32 loads feeding one add: the load/store optimizer should
; merge them into a single 32-bit ldp.
4 define i32 @ldp_int(i32* %p) nounwind {
5 ; CHECK-LABEL: ldp_int:
7 ; CHECK-NEXT: ldp w8, w9, [x0]
8 ; CHECK-NEXT: add w0, w9, w8
10 %tmp = load i32, i32* %p, align 4
11 %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
12 %tmp1 = load i32, i32* %add.ptr, align 4
13 %add = add nsw i32 %tmp1, %tmp
; Both adjacent i32 loads are sign-extended to i64, so the pair should be
; formed with the sign-extending variant, ldpsw.
17 define i64 @ldp_sext_int(i32* %p) nounwind {
18 ; CHECK-LABEL: ldp_sext_int:
20 ; CHECK-NEXT: ldpsw x8, x9, [x0]
21 ; CHECK-NEXT: add x0, x9, x8
23 %tmp = load i32, i32* %p, align 4
24 %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
25 %tmp1 = load i32, i32* %add.ptr, align 4
26 %sexttmp = sext i32 %tmp to i64
27 %sexttmp1 = sext i32 %tmp1 to i64
28 %add = add nsw i64 %sexttmp1, %sexttmp
; Mixed extensions: the first load is sext'd, the second zext'd. ldpsw would
; sign-extend both results, so we expect a plain 32-bit ldp followed by an
; explicit sxtw of the first result only.
32 define i64 @ldp_half_sext_res0_int(i32* %p) nounwind {
33 ; CHECK-LABEL: ldp_half_sext_res0_int:
35 ; CHECK-NEXT: ldp w8, w9, [x0]
36 ; CHECK-NEXT: // kill: def $w8 killed $w8 def $x8
37 ; CHECK-NEXT: sxtw x8, w8
38 ; CHECK-NEXT: add x0, x9, x8
40 %tmp = load i32, i32* %p, align 4
41 %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
42 %tmp1 = load i32, i32* %add.ptr, align 4
43 %sexttmp = sext i32 %tmp to i64
44 %sexttmp1 = zext i32 %tmp1 to i64
45 %add = add nsw i64 %sexttmp1, %sexttmp
; Mirror of the previous test: here the second load is the sext'd one, so we
; expect a plain ldp plus an explicit sxtw of the second result.
49 define i64 @ldp_half_sext_res1_int(i32* %p) nounwind {
50 ; CHECK-LABEL: ldp_half_sext_res1_int:
52 ; CHECK-NEXT: ldp w8, w9, [x0]
53 ; CHECK-NEXT: // kill: def $w9 killed $w9 def $x9
54 ; CHECK-NEXT: sxtw x9, w9
55 ; CHECK-NEXT: add x0, x9, x8
57 %tmp = load i32, i32* %p, align 4
58 %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
59 %tmp1 = load i32, i32* %add.ptr, align 4
60 %sexttmp = zext i32 %tmp to i64
61 %sexttmp1 = sext i32 %tmp1 to i64
62 %add = add nsw i64 %sexttmp1, %sexttmp
; Two adjacent i64 loads should be merged into a single 64-bit ldp.
67 define i64 @ldp_long(i64* %p) nounwind {
68 ; CHECK-LABEL: ldp_long:
70 ; CHECK-NEXT: ldp x8, x9, [x0]
71 ; CHECK-NEXT: add x0, x9, x8
73 %tmp = load i64, i64* %p, align 8
74 %add.ptr = getelementptr inbounds i64, i64* %p, i64 1
75 %tmp1 = load i64, i64* %add.ptr, align 8
76 %add = add nsw i64 %tmp1, %tmp
; Two adjacent float loads should pair into an ldp of s-registers.
80 define float @ldp_float(float* %p) nounwind {
81 ; CHECK-LABEL: ldp_float:
83 ; CHECK-NEXT: ldp s0, s1, [x0]
84 ; CHECK-NEXT: fadd s0, s0, s1
86 %tmp = load float, float* %p, align 4
87 %add.ptr = getelementptr inbounds float, float* %p, i64 1
88 %tmp1 = load float, float* %add.ptr, align 4
89 %add = fadd float %tmp, %tmp1
; Two adjacent double loads should pair into an ldp of d-registers.
93 define double @ldp_double(double* %p) nounwind {
94 ; CHECK-LABEL: ldp_double:
96 ; CHECK-NEXT: ldp d0, d1, [x0]
97 ; CHECK-NEXT: fadd d0, d0, d1
99 %tmp = load double, double* %p, align 8
100 %add.ptr = getelementptr inbounds double, double* %p, i64 1
101 %tmp1 = load double, double* %add.ptr, align 8
102 %add = fadd double %tmp, %tmp1
; Two adjacent 128-bit vector loads should pair into an ldp of q-registers.
106 define <2 x double> @ldp_doublex2(<2 x double>* %p) nounwind {
107 ; CHECK-LABEL: ldp_doublex2:
109 ; CHECK-NEXT: ldp q0, q1, [x0]
110 ; CHECK-NEXT: fadd v0.2d, v0.2d, v1.2d
112 %tmp = load <2 x double>, <2 x double>* %p, align 16
113 %add.ptr = getelementptr inbounds <2 x double>, <2 x double>* %p, i64 1
114 %tmp1 = load <2 x double>, <2 x double>* %add.ptr, align 16
115 %add = fadd <2 x double> %tmp, %tmp1
116 ret <2 x double> %add
119 ; Test the load/store optimizer---combine ldurs into a ldp, if appropriate
; Adjacent i32 loads at negative offsets (-8 and -4) should combine into a
; single ldp with a negative immediate.
120 define i32 @ldur_int(i32* %a) nounwind {
121 ; CHECK-LABEL: ldur_int:
123 ; CHECK-NEXT: ldp w9, w8, [x0, #-8]
124 ; CHECK-NEXT: add w0, w8, w9
126 %p1 = getelementptr inbounds i32, i32* %a, i32 -1
127 %tmp1 = load i32, i32* %p1, align 2
128 %p2 = getelementptr inbounds i32, i32* %a, i32 -2
129 %tmp2 = load i32, i32* %p2, align 2
130 %tmp3 = add i32 %tmp1, %tmp2
; Negative-offset variant with both results sign-extended: expect ldpsw with
; a negative immediate.
134 define i64 @ldur_sext_int(i32* %a) nounwind {
135 ; CHECK-LABEL: ldur_sext_int:
137 ; CHECK-NEXT: ldpsw x9, x8, [x0, #-8]
138 ; CHECK-NEXT: add x0, x8, x9
140 %p1 = getelementptr inbounds i32, i32* %a, i32 -1
141 %tmp1 = load i32, i32* %p1, align 2
142 %p2 = getelementptr inbounds i32, i32* %a, i32 -2
143 %tmp2 = load i32, i32* %p2, align 2
144 %sexttmp1 = sext i32 %tmp1 to i64
145 %sexttmp2 = sext i32 %tmp2 to i64
146 %tmp3 = add i64 %sexttmp1, %sexttmp2
; Negative offsets with mixed extensions (first result zext, second sext):
; expect a plain ldp plus an explicit sxtw of the sign-extended result.
150 define i64 @ldur_half_sext_int_res0(i32* %a) nounwind {
151 ; CHECK-LABEL: ldur_half_sext_int_res0:
153 ; CHECK-NEXT: ldp w9, w8, [x0, #-8]
154 ; CHECK-NEXT: // kill: def $w9 killed $w9 def $x9
155 ; CHECK-NEXT: sxtw x9, w9
156 ; CHECK-NEXT: add x0, x8, x9
158 %p1 = getelementptr inbounds i32, i32* %a, i32 -1
159 %tmp1 = load i32, i32* %p1, align 2
160 %p2 = getelementptr inbounds i32, i32* %a, i32 -2
161 %tmp2 = load i32, i32* %p2, align 2
162 %sexttmp1 = zext i32 %tmp1 to i64
163 %sexttmp2 = sext i32 %tmp2 to i64
164 %tmp3 = add i64 %sexttmp1, %sexttmp2
; Mirror of the previous test: first result sext, second zext. Expect a plain
; ldp and an sxtw of the other register.
168 define i64 @ldur_half_sext_int_res1(i32* %a) nounwind {
169 ; CHECK-LABEL: ldur_half_sext_int_res1:
171 ; CHECK-NEXT: ldp w9, w8, [x0, #-8]
172 ; CHECK-NEXT: // kill: def $w8 killed $w8 def $x8
173 ; CHECK-NEXT: sxtw x8, w8
174 ; CHECK-NEXT: add x0, x8, x9
176 %p1 = getelementptr inbounds i32, i32* %a, i32 -1
177 %tmp1 = load i32, i32* %p1, align 2
178 %p2 = getelementptr inbounds i32, i32* %a, i32 -2
179 %tmp2 = load i32, i32* %p2, align 2
180 %sexttmp1 = sext i32 %tmp1 to i64
181 %sexttmp2 = zext i32 %tmp2 to i64
182 %tmp3 = add i64 %sexttmp1, %sexttmp2
; i64 loads at -8 and -16: expect a single ldp x-reg pair at #-16.
187 define i64 @ldur_long(i64* %a) nounwind ssp {
188 ; CHECK-LABEL: ldur_long:
190 ; CHECK-NEXT: ldp x9, x8, [x0, #-16]
191 ; CHECK-NEXT: add x0, x8, x9
193 %p1 = getelementptr inbounds i64, i64* %a, i64 -1
194 %tmp1 = load i64, i64* %p1, align 2
195 %p2 = getelementptr inbounds i64, i64* %a, i64 -2
196 %tmp2 = load i64, i64* %p2, align 2
197 %tmp3 = add i64 %tmp1, %tmp2
; float loads at -4 and -8: expect a single s-register ldp at #-8.
201 define float @ldur_float(float* %a) {
202 ; CHECK-LABEL: ldur_float:
204 ; CHECK-NEXT: ldp s1, s0, [x0, #-8]
205 ; CHECK-NEXT: fadd s0, s0, s1
207 %p1 = getelementptr inbounds float, float* %a, i64 -1
208 %tmp1 = load float, float* %p1, align 2
209 %p2 = getelementptr inbounds float, float* %a, i64 -2
210 %tmp2 = load float, float* %p2, align 2
211 %tmp3 = fadd float %tmp1, %tmp2
; double loads at -8 and -16: expect a single d-register ldp at #-16.
215 define double @ldur_double(double* %a) {
216 ; CHECK-LABEL: ldur_double:
218 ; CHECK-NEXT: ldp d1, d0, [x0, #-16]
219 ; CHECK-NEXT: fadd d0, d0, d1
221 %p1 = getelementptr inbounds double, double* %a, i64 -1
222 %tmp1 = load double, double* %p1, align 2
223 %p2 = getelementptr inbounds double, double* %a, i64 -2
224 %tmp2 = load double, double* %p2, align 2
225 %tmp3 = fadd double %tmp1, %tmp2
; 128-bit vector loads at -16 and -32: expect a single q-register ldp at #-32.
229 define <2 x double> @ldur_doublex2(<2 x double>* %a) {
230 ; CHECK-LABEL: ldur_doublex2:
232 ; CHECK-NEXT: ldp q1, q0, [x0, #-32]
233 ; CHECK-NEXT: fadd v0.2d, v0.2d, v1.2d
235 %p1 = getelementptr inbounds <2 x double>, <2 x double>* %a, i64 -1
236 %tmp1 = load <2 x double>, <2 x double>* %p1, align 2
237 %p2 = getelementptr inbounds <2 x double>, <2 x double>* %a, i64 -2
238 %tmp2 = load <2 x double>, <2 x double>* %p2, align 2
239 %tmp3 = fadd <2 x double> %tmp1, %tmp2
240 ret <2 x double> %tmp3
243 ; Now check some boundary conditions
; Offsets -248/-256: per the CHECK line below, #-256 is still within the
; pairable immediate range, so an ldp is expected.
244 define i64 @pairUpBarelyIn(i64* %a) nounwind ssp {
245 ; CHECK-LABEL: pairUpBarelyIn:
247 ; CHECK-NEXT: ldp x9, x8, [x0, #-256]
248 ; CHECK-NEXT: add x0, x8, x9
250 %p1 = getelementptr inbounds i64, i64* %a, i64 -31
251 %tmp1 = load i64, i64* %p1, align 2
252 %p2 = getelementptr inbounds i64, i64* %a, i64 -32
253 %tmp2 = load i64, i64* %p2, align 2
254 %tmp3 = add i64 %tmp1, %tmp2
; Sign-extended variant of the boundary test: i32 loads at -252/-256 should
; still pair into an ldpsw at the #-256 boundary.
258 define i64 @pairUpBarelyInSext(i32* %a) nounwind ssp {
259 ; CHECK-LABEL: pairUpBarelyInSext:
261 ; CHECK-NEXT: ldpsw x9, x8, [x0, #-256]
262 ; CHECK-NEXT: add x0, x8, x9
264 %p1 = getelementptr inbounds i32, i32* %a, i64 -63
265 %tmp1 = load i32, i32* %p1, align 2
266 %p2 = getelementptr inbounds i32, i32* %a, i64 -64
267 %tmp2 = load i32, i32* %p2, align 2
268 %sexttmp1 = sext i32 %tmp1 to i64
269 %sexttmp2 = sext i32 %tmp2 to i64
270 %tmp3 = add i64 %sexttmp1, %sexttmp2
; Boundary test with mixed extensions (first zext, second sext): expect a
; plain ldp at #-256 plus an explicit sxtw of the sign-extended result.
274 define i64 @pairUpBarelyInHalfSextRes0(i32* %a) nounwind ssp {
275 ; CHECK-LABEL: pairUpBarelyInHalfSextRes0:
277 ; CHECK-NEXT: ldp w9, w8, [x0, #-256]
278 ; CHECK-NEXT: // kill: def $w9 killed $w9 def $x9
279 ; CHECK-NEXT: sxtw x9, w9
280 ; CHECK-NEXT: add x0, x8, x9
282 %p1 = getelementptr inbounds i32, i32* %a, i64 -63
283 %tmp1 = load i32, i32* %p1, align 2
284 %p2 = getelementptr inbounds i32, i32* %a, i64 -64
285 %tmp2 = load i32, i32* %p2, align 2
286 %sexttmp1 = zext i32 %tmp1 to i64
287 %sexttmp2 = sext i32 %tmp2 to i64
288 %tmp3 = add i64 %sexttmp1, %sexttmp2
; Mirror of the previous test (first sext, second zext): same ldp at #-256,
; with the sxtw applied to the other register.
292 define i64 @pairUpBarelyInHalfSextRes1(i32* %a) nounwind ssp {
293 ; CHECK-LABEL: pairUpBarelyInHalfSextRes1:
295 ; CHECK-NEXT: ldp w9, w8, [x0, #-256]
296 ; CHECK-NEXT: // kill: def $w8 killed $w8 def $x8
297 ; CHECK-NEXT: sxtw x8, w8
298 ; CHECK-NEXT: add x0, x8, x9
300 %p1 = getelementptr inbounds i32, i32* %a, i64 -63
301 %tmp1 = load i32, i32* %p1, align 2
302 %p2 = getelementptr inbounds i32, i32* %a, i64 -64
303 %tmp2 = load i32, i32* %p2, align 2
304 %sexttmp1 = sext i32 %tmp1 to i64
305 %sexttmp2 = zext i32 %tmp2 to i64
306 %tmp3 = add i64 %sexttmp1, %sexttmp2
; Offsets -256/-264: per the CHECKs, the pair is just out of range, so the
; loads must stay separate (no ldp before the add).
310 define i64 @pairUpBarelyOut(i64* %a) nounwind ssp {
311 ; Don't be fragile about which loads or manipulations of the base register
312 ; are used---just check that there isn't an ldp before the add
313 ; CHECK-LABEL: pairUpBarelyOut:
315 ; CHECK-NEXT: sub x8, x0, #264
316 ; CHECK-NEXT: ldur x9, [x0, #-256]
317 ; CHECK-NEXT: ldr x8, [x8]
318 ; CHECK-NEXT: add x0, x9, x8
320 %p1 = getelementptr inbounds i64, i64* %a, i64 -32
321 %tmp1 = load i64, i64* %p1, align 2
322 %p2 = getelementptr inbounds i64, i64* %a, i64 -33
323 %tmp2 = load i64, i64* %p2, align 2
324 %tmp3 = add i64 %tmp1, %tmp2
; Sign-extended out-of-range variant (offsets -256/-260): expect separate
; sign-extending loads, no ldpsw.
328 define i64 @pairUpBarelyOutSext(i32* %a) nounwind ssp {
329 ; Don't be fragile about which loads or manipulations of the base register
330 ; are used---just check that there isn't an ldp before the add
331 ; CHECK-LABEL: pairUpBarelyOutSext:
333 ; CHECK-NEXT: sub x8, x0, #260
334 ; CHECK-NEXT: ldursw x9, [x0, #-256]
335 ; CHECK-NEXT: ldrsw x8, [x8]
336 ; CHECK-NEXT: add x0, x9, x8
338 %p1 = getelementptr inbounds i32, i32* %a, i64 -64
339 %tmp1 = load i32, i32* %p1, align 2
340 %p2 = getelementptr inbounds i32, i32* %a, i64 -65
341 %tmp2 = load i32, i32* %p2, align 2
342 %sexttmp1 = sext i32 %tmp1 to i64
343 %sexttmp2 = sext i32 %tmp2 to i64
344 %tmp3 = add i64 %sexttmp1, %sexttmp2
; The loads sit 1 byte past 8-byte-aligned slots (offsets -143/-135), so they
; cannot be paired; expect two separate ldur instructions.
348 define i64 @pairUpNotAligned(i64* %a) nounwind ssp {
349 ; CHECK-LABEL: pairUpNotAligned:
351 ; CHECK-NEXT: ldur x8, [x0, #-143]
352 ; CHECK-NEXT: ldur x9, [x0, #-135]
353 ; CHECK-NEXT: add x0, x8, x9
355 %p1 = getelementptr inbounds i64, i64* %a, i64 -18
356 %bp1 = bitcast i64* %p1 to i8*
357 %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
358 %dp1 = bitcast i8* %bp1p1 to i64*
359 %tmp1 = load i64, i64* %dp1, align 1
361 %p2 = getelementptr inbounds i64, i64* %a, i64 -17
362 %bp2 = bitcast i64* %p2 to i8*
363 %bp2p1 = getelementptr inbounds i8, i8* %bp2, i64 1
364 %dp2 = bitcast i8* %bp2p1 to i64*
365 %tmp2 = load i64, i64* %dp2, align 1
367 %tmp3 = add i64 %tmp1, %tmp2
; Sign-extended variant of the misaligned case (offsets -71/-67): the pair
; cannot be formed, so expect two separate ldursw instructions.
371 define i64 @pairUpNotAlignedSext(i32* %a) nounwind ssp {
372 ; CHECK-LABEL: pairUpNotAlignedSext:
374 ; CHECK-NEXT: ldursw x8, [x0, #-71]
375 ; CHECK-NEXT: ldursw x9, [x0, #-67]
376 ; CHECK-NEXT: add x0, x8, x9
378 %p1 = getelementptr inbounds i32, i32* %a, i64 -18
379 %bp1 = bitcast i32* %p1 to i8*
380 %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
381 %dp1 = bitcast i8* %bp1p1 to i32*
382 %tmp1 = load i32, i32* %dp1, align 1
384 %p2 = getelementptr inbounds i32, i32* %a, i64 -17
385 %bp2 = bitcast i32* %p2 to i8*
386 %bp2p1 = getelementptr inbounds i8, i8* %bp2, i64 1
387 %dp2 = bitcast i8* %bp2p1 to i32*
388 %tmp2 = load i32, i32* %dp2, align 1
390 %sexttmp1 = sext i32 %tmp1 to i64
391 %sexttmp2 = sext i32 %tmp2 to i64
392 %tmp3 = add i64 %sexttmp1, %sexttmp2
; External callee; the hyphen in the name forces the quoted "use-ptr" symbol
; seen in the CHECK lines below.
396 declare void @use-ptr(i32*)
; Sign-extended pair after the pointer escapes through a call: expect the
; loads to combine into an ldpsw off the preserved base register (x19).
398 define i64 @ldp_sext_int_pre(i32* %p) nounwind {
399 ; CHECK-LABEL: ldp_sext_int_pre:
401 ; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
402 ; CHECK-NEXT: mov x19, x0
403 ; CHECK-NEXT: add x0, x0, #8
404 ; CHECK-NEXT: bl "use-ptr"
405 ; CHECK-NEXT: ldpsw x8, x9, [x19, #8]
406 ; CHECK-NEXT: add x0, x9, x8
407 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
409 %ptr = getelementptr inbounds i32, i32* %p, i64 2
410 call void @use-ptr(i32* %ptr)
411 %add.ptr = getelementptr inbounds i32, i32* %ptr, i64 0
412 %tmp = load i32, i32* %add.ptr, align 4
413 %add.ptr1 = getelementptr inbounds i32, i32* %ptr, i64 1
414 %tmp1 = load i32, i32* %add.ptr1, align 4
415 %sexttmp = sext i32 %tmp to i64
416 %sexttmp1 = sext i32 %tmp1 to i64
417 %add = add nsw i64 %sexttmp1, %sexttmp
; Sign-extended pair where the advanced pointer is used after the loads:
; expect a post-indexed ldpsw ([x0], #8) that folds the base update into the
; paired load.
421 define i64 @ldp_sext_int_post(i32* %p) nounwind {
422 ; CHECK-LABEL: ldp_sext_int_post:
424 ; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
425 ; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
426 ; CHECK-NEXT: ldpsw x19, x20, [x0], #8
427 ; CHECK-NEXT: bl "use-ptr"
428 ; CHECK-NEXT: add x0, x20, x19
429 ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
430 ; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
432 %tmp = load i32, i32* %p, align 4
433 %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
434 %tmp1 = load i32, i32* %add.ptr, align 4
435 %sexttmp = sext i32 %tmp to i64
436 %sexttmp1 = sext i32 %tmp1 to i64
437 %ptr = getelementptr inbounds i32, i32* %add.ptr, i64 1
438 call void @use-ptr(i32* %ptr)
439 %add = add nsw i64 %sexttmp1, %sexttmp