1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s
5 ; unsigned long test_no_prep(char *p, int count) {
6 ; unsigned long i=0, res=0;
11 ; for (; i < count ; i++) {
12 ; unsigned long x1 = *(unsigned long *)(p + i + DISP1);
13 ; unsigned long x2 = *(unsigned long *)(p + i + DISP2);
14 ; unsigned long x3 = *(unsigned long *)(p + i + DISP3);
15 ; unsigned long x4 = *(unsigned long *)(p + i + DISP4);
; Codegen test: each iteration loads four i64 values at byte offsets 4001,
; 4002, 4003 and 4004 from %0 + i and multiplies them together.
; Expected pwr9 output (asserted below): a single pointer r3 = %0 + 4004 is
; bumped by 1 per iteration; the first three loads are indexed `ldx` through
; r6/r7/r8 = -3/-2/-1 and only the last one is an immediate-form `ld 0(r3)`.
; The counter register is loaded from the count clamped to at least 1
; (cmpldi/iselgt), after the count == 0 case has already branched away.
21 define i64 @test_no_prep(i8* %0, i32 signext %1) {
22 ; CHECK-LABEL: test_no_prep:
24 ; CHECK-NEXT: cmplwi r4, 0
25 ; CHECK-NEXT: beq cr0, .LBB0_4
26 ; CHECK-NEXT: # %bb.1: # %.preheader
27 ; CHECK-NEXT: cmpldi r4, 1
28 ; CHECK-NEXT: li r5, 1
29 ; CHECK-NEXT: addi r3, r3, 4004
30 ; CHECK-NEXT: li r6, -3
31 ; CHECK-NEXT: li r7, -2
32 ; CHECK-NEXT: li r8, -1
33 ; CHECK-NEXT: iselgt r5, r4, r5
34 ; CHECK-NEXT: mtctr r5
35 ; CHECK-NEXT: li r5, 0
36 ; CHECK-NEXT: .p2align 4
37 ; CHECK-NEXT: .LBB0_2:
38 ; CHECK-NEXT: ldx r9, r3, r6
39 ; CHECK-NEXT: ldx r10, r3, r7
40 ; CHECK-NEXT: ldx r11, r3, r8
41 ; CHECK-NEXT: ld r12, 0(r3)
42 ; CHECK-NEXT: addi r3, r3, 1
43 ; CHECK-NEXT: mulld r9, r10, r9
44 ; CHECK-NEXT: mulld r9, r9, r11
45 ; CHECK-NEXT: maddld r5, r9, r12, r5
46 ; CHECK-NEXT: bdnz .LBB0_2
47 ; CHECK-NEXT: # %bb.3:
48 ; CHECK-NEXT: add r3, r5, r4
50 ; CHECK-NEXT: .LBB0_4:
51 ; CHECK-NEXT: addi r3, r4, 0
; Entry: %3 is the sign-extended count; a zero count branches straight to the
; exit block %27 without entering the loop.
53 %3 = sext i32 %1 to i64
54 %4 = icmp eq i32 %1, 0
55 br i1 %4, label %27, label %5
; Loop header: %6 is the byte index i (0 on entry, %25 = i + 1 afterwards);
; %7 is the loop-carried value %24.
58 %6 = phi i64 [ %25, %5 ], [ 0, %2 ]
59 %7 = phi i64 [ %24, %5 ], [ 0, %2 ]
; %8 = %0 + i; the four loads below read i64 values at %8 + 4001 .. %8 + 4004,
; i.e. addresses that differ by a single byte.
; NOTE(review): every load is marked `align 8`, which cannot hold for all four
; addresses at once - presumably carried over from the C pointer casts;
; confirm this is intentional for the test.
60 %8 = getelementptr inbounds i8, i8* %0, i64 %6
61 %9 = getelementptr inbounds i8, i8* %8, i64 4001
62 %10 = bitcast i8* %9 to i64*
63 %11 = load i64, i64* %10, align 8
64 %12 = getelementptr inbounds i8, i8* %8, i64 4002
65 %13 = bitcast i8* %12 to i64*
66 %14 = load i64, i64* %13, align 8
67 %15 = getelementptr inbounds i8, i8* %8, i64 4003
68 %16 = bitcast i8* %15 to i64*
69 %17 = load i64, i64* %16, align 8
70 %18 = getelementptr inbounds i8, i8* %8, i64 4004
71 %19 = bitcast i8* %18 to i64*
72 %20 = load i64, i64* %19, align 8
; Product of the four loaded values.
73 %21 = mul i64 %14, %11
74 %22 = mul i64 %21, %17
75 %23 = mul i64 %22, %20
; Advance i and keep looping while i + 1 < %3 (unsigned comparison).
77 %25 = add nuw i64 %6, 1
78 %26 = icmp ult i64 %25, %3
79 br i1 %26, label %5, label %27
; Exit value: 0 when the loop was skipped, otherwise the last %24.
82 %28 = phi i64 [ 0, %2 ], [ %24, %5 ]
88 ; unsigned long test_ds_prep(char *p, int count) {
89 ; unsigned long i=0, res=0;
94 ; for (; i < count ; i++) {
95 ; unsigned long x1 = *(unsigned long *)(p + i + DISP1);
96 ; unsigned long x2 = *(unsigned long *)(p + i + DISP2);
97 ; unsigned long x3 = *(unsigned long *)(p + i + DISP3);
98 ; unsigned long x4 = *(unsigned long *)(p + i + DISP4);
101 ; return res + count;
; Codegen test: each iteration loads four i64 values at byte offsets 4001,
; 4002, 4003 and 4006 from %0 + i, multiplies them and accumulates the product.
; Expected pwr9 output (asserted below): a single pointer r6 = %0 + 4002 is
; bumped by 1 per iteration. The loads whose distance from r6 is a multiple
; of 4 (offsets 0 and 4, i.e. 4002 and 4006) are immediate-form `ld`; the
; other two (4001 and 4003) are indexed `ldx` through r7 = -1 and r5 = 1.
; r5 is the same constant 1 that feeds the iselgt trip-count clamp.
104 define i64 @test_ds_prep(i8* %0, i32 signext %1) {
105 ; CHECK-LABEL: test_ds_prep:
107 ; CHECK-NEXT: cmplwi r4, 0
108 ; CHECK-NEXT: beq cr0, .LBB1_4
109 ; CHECK-NEXT: # %bb.1: # %.preheader
110 ; CHECK-NEXT: cmpldi r4, 1
111 ; CHECK-NEXT: li r5, 1
112 ; CHECK-NEXT: addi r6, r3, 4002
113 ; CHECK-NEXT: li r7, -1
114 ; CHECK-NEXT: iselgt r3, r4, r5
115 ; CHECK-NEXT: mtctr r3
116 ; CHECK-NEXT: li r3, 0
117 ; CHECK-NEXT: .p2align 4
118 ; CHECK-NEXT: .LBB1_2:
119 ; CHECK-NEXT: ldx r8, r6, r7
120 ; CHECK-NEXT: ld r9, 0(r6)
121 ; CHECK-NEXT: ldx r10, r6, r5
122 ; CHECK-NEXT: ld r11, 4(r6)
123 ; CHECK-NEXT: addi r6, r6, 1
124 ; CHECK-NEXT: mulld r8, r9, r8
125 ; CHECK-NEXT: mulld r8, r8, r10
126 ; CHECK-NEXT: maddld r3, r8, r11, r3
127 ; CHECK-NEXT: bdnz .LBB1_2
128 ; CHECK-NEXT: # %bb.3:
129 ; CHECK-NEXT: add r3, r3, r4
131 ; CHECK-NEXT: .LBB1_4:
132 ; CHECK-NEXT: addi r3, r4, 0
; Entry: %3 is the sign-extended count; a zero count skips the loop.
134 %3 = sext i32 %1 to i64
135 %4 = icmp eq i32 %1, 0
136 br i1 %4, label %27, label %5
; Loop header: %6 is the byte index i, %7 the running sum.
139 %6 = phi i64 [ %25, %5 ], [ 0, %2 ]
140 %7 = phi i64 [ %24, %5 ], [ 0, %2 ]
; %8 = %0 + i; the loads read i64 values at %8 + 4001, 4002, 4003 and 4006.
; NOTE(review): all loads are marked `align 8` although the addresses are
; 1 to 5 bytes apart - presumably inherited from the C casts; confirm.
141 %8 = getelementptr inbounds i8, i8* %0, i64 %6
142 %9 = getelementptr inbounds i8, i8* %8, i64 4001
143 %10 = bitcast i8* %9 to i64*
144 %11 = load i64, i64* %10, align 8
145 %12 = getelementptr inbounds i8, i8* %8, i64 4002
146 %13 = bitcast i8* %12 to i64*
147 %14 = load i64, i64* %13, align 8
148 %15 = getelementptr inbounds i8, i8* %8, i64 4003
149 %16 = bitcast i8* %15 to i64*
150 %17 = load i64, i64* %16, align 8
151 %18 = getelementptr inbounds i8, i8* %8, i64 4006
152 %19 = bitcast i8* %18 to i64*
153 %20 = load i64, i64* %19, align 8
; res += x1 * x2 * x3 * x4
154 %21 = mul i64 %14, %11
155 %22 = mul i64 %21, %17
156 %23 = mul i64 %22, %20
157 %24 = add i64 %23, %7
; Advance i and keep looping while i + 1 < %3 (unsigned comparison).
158 %25 = add nuw i64 %6, 1
159 %26 = icmp ult i64 %25, %3
160 br i1 %26, label %5, label %27
; Exit: the accumulated sum (0 if the loop was skipped) plus the count.
163 %28 = phi i64 [ 0, %2 ], [ %24, %5 ]
164 %29 = add i64 %28, %3
168 ; test_max_number_reminder:
169 ; unsigned long test_max_number_reminder(char *p, int count) {
170 ; unsigned long i=0, res=0;
180 ; for (; i < count ; i++) {
181 ; unsigned long x1 = *(unsigned long *)(p + i + DISP1);
182 ; unsigned long x2 = *(unsigned long *)(p + i + DISP2);
183 ; unsigned long x3 = *(unsigned long *)(p + i + DISP3);
184 ; unsigned long x4 = *(unsigned long *)(p + i + DISP4);
185 ; unsigned long x5 = *(unsigned long *)(p + i + DISP5);
186 ; unsigned long x6 = *(unsigned long *)(p + i + DISP6);
187 ; unsigned long x7 = *(unsigned long *)(p + i + DISP7);
188 ; unsigned long x8 = *(unsigned long *)(p + i + DISP8);
189 ; unsigned long x9 = *(unsigned long *)(p + i + DISP9);
190 ; res += x1*x2*x3*x4*x5*x6*x7*x8*x9;
192 ; return res + count;
; Codegen test: each iteration loads nine i64 values at byte offsets 4001,
; 4002, 4003, 4005, 4006, 4007, 4014, 4010 and 4011 from %0 + i, multiplies
; them and accumulates the product.
; Expected pwr9 output (asserted below): a single pointer r9 = %0 + 4002 is
; bumped by 1 per iteration. Offsets 4002, 4006, 4010 and 4014 (distance
; 0, 4, 8, 12 from r9) are immediate-form `ld`; the remaining five loads are
; indexed `ldx` through r6 = -1, r5 = 1, r7 = 3, r8 = 5 and r10 = 9.
; r25-r30 are spilled below r1 on entry and reloaded on exit.
195 define i64 @test_max_number_reminder(i8* %0, i32 signext %1) {
196 ; CHECK-LABEL: test_max_number_reminder:
198 ; CHECK-NEXT: cmplwi r4, 0
199 ; CHECK-NEXT: std r25, -56(r1) # 8-byte Folded Spill
200 ; CHECK-NEXT: std r26, -48(r1) # 8-byte Folded Spill
201 ; CHECK-NEXT: std r27, -40(r1) # 8-byte Folded Spill
202 ; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill
203 ; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
204 ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
205 ; CHECK-NEXT: beq cr0, .LBB2_3
206 ; CHECK-NEXT: # %bb.1: # %.preheader
207 ; CHECK-NEXT: cmpldi r4, 1
208 ; CHECK-NEXT: li r5, 1
209 ; CHECK-NEXT: addi r9, r3, 4002
210 ; CHECK-NEXT: li r6, -1
211 ; CHECK-NEXT: li r7, 3
212 ; CHECK-NEXT: li r8, 5
213 ; CHECK-NEXT: li r10, 9
214 ; CHECK-NEXT: iselgt r3, r4, r5
215 ; CHECK-NEXT: mtctr r3
216 ; CHECK-NEXT: li r3, 0
217 ; CHECK-NEXT: .p2align 4
218 ; CHECK-NEXT: .LBB2_2:
219 ; CHECK-NEXT: ldx r11, r9, r6
220 ; CHECK-NEXT: ld r12, 0(r9)
221 ; CHECK-NEXT: ldx r0, r9, r5
222 ; CHECK-NEXT: ldx r30, r9, r7
223 ; CHECK-NEXT: mulld r11, r12, r11
224 ; CHECK-NEXT: ld r29, 4(r9)
225 ; CHECK-NEXT: ldx r28, r9, r8
226 ; CHECK-NEXT: ld r27, 12(r9)
227 ; CHECK-NEXT: ld r26, 8(r9)
228 ; CHECK-NEXT: ldx r25, r9, r10
229 ; CHECK-NEXT: addi r9, r9, 1
230 ; CHECK-NEXT: mulld r11, r11, r0
231 ; CHECK-NEXT: mulld r11, r11, r30
232 ; CHECK-NEXT: mulld r11, r11, r29
233 ; CHECK-NEXT: mulld r11, r11, r28
234 ; CHECK-NEXT: mulld r11, r11, r27
235 ; CHECK-NEXT: mulld r11, r11, r26
236 ; CHECK-NEXT: maddld r3, r11, r25, r3
237 ; CHECK-NEXT: bdnz .LBB2_2
238 ; CHECK-NEXT: b .LBB2_4
239 ; CHECK-NEXT: .LBB2_3:
240 ; CHECK-NEXT: li r3, 0
241 ; CHECK-NEXT: .LBB2_4:
242 ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
243 ; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
244 ; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
245 ; CHECK-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
246 ; CHECK-NEXT: add r3, r3, r4
247 ; CHECK-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
248 ; CHECK-NEXT: ld r25, -56(r1) # 8-byte Folded Reload
; Entry: %3 is the sign-extended count; a zero count skips the loop.
250 %3 = sext i32 %1 to i64
251 %4 = icmp eq i32 %1, 0
252 br i1 %4, label %47, label %5
; Loop header: %6 is the byte index i, %7 the running sum.
255 %6 = phi i64 [ %45, %5 ], [ 0, %2 ]
256 %7 = phi i64 [ %44, %5 ], [ 0, %2 ]
; %8 = %0 + i; nine i64 loads follow, in the order 4001, 4002, 4003, 4005,
; 4006, 4007, 4014, 4010, 4011 (note 4014 comes before 4010 and 4011).
257 %8 = getelementptr inbounds i8, i8* %0, i64 %6
258 %9 = getelementptr inbounds i8, i8* %8, i64 4001
259 %10 = bitcast i8* %9 to i64*
260 %11 = load i64, i64* %10, align 8
261 %12 = getelementptr inbounds i8, i8* %8, i64 4002
262 %13 = bitcast i8* %12 to i64*
263 %14 = load i64, i64* %13, align 8
264 %15 = getelementptr inbounds i8, i8* %8, i64 4003
265 %16 = bitcast i8* %15 to i64*
266 %17 = load i64, i64* %16, align 8
267 %18 = getelementptr inbounds i8, i8* %8, i64 4005
268 %19 = bitcast i8* %18 to i64*
269 %20 = load i64, i64* %19, align 8
270 %21 = getelementptr inbounds i8, i8* %8, i64 4006
271 %22 = bitcast i8* %21 to i64*
272 %23 = load i64, i64* %22, align 8
273 %24 = getelementptr inbounds i8, i8* %8, i64 4007
274 %25 = bitcast i8* %24 to i64*
275 %26 = load i64, i64* %25, align 8
276 %27 = getelementptr inbounds i8, i8* %8, i64 4014
277 %28 = bitcast i8* %27 to i64*
278 %29 = load i64, i64* %28, align 8
279 %30 = getelementptr inbounds i8, i8* %8, i64 4010
280 %31 = bitcast i8* %30 to i64*
281 %32 = load i64, i64* %31, align 8
282 %33 = getelementptr inbounds i8, i8* %8, i64 4011
283 %34 = bitcast i8* %33 to i64*
284 %35 = load i64, i64* %34, align 8
; res += product of the nine loaded values, multiplied in load order.
285 %36 = mul i64 %14, %11
286 %37 = mul i64 %36, %17
287 %38 = mul i64 %37, %20
288 %39 = mul i64 %38, %23
289 %40 = mul i64 %39, %26
290 %41 = mul i64 %40, %29
291 %42 = mul i64 %41, %32
292 %43 = mul i64 %42, %35
293 %44 = add i64 %43, %7
; Advance i and keep looping while i + 1 < %3 (unsigned comparison).
294 %45 = add nuw i64 %6, 1
295 %46 = icmp ult i64 %45, %3
296 br i1 %46, label %5, label %47
; Exit: the accumulated sum (0 if the loop was skipped) plus the count.
299 %48 = phi i64 [ 0, %2 ], [ %44, %5 ]
300 %49 = add i64 %48, %3
304 ; test_update_ds_prep_interact:
305 ; unsigned long test_update_ds_prep_interact(char *p, int count) {
306 ; unsigned long i=0, res=0;
311 ; for (; i < count ; i++) {
312 ; unsigned long x1 = *(unsigned long *)(p + 4 * i + DISP1);
313 ; unsigned long x2 = *(unsigned long *)(p + 4 * i + DISP2);
314 ; unsigned long x3 = *(unsigned long *)(p + 4 * i + DISP3);
315 ; unsigned long x4 = *(unsigned long *)(p + 4 * i + DISP4);
316 ; res += x1*x2*x3*x4;
318 ; return res + count;
; Codegen test: each iteration loads four i64 values at byte offsets 4001,
; 4002, 4003 and 4006 from %0 + %8, multiplies them and accumulates the product.
; Expected pwr9 output (asserted below): pointer r3 starts at %0 + 3998 and is
; advanced by the update-form `ldu r8, 4(r3)`, which also performs the load at
; offset 4002; there is no separate `addi` in the loop. The other loads are
; relative to the updated r3: `ldx` with r7 = -1 (4001), `ldx` with r6 = 1
; (4003) and `ld 4(r3)` (4006). Unlike the neighbouring tests, the loop is
; aligned with `.p2align 5`.
321 define dso_local i64 @test_update_ds_prep_interact(i8* %0, i32 signext %1) {
322 ; CHECK-LABEL: test_update_ds_prep_interact:
324 ; CHECK-NEXT: cmplwi r4, 0
325 ; CHECK-NEXT: beq cr0, .LBB3_4
326 ; CHECK-NEXT: # %bb.1: # %.preheader
327 ; CHECK-NEXT: cmpldi r4, 1
328 ; CHECK-NEXT: li r6, 1
329 ; CHECK-NEXT: addi r3, r3, 3998
330 ; CHECK-NEXT: li r7, -1
331 ; CHECK-NEXT: iselgt r5, r4, r6
332 ; CHECK-NEXT: mtctr r5
333 ; CHECK-NEXT: li r5, 0
334 ; CHECK-NEXT: .p2align 5
335 ; CHECK-NEXT: .LBB3_2:
336 ; CHECK-NEXT: ldu r8, 4(r3)
337 ; CHECK-NEXT: ldx r9, r3, r7
338 ; CHECK-NEXT: ldx r10, r3, r6
339 ; CHECK-NEXT: ld r11, 4(r3)
340 ; CHECK-NEXT: mulld r8, r8, r9
341 ; CHECK-NEXT: mulld r8, r8, r10
342 ; CHECK-NEXT: maddld r5, r8, r11, r5
343 ; CHECK-NEXT: bdnz .LBB3_2
344 ; CHECK-NEXT: # %bb.3:
345 ; CHECK-NEXT: add r3, r5, r4
347 ; CHECK-NEXT: .LBB3_4:
348 ; CHECK-NEXT: addi r3, r4, 0
; Entry: %3 is the sign-extended count; a zero count skips the loop.
350 %3 = sext i32 %1 to i64
351 %4 = icmp eq i32 %1, 0
352 br i1 %4, label %28, label %5
; Loop header: %6 is the iteration counter i, %7 the running sum.
355 %6 = phi i64 [ %26, %5 ], [ 0, %2 ]
356 %7 = phi i64 [ %25, %5 ], [ 0, %2 ]
; %9 = %0 + %8.
; NOTE(review): the line defining %8 is not part of this excerpt; the expected
; `ldu ... 4(r3)` stride suggests %8 = 4 * %6 - confirm against the full file.
358 %9 = getelementptr inbounds i8, i8* %0, i64 %8
359 %10 = getelementptr inbounds i8, i8* %9, i64 4001
360 %11 = bitcast i8* %10 to i64*
361 %12 = load i64, i64* %11, align 8
362 %13 = getelementptr inbounds i8, i8* %9, i64 4002
363 %14 = bitcast i8* %13 to i64*
364 %15 = load i64, i64* %14, align 8
365 %16 = getelementptr inbounds i8, i8* %9, i64 4003
366 %17 = bitcast i8* %16 to i64*
367 %18 = load i64, i64* %17, align 8
368 %19 = getelementptr inbounds i8, i8* %9, i64 4006
369 %20 = bitcast i8* %19 to i64*
370 %21 = load i64, i64* %20, align 8
; res += x1 * x2 * x3 * x4
371 %22 = mul i64 %15, %12
372 %23 = mul i64 %22, %18
373 %24 = mul i64 %23, %21
374 %25 = add i64 %24, %7
; Advance i and keep looping while i + 1 < %3 (unsigned comparison).
375 %26 = add nuw i64 %6, 1
376 %27 = icmp ult i64 %26, %3
377 br i1 %27, label %5, label %28
; Exit: the accumulated sum (0 if the loop was skipped) plus the count.
380 %29 = phi i64 [ 0, %2 ], [ %25, %5 ]
381 %30 = add i64 %29, %3
385 ; test_update_ds_prep_nointeract:
386 ; unsigned long test_update_ds_prep_nointeract(char *p, int count) {
387 ; unsigned long i=0, res=0;
392 ; for (; i < count ; i++) {
393 ; char x1 = *(p + i + DISP1);
394 ; unsigned long x2 = *(unsigned long *)(p + i + DISP2);
395 ; unsigned long x3 = *(unsigned long *)(p + i + DISP3);
396 ; unsigned long x4 = *(unsigned long *)(p + i + DISP4);
397 ; res += (unsigned long)x1*x2*x3*x4;
399 ; return res + count;
; Codegen test: each iteration loads one i8 at byte offset 4001 and three i64
; values at offsets 4002, 4003 and 4007 from %0 + i, multiplies them (the byte
; zero-extended) and accumulates the product.
; Expected pwr9 output (asserted below): two independent pointers are kept.
; r5 = %0 + 4000 is advanced by the update-form byte load `lbzu r8, 1(r5)`;
; r3 = %0 + 4003 is bumped by a separate `addi` and serves the i64 loads:
; `ldx` with r7 = -1 (4002), `ld 0(r3)` (4003) and `ld 4(r3)` (4007).
402 define i64 @test_update_ds_prep_nointeract(i8* %0, i32 signext %1) {
403 ; CHECK-LABEL: test_update_ds_prep_nointeract:
405 ; CHECK-NEXT: cmplwi r4, 0
406 ; CHECK-NEXT: beq cr0, .LBB4_4
407 ; CHECK-NEXT: # %bb.1: # %.preheader
408 ; CHECK-NEXT: cmpldi r4, 1
409 ; CHECK-NEXT: li r6, 1
410 ; CHECK-NEXT: addi r5, r3, 4000
411 ; CHECK-NEXT: addi r3, r3, 4003
412 ; CHECK-NEXT: li r7, -1
413 ; CHECK-NEXT: iselgt r6, r4, r6
414 ; CHECK-NEXT: mtctr r6
415 ; CHECK-NEXT: li r6, 0
416 ; CHECK-NEXT: .p2align 4
417 ; CHECK-NEXT: .LBB4_2:
418 ; CHECK-NEXT: lbzu r8, 1(r5)
419 ; CHECK-NEXT: ldx r9, r3, r7
420 ; CHECK-NEXT: ld r10, 0(r3)
421 ; CHECK-NEXT: ld r11, 4(r3)
422 ; CHECK-NEXT: addi r3, r3, 1
423 ; CHECK-NEXT: mulld r8, r9, r8
424 ; CHECK-NEXT: mulld r8, r8, r10
425 ; CHECK-NEXT: maddld r6, r8, r11, r6
426 ; CHECK-NEXT: bdnz .LBB4_2
427 ; CHECK-NEXT: # %bb.3:
428 ; CHECK-NEXT: add r3, r6, r4
430 ; CHECK-NEXT: .LBB4_4:
431 ; CHECK-NEXT: addi r3, r4, 0
; Entry: %3 is the sign-extended count; a zero count skips the loop.
433 %3 = sext i32 %1 to i64
434 %4 = icmp eq i32 %1, 0
435 br i1 %4, label %27, label %5
; Loop header: %6 is the byte index i, %7 the running sum.
438 %6 = phi i64 [ %25, %5 ], [ 0, %2 ]
439 %7 = phi i64 [ %24, %5 ], [ 0, %2 ]
; %8 = %0 + i. The first access is a plain byte load (align 1); the other
; three are i64 loads at 4002, 4003 and 4007.
440 %8 = getelementptr inbounds i8, i8* %0, i64 %6
441 %9 = getelementptr inbounds i8, i8* %8, i64 4001
442 %10 = load i8, i8* %9, align 1
443 %11 = getelementptr inbounds i8, i8* %8, i64 4002
444 %12 = bitcast i8* %11 to i64*
445 %13 = load i64, i64* %12, align 8
446 %14 = getelementptr inbounds i8, i8* %8, i64 4003
447 %15 = bitcast i8* %14 to i64*
448 %16 = load i64, i64* %15, align 8
449 %17 = getelementptr inbounds i8, i8* %8, i64 4007
450 %18 = bitcast i8* %17 to i64*
451 %19 = load i64, i64* %18, align 8
; The byte is zero-extended to i64 before entering the product.
452 %20 = zext i8 %10 to i64
453 %21 = mul i64 %13, %20
454 %22 = mul i64 %21, %16
455 %23 = mul i64 %22, %19
456 %24 = add i64 %23, %7
; Advance i and keep looping while i + 1 < %3 (unsigned comparison).
457 %25 = add nuw i64 %6, 1
458 %26 = icmp ult i64 %25, %3
459 br i1 %26, label %5, label %27
; Exit: the accumulated sum (0 if the loop was skipped) plus the count.
462 %28 = phi i64 [ 0, %2 ], [ %24, %5 ]
463 %29 = add i64 %28, %3
467 ; test_ds_multiple_chains:
468 ; unsigned long test_ds_multiple_chains(char *p, char *q, int count) {
469 ; unsigned long i=0, res=0;
474 ; for (; i < count ; i++) {
475 ; unsigned long x1 = *(unsigned long *)(p + i + DISP1);
476 ; unsigned long x2 = *(unsigned long *)(p + i + DISP2);
477 ; unsigned long x3 = *(unsigned long *)(p + i + DISP3);
478 ; unsigned long x4 = *(unsigned long *)(p + i + DISP4);
479 ; unsigned long x5 = *(unsigned long *)(q + i + DISP1);
480 ; unsigned long x6 = *(unsigned long *)(q + i + DISP2);
481 ; unsigned long x7 = *(unsigned long *)(q + i + DISP3);
482 ; unsigned long x8 = *(unsigned long *)(q + i + DISP4);
483 ; res += x1*x2*x3*x4*x5*x6*x7*x8;
485 ; return res + count;
; Codegen test: two separate base pointers (%0 and %1) are each read at byte
; offsets 4001, 4010, 4005 and 4009 from base + i; all eight i64 values are
; multiplied and the product is accumulated.
; Expected pwr9 output (asserted below): one pointer per base, r3 = %0 + 4001
; and r4 = %1 + 4001, each bumped by 1 per iteration. For both, offsets 4001,
; 4005 and 4009 are immediate-form `ld` (0, 4, 8) and offset 4010 is an
; indexed `ldx` through the shared register r7 = 9. r30 is spilled below r1.
488 define dso_local i64 @test_ds_multiple_chains(i8* %0, i8* %1, i32 signext %2) {
489 ; CHECK-LABEL: test_ds_multiple_chains:
491 ; CHECK-NEXT: cmplwi r5, 0
492 ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
493 ; CHECK-NEXT: beq cr0, .LBB5_3
494 ; CHECK-NEXT: # %bb.1: # %.preheader
495 ; CHECK-NEXT: cmpldi r5, 1
496 ; CHECK-NEXT: li r6, 1
497 ; CHECK-NEXT: addi r3, r3, 4001
498 ; CHECK-NEXT: addi r4, r4, 4001
499 ; CHECK-NEXT: li r7, 9
500 ; CHECK-NEXT: iselgt r6, r5, r6
501 ; CHECK-NEXT: mtctr r6
502 ; CHECK-NEXT: li r6, 0
503 ; CHECK-NEXT: .p2align 4
504 ; CHECK-NEXT: .LBB5_2:
505 ; CHECK-NEXT: ld r8, 0(r3)
506 ; CHECK-NEXT: ldx r9, r3, r7
507 ; CHECK-NEXT: ld r10, 4(r3)
508 ; CHECK-NEXT: ld r11, 8(r3)
509 ; CHECK-NEXT: addi r3, r3, 1
510 ; CHECK-NEXT: mulld r8, r9, r8
511 ; CHECK-NEXT: ld r12, 0(r4)
512 ; CHECK-NEXT: ldx r0, r4, r7
513 ; CHECK-NEXT: ld r30, 4(r4)
514 ; CHECK-NEXT: ld r9, 8(r4)
515 ; CHECK-NEXT: addi r4, r4, 1
516 ; CHECK-NEXT: mulld r8, r8, r10
517 ; CHECK-NEXT: mulld r8, r8, r11
518 ; CHECK-NEXT: mulld r8, r8, r12
519 ; CHECK-NEXT: mulld r8, r8, r0
520 ; CHECK-NEXT: mulld r8, r8, r30
521 ; CHECK-NEXT: maddld r6, r8, r9, r6
522 ; CHECK-NEXT: bdnz .LBB5_2
523 ; CHECK-NEXT: b .LBB5_4
524 ; CHECK-NEXT: .LBB5_3:
525 ; CHECK-NEXT: li r6, 0
526 ; CHECK-NEXT: .LBB5_4:
527 ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
528 ; CHECK-NEXT: add r3, r6, r5
; Entry: %4 is the sign-extended count (third argument); zero skips the loop.
530 %4 = sext i32 %2 to i64
531 %5 = icmp eq i32 %2, 0
532 br i1 %5, label %45, label %6
; Loop header: %7 is the byte index i, %8 the running sum.
535 %7 = phi i64 [ %43, %6 ], [ 0, %3 ]
536 %8 = phi i64 [ %42, %6 ], [ 0, %3 ]
; First chain: %9 = %0 + i, loads at 4001, 4010, 4005, 4009.
537 %9 = getelementptr inbounds i8, i8* %0, i64 %7
538 %10 = getelementptr inbounds i8, i8* %9, i64 4001
539 %11 = bitcast i8* %10 to i64*
540 %12 = load i64, i64* %11, align 8
541 %13 = getelementptr inbounds i8, i8* %9, i64 4010
542 %14 = bitcast i8* %13 to i64*
543 %15 = load i64, i64* %14, align 8
544 %16 = getelementptr inbounds i8, i8* %9, i64 4005
545 %17 = bitcast i8* %16 to i64*
546 %18 = load i64, i64* %17, align 8
547 %19 = getelementptr inbounds i8, i8* %9, i64 4009
548 %20 = bitcast i8* %19 to i64*
549 %21 = load i64, i64* %20, align 8
; Second chain: %22 = %1 + i, same four offsets.
550 %22 = getelementptr inbounds i8, i8* %1, i64 %7
551 %23 = getelementptr inbounds i8, i8* %22, i64 4001
552 %24 = bitcast i8* %23 to i64*
553 %25 = load i64, i64* %24, align 8
554 %26 = getelementptr inbounds i8, i8* %22, i64 4010
555 %27 = bitcast i8* %26 to i64*
556 %28 = load i64, i64* %27, align 8
557 %29 = getelementptr inbounds i8, i8* %22, i64 4005
558 %30 = bitcast i8* %29 to i64*
559 %31 = load i64, i64* %30, align 8
560 %32 = getelementptr inbounds i8, i8* %22, i64 4009
561 %33 = bitcast i8* %32 to i64*
562 %34 = load i64, i64* %33, align 8
; res += product of all eight loaded values.
563 %35 = mul i64 %15, %12
564 %36 = mul i64 %35, %18
565 %37 = mul i64 %36, %21
566 %38 = mul i64 %37, %25
567 %39 = mul i64 %38, %28
568 %40 = mul i64 %39, %31
569 %41 = mul i64 %40, %34
570 %42 = add i64 %41, %8
; Advance i and keep looping while i + 1 < %4 (unsigned comparison).
571 %43 = add nuw i64 %7, 1
572 %44 = icmp ult i64 %43, %4
573 br i1 %44, label %6, label %45
; Exit: the accumulated sum (0 if the loop was skipped) plus the count.
576 %46 = phi i64 [ 0, %3 ], [ %42, %6 ]
577 %47 = add i64 %46, %4
581 ; test_ds_cross_basic_blocks:
583 ; unsigned long test_ds_cross_basic_blocks(char *p, int count)
585 ; unsigned long i=0, res=0;
592 ; unsigned long x1, x2, x3, x4, x5, x6;
593 ; x1=x2=x3=x4=x5=x6=1;
594 ; for (; i < count ; i++) {
595 ; if (arr[i] % 3 == 1) {
596 ; x1 += *(unsigned long *)(p + i + DISP1);
597 ; x2 += *(unsigned long *)(p + i + DISP2);
599 ; else if (arr[i] % 3 == 2) {
600 ; x3 += *(unsigned long *)(p + i + DISP3);
601 ; x4 += *(unsigned long *)(p + i + DISP5);
604 ; x5 += *(unsigned long *)(p + i + DISP4);
605 ; x6 += *(unsigned long *)(p + i + DISP6);
607 ; res += x1*x2*x3*x4*x5*x6;
; External global holding an i8* that the function below loads once before
; its loop and then indexes per iteration.
612 @arr = external local_unnamed_addr global i8*, align 8
; Codegen test: the i64 loads off %0 + i are spread over three mutually
; exclusive blocks chosen per iteration from a byte read through @arr:
; offsets 4000/4001 when the selector %18 is 1, 4002/4005 when it is 2, and
; 4003/4009 otherwise. Each load is added to one of six loop-carried values,
; whose product is accumulated in the shared latch block %51.
; Expected pwr9 output (asserted below): one pointer r6 = %0 + 4009 serves all
; three blocks and is bumped by 1 in the common latch (.LBB6_3). Offsets 4000,
; 4002 and 4003 are indexed `ldx` through r4 = -9, r8 = -7 and r9 = -6; offsets
; 4001, 4005 and 4009 are immediate-form `ld` at -8, -4 and 0. The selector
; byte comes from `lbzu r0, 1(r5)`, with r5 starting one byte before the
; pointer loaded from @arr.
614 define i64 @test_ds_cross_basic_blocks(i8* %0, i32 signext %1) {
615 ; CHECK-LABEL: test_ds_cross_basic_blocks:
617 ; CHECK-NEXT: cmplwi r4, 0
618 ; CHECK-NEXT: std r27, -40(r1) # 8-byte Folded Spill
619 ; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill
620 ; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
621 ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
622 ; CHECK-NEXT: beq cr0, .LBB6_8
623 ; CHECK-NEXT: # %bb.1:
624 ; CHECK-NEXT: addis r5, r2, .LC0@toc@ha
625 ; CHECK-NEXT: cmpldi r4, 1
626 ; CHECK-NEXT: li r7, 1
627 ; CHECK-NEXT: addi r6, r3, 4009
628 ; CHECK-NEXT: ld r5, .LC0@toc@l(r5)
629 ; CHECK-NEXT: iselgt r4, r4, r7
630 ; CHECK-NEXT: li r3, 0
631 ; CHECK-NEXT: li r8, -7
632 ; CHECK-NEXT: li r9, -6
633 ; CHECK-NEXT: li r10, 1
634 ; CHECK-NEXT: li r11, 1
635 ; CHECK-NEXT: li r12, 1
636 ; CHECK-NEXT: li r30, 1
637 ; CHECK-NEXT: ld r5, 0(r5)
638 ; CHECK-NEXT: mtctr r4
639 ; CHECK-NEXT: li r4, -9
640 ; CHECK-NEXT: li r29, 1
641 ; CHECK-NEXT: addi r5, r5, -1
642 ; CHECK-NEXT: b .LBB6_4
643 ; CHECK-NEXT: .p2align 4
644 ; CHECK-NEXT: .LBB6_2:
645 ; CHECK-NEXT: ldx r0, r6, r4
646 ; CHECK-NEXT: add r29, r0, r29
647 ; CHECK-NEXT: ld r0, -8(r6)
648 ; CHECK-NEXT: add r30, r0, r30
649 ; CHECK-NEXT: .LBB6_3:
650 ; CHECK-NEXT: mulld r0, r30, r29
651 ; CHECK-NEXT: addi r6, r6, 1
652 ; CHECK-NEXT: mulld r0, r0, r12
653 ; CHECK-NEXT: mulld r0, r0, r11
654 ; CHECK-NEXT: mulld r0, r0, r10
655 ; CHECK-NEXT: maddld r3, r0, r7, r3
656 ; CHECK-NEXT: bdz .LBB6_9
657 ; CHECK-NEXT: .LBB6_4:
658 ; CHECK-NEXT: lbzu r0, 1(r5)
659 ; CHECK-NEXT: mulli r28, r0, 171
660 ; CHECK-NEXT: rlwinm r27, r28, 24, 8, 30
661 ; CHECK-NEXT: srwi r28, r28, 9
662 ; CHECK-NEXT: add r28, r28, r27
663 ; CHECK-NEXT: sub r0, r0, r28
664 ; CHECK-NEXT: clrlwi r0, r0, 24
665 ; CHECK-NEXT: cmplwi r0, 1
666 ; CHECK-NEXT: beq cr0, .LBB6_2
667 ; CHECK-NEXT: # %bb.5:
668 ; CHECK-NEXT: cmplwi r0, 2
669 ; CHECK-NEXT: bne cr0, .LBB6_7
670 ; CHECK-NEXT: # %bb.6:
671 ; CHECK-NEXT: ldx r0, r6, r8
672 ; CHECK-NEXT: add r12, r0, r12
673 ; CHECK-NEXT: ld r0, -4(r6)
674 ; CHECK-NEXT: add r11, r0, r11
675 ; CHECK-NEXT: b .LBB6_3
676 ; CHECK-NEXT: .p2align 4
677 ; CHECK-NEXT: .LBB6_7:
678 ; CHECK-NEXT: ldx r0, r6, r9
679 ; CHECK-NEXT: add r10, r0, r10
680 ; CHECK-NEXT: ld r0, 0(r6)
681 ; CHECK-NEXT: add r7, r0, r7
682 ; CHECK-NEXT: b .LBB6_3
683 ; CHECK-NEXT: .LBB6_8:
684 ; CHECK-NEXT: li r3, 0
685 ; CHECK-NEXT: .LBB6_9:
686 ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
687 ; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
688 ; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
689 ; CHECK-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
; Entry: %3 is the sign-extended count; a zero count goes straight to %66.
691 %3 = sext i32 %1 to i64
692 %4 = icmp eq i32 %1, 0
693 br i1 %4, label %66, label %5
; Block %5 (loop preheader): read the pointer stored in @arr once.
696 %6 = load i8*, i8** @arr, align 8
; Loop header: %8-%13 are the six loop-carried values, all starting at 1 and
; updated from %57..%52; %14 is the byte index i; %15 is the running sum.
700 %8 = phi i64 [ 1, %5 ], [ %57, %51 ]
701 %9 = phi i64 [ 1, %5 ], [ %56, %51 ]
702 %10 = phi i64 [ 1, %5 ], [ %55, %51 ]
703 %11 = phi i64 [ 1, %5 ], [ %54, %51 ]
704 %12 = phi i64 [ 1, %5 ], [ %53, %51 ]
705 %13 = phi i64 [ 1, %5 ], [ %52, %51 ]
706 %14 = phi i64 [ 0, %5 ], [ %64, %51 ]
707 %15 = phi i64 [ 0, %5 ], [ %63, %51 ]
; Selector byte: %17 = byte at (pointer from @arr) + i.
; NOTE(review): the line defining %18 from %17 is not part of this excerpt; the
; C sketch and the expected mulli-by-171 sequence suggest an unsigned `% 3` -
; confirm against the full file.
708 %16 = getelementptr inbounds i8, i8* %6, i64 %14
709 %17 = load i8, i8* %16, align 1
711 %19 = icmp eq i8 %18, 1
712 br i1 %19, label %20, label %30
; Selector == 1: add the i64 values at offsets 4000 and 4001 to %13 and %12.
715 %21 = getelementptr inbounds i8, i8* %0, i64 %14
716 %22 = getelementptr inbounds i8, i8* %21, i64 4000
717 %23 = bitcast i8* %22 to i64*
718 %24 = load i64, i64* %23, align 8
719 %25 = add i64 %24, %13
720 %26 = getelementptr inbounds i8, i8* %21, i64 4001
721 %27 = bitcast i8* %26 to i64*
722 %28 = load i64, i64* %27, align 8
723 %29 = add i64 %28, %12
; Selector != 1: compute %0 + i once, then split on selector == 2.
727 %31 = icmp eq i8 %18, 2
728 %32 = getelementptr inbounds i8, i8* %0, i64 %14
729 br i1 %31, label %33, label %42
; Selector == 2: add the i64 values at offsets 4002 and 4005 to %11 and %10.
732 %34 = getelementptr inbounds i8, i8* %32, i64 4002
733 %35 = bitcast i8* %34 to i64*
734 %36 = load i64, i64* %35, align 8
735 %37 = add i64 %36, %11
736 %38 = getelementptr inbounds i8, i8* %32, i64 4005
737 %39 = bitcast i8* %38 to i64*
738 %40 = load i64, i64* %39, align 8
739 %41 = add i64 %40, %10
; Any other selector: add the i64 values at offsets 4003 and 4009 to %9 and %8.
743 %43 = getelementptr inbounds i8, i8* %32, i64 4003
744 %44 = bitcast i8* %43 to i64*
745 %45 = load i64, i64* %44, align 8
746 %46 = add i64 %45, %9
747 %47 = getelementptr inbounds i8, i8* %32, i64 4009
748 %48 = bitcast i8* %47 to i64*
749 %49 = load i64, i64* %48, align 8
750 %50 = add i64 %49, %8
; Latch: merge the six values (only the pair belonging to the taken branch
; changed), accumulate their product, advance i and loop while i + 1 < %3.
753 51: ; preds = %33, %42, %20
754 %52 = phi i64 [ %25, %20 ], [ %13, %33 ], [ %13, %42 ]
755 %53 = phi i64 [ %29, %20 ], [ %12, %33 ], [ %12, %42 ]
756 %54 = phi i64 [ %11, %20 ], [ %37, %33 ], [ %11, %42 ]
757 %55 = phi i64 [ %10, %20 ], [ %41, %33 ], [ %10, %42 ]
758 %56 = phi i64 [ %9, %20 ], [ %9, %33 ], [ %46, %42 ]
759 %57 = phi i64 [ %8, %20 ], [ %8, %33 ], [ %50, %42 ]
760 %58 = mul i64 %53, %52
761 %59 = mul i64 %58, %54
762 %60 = mul i64 %59, %55
763 %61 = mul i64 %60, %56
764 %62 = mul i64 %61, %57
765 %63 = add i64 %62, %15
766 %64 = add nuw i64 %14, 1
767 %65 = icmp ult i64 %64, %3
768 br i1 %65, label %7, label %66
; Exit: 0 when the loop was skipped, otherwise the accumulated sum.
770 66: ; preds = %51, %2
771 %67 = phi i64 [ 0, %2 ], [ %63, %51 ]
776 ;float test_ds_float(char *p, int count) {
783 ; for (; i < count ; i++) {
784 ; float x1 = *(float *)(p + i + DISP1);
785 ; float x2 = *(float *)(p + i + DISP2);
786 ; float x3 = *(float *)(p + i + DISP3);
787 ; float x4 = *(float *)(p + i + DISP4);
788 ; res += x1*x2*x3*x4;
; Codegen test: the floating-point counterpart of the integer tests. Each
; iteration loads four floats at byte offsets 4001, 4002, 4022 and 4062 from
; %0 + i, multiplies them and adds the product to a float accumulator.
; Expected pwr9 output (asserted below): a single pointer r3 = %0 + 4002 is
; bumped by 1 per iteration; offset 4001 is an indexed `lfsx` through r4 = -1
; and the other three are immediate-form `lfs` at 0, 20 and 60. The loop is
; entered only when the count is > 0 (signed), and the counter register is
; loaded from the count zero-extended from 32 bits (clrldi).
793 define float @test_ds_float(i8* %0, i32 signext %1) {
794 ; CHECK-LABEL: test_ds_float:
796 ; CHECK-NEXT: cmpwi r4, 1
797 ; CHECK-NEXT: blt cr0, .LBB7_4
798 ; CHECK-NEXT: # %bb.1:
799 ; CHECK-NEXT: clrldi r4, r4, 32
800 ; CHECK-NEXT: addi r3, r3, 4002
801 ; CHECK-NEXT: xxlxor f1, f1, f1
802 ; CHECK-NEXT: mtctr r4
803 ; CHECK-NEXT: li r4, -1
804 ; CHECK-NEXT: .p2align 4
805 ; CHECK-NEXT: .LBB7_2:
806 ; CHECK-NEXT: lfsx f0, r3, r4
807 ; CHECK-NEXT: lfs f2, 0(r3)
808 ; CHECK-NEXT: xsmulsp f0, f0, f2
809 ; CHECK-NEXT: lfs f3, 20(r3)
810 ; CHECK-NEXT: xsmulsp f0, f0, f3
811 ; CHECK-NEXT: lfs f4, 60(r3)
812 ; CHECK-NEXT: addi r3, r3, 1
813 ; CHECK-NEXT: xsmulsp f0, f0, f4
814 ; CHECK-NEXT: xsaddsp f1, f1, f0
815 ; CHECK-NEXT: bdnz .LBB7_2
816 ; CHECK-NEXT: # %bb.3:
818 ; CHECK-NEXT: .LBB7_4:
819 ; CHECK-NEXT: xxlxor f1, f1, f1
; Entry: the loop runs only for a strictly positive count.
821 %3 = icmp sgt i32 %1, 0
822 br i1 %3, label %4, label %28
; Trip count: the (positive) count zero-extended to i64.
825 %5 = zext i32 %1 to i64
; Loop header: %7 is the byte index i, %8 the float accumulator.
829 %7 = phi i64 [ 0, %4 ], [ %26, %6 ]
830 %8 = phi float [ 0.000000e+00, %4 ], [ %25, %6 ]
; %9 = %0 + i; four float loads at 4001, 4002, 4022 and 4062, all marked
; `align 4`.
831 %9 = getelementptr inbounds i8, i8* %0, i64 %7
832 %10 = getelementptr inbounds i8, i8* %9, i64 4001
833 %11 = bitcast i8* %10 to float*
834 %12 = load float, float* %11, align 4
835 %13 = getelementptr inbounds i8, i8* %9, i64 4002
836 %14 = bitcast i8* %13 to float*
837 %15 = load float, float* %14, align 4
838 %16 = getelementptr inbounds i8, i8* %9, i64 4022
839 %17 = bitcast i8* %16 to float*
840 %18 = load float, float* %17, align 4
841 %19 = getelementptr inbounds i8, i8* %9, i64 4062
842 %20 = bitcast i8* %19 to float*
843 %21 = load float, float* %20, align 4
; res += x1 * x2 * x3 * x4, evaluated left to right in single precision.
844 %22 = fmul float %12, %15
845 %23 = fmul float %22, %18
846 %24 = fmul float %23, %21
847 %25 = fadd float %8, %24
; Advance i and leave the loop once i + 1 equals the trip count.
848 %26 = add nuw nsw i64 %7, 1
849 %27 = icmp eq i64 %26, %5
850 br i1 %27, label %28, label %6
; Exit value: 0.0 when the loop was skipped, otherwise the accumulated sum.
853 %29 = phi float [ 0.000000e+00, %2 ], [ %25, %6 ]
857 ; test_ds_combine_float_int:
858 ;float test_ds_combine_float_int(char *p, int count) {
865 ; for (; i < count ; i++) {
866 ; float x1 = *(float *)(p + i + DISP1);
867 ; unsigned long x2 = *(unsigned long*)(p + i + DISP2);
868 ; float x3 = *(float *)(p + i + DISP3);
869 ; float x4 = *(float *)(p + i + DISP4);
870 ; res += x1*x2*x3*x4;
; Codegen test: same offsets as the all-float variant (4001, 4002, 4022, 4062
; from %0 + i), but the value at 4002 is loaded as an i64 and converted to
; float with uitofp before entering the product.
; Expected pwr9 output (asserted below): a single pointer r3 = %0 + 4002 is
; shared by the float and the integer access. The i64 is fetched with
; `lfd f4, 0(r3)` and converted by `xscvuxdsp`; offset 4001 is an indexed
; `lfsx` through r4 = -1; offsets 4022 and 4062 are `lfs` at 20 and 60.
875 define float @test_ds_combine_float_int(i8* %0, i32 signext %1) {
876 ; CHECK-LABEL: test_ds_combine_float_int:
878 ; CHECK-NEXT: cmpwi r4, 1
879 ; CHECK-NEXT: blt cr0, .LBB8_4
880 ; CHECK-NEXT: # %bb.1:
881 ; CHECK-NEXT: clrldi r4, r4, 32
882 ; CHECK-NEXT: addi r3, r3, 4002
883 ; CHECK-NEXT: xxlxor f1, f1, f1
884 ; CHECK-NEXT: mtctr r4
885 ; CHECK-NEXT: li r4, -1
886 ; CHECK-NEXT: .p2align 4
887 ; CHECK-NEXT: .LBB8_2:
888 ; CHECK-NEXT: lfd f4, 0(r3)
889 ; CHECK-NEXT: lfsx f0, r3, r4
890 ; CHECK-NEXT: xscvuxdsp f4, f4
891 ; CHECK-NEXT: lfs f2, 20(r3)
892 ; CHECK-NEXT: lfs f3, 60(r3)
893 ; CHECK-NEXT: addi r3, r3, 1
894 ; CHECK-NEXT: xsmulsp f0, f0, f4
895 ; CHECK-NEXT: xsmulsp f0, f2, f0
896 ; CHECK-NEXT: xsmulsp f0, f3, f0
897 ; CHECK-NEXT: xsaddsp f1, f1, f0
898 ; CHECK-NEXT: bdnz .LBB8_2
899 ; CHECK-NEXT: # %bb.3:
901 ; CHECK-NEXT: .LBB8_4:
902 ; CHECK-NEXT: xxlxor f1, f1, f1
; Entry: the loop runs only for a strictly positive count.
904 %3 = icmp sgt i32 %1, 0
905 br i1 %3, label %4, label %29
; Trip count: the (positive) count zero-extended to i64.
908 %5 = zext i32 %1 to i64
; Loop header: %7 is the byte index i, %8 the float accumulator.
912 %7 = phi i64 [ 0, %4 ], [ %27, %6 ]
913 %8 = phi float [ 0.000000e+00, %4 ], [ %26, %6 ]
; %9 = %0 + i. Loads: float at 4001, i64 at 4002 (align 8), float at 4022 and
; float at 4062.
914 %9 = getelementptr inbounds i8, i8* %0, i64 %7
915 %10 = getelementptr inbounds i8, i8* %9, i64 4001
916 %11 = bitcast i8* %10 to float*
917 %12 = load float, float* %11, align 4
918 %13 = getelementptr inbounds i8, i8* %9, i64 4002
919 %14 = bitcast i8* %13 to i64*
920 %15 = load i64, i64* %14, align 8
921 %16 = getelementptr inbounds i8, i8* %9, i64 4022
922 %17 = bitcast i8* %16 to float*
923 %18 = load float, float* %17, align 4
924 %19 = getelementptr inbounds i8, i8* %9, i64 4062
925 %20 = bitcast i8* %19 to float*
926 %21 = load float, float* %20, align 4
; The integer is converted as unsigned, then res += x1 * x2 * x3 * x4.
927 %22 = uitofp i64 %15 to float
928 %23 = fmul float %12, %22
929 %24 = fmul float %18, %23
930 %25 = fmul float %21, %24
931 %26 = fadd float %8, %25
; Advance i and leave the loop once i + 1 equals the trip count.
932 %27 = add nuw nsw i64 %7, 1
933 %28 = icmp eq i64 %27, %5
934 br i1 %28, label %29, label %6
; Exit value: 0.0 when the loop was skipped, otherwise the accumulated sum.
937 %30 = phi float [ 0.000000e+00, %2 ], [ %26, %6 ]
942 ; long long test_ds_lwa_prep(char *p, int count) {
943 ; long long i=0, res=0;
948 ; for (; i < count ; i++) {
949 ; long long x1 = *(int *)(p + i + DISP1);
950 ; long long x2 = *(int *)(p + i + DISP2);
951 ; long long x3 = *(int *)(p + i + DISP3);
952 ; long long x4 = *(int *)(p + i + DISP4);
953 ; res += x1*x2*x3*x4;
955 ; return res + count;
958 define i64 @test_ds_lwa_prep(i8* %0, i32 signext %1) {
959 ; CHECK-LABEL: test_ds_lwa_prep:
961 ; CHECK-NEXT: cmpwi r4, 1
962 ; CHECK-NEXT: blt cr0, .LBB9_4
963 ; CHECK-NEXT: # %bb.1: # %.preheader
964 ; CHECK-NEXT: mtctr r4
965 ; CHECK-NEXT: addi r5, r3, 2
966 ; CHECK-NEXT: li r3, 0
967 ; CHECK-NEXT: li r6, -1
968 ; CHECK-NEXT: .p2align 4
969 ; CHECK-NEXT: .LBB9_2:
970 ; CHECK-NEXT: lwax r7, r5, r6
971 ; CHECK-NEXT: lwa r8, 0(r5)
972 ; CHECK-NEXT: lwa r9, 4(r5)
973 ; CHECK-NEXT: lwa r10, 8(r5)
974 ; CHECK-NEXT: addi r5, r5, 1
975 ; CHECK-NEXT: mulld r7, r8, r7
976 ; CHECK-NEXT: mulld r7, r7, r9
977 ; CHECK-NEXT: maddld r3, r7, r10, r3
978 ; CHECK-NEXT: bdnz .LBB9_2
979 ; CHECK-NEXT: # %bb.3:
980 ; CHECK-NEXT: add r3, r3, r4
982 ; CHECK-NEXT: .LBB9_4:
983 ; CHECK-NEXT: addi r3, r4, 0
986 %3 = sext i32 %1 to i64
987 %4 = icmp sgt i32 %1, 0
988 br i1 %4, label %5, label %31
991 %6 = phi i64 [ %29, %5 ], [ 0, %2 ]
992 %7 = phi i64 [ %28, %5 ], [ 0, %2 ]
993 %8 = getelementptr inbounds i8, i8* %0, i64 %6
994 %9 = getelementptr inbounds i8, i8* %8, i64 1
995 %10 = bitcast i8* %9 to i32*
996 %11 = load i32, i32* %10, align 4
997 %12 = sext i32 %11 to i64
998 %13 = getelementptr inbounds i8, i8* %8, i64 2
999 %14 = bitcast i8* %13 to i32*
1000 %15 = load i32, i32* %14, align 4
1001 %16 = sext i32 %15 to i64
1002 %17 = getelementptr inbounds i8, i8* %8, i64 6
1003 %18 = bitcast i8* %17 to i32*
1004 %19 = load i32, i32* %18, align 4
1005 %20 = sext i32 %19 to i64
1006 %21 = getelementptr inbounds i8, i8* %8, i64 10
1007 %22 = bitcast i8* %21 to i32*
1008 %23 = load i32, i32* %22, align 4
1009 %24 = sext i32 %23 to i64
1010 %25 = mul nsw i64 %16, %12
1011 %26 = mul nsw i64 %25, %20
1012 %27 = mul nsw i64 %26, %24
1013 %28 = add nsw i64 %27, %7
1014 %29 = add nuw nsw i64 %6, 1
1015 %30 = icmp eq i64 %29, %3
1016 br i1 %30, label %31, label %5
1018 31: ; preds = %5, %2
1019 %32 = phi i64 [ 0, %2 ], [ %28, %5 ]
1020 %33 = add nsw i64 %32, %3