test/CodeGen/SystemZ/loop-03.ll

   1 ; Test that loops with sufficient registers do not spill to or reload from
   2 ; the stack. These cases include calls, so the GR128 / FP128 registers must
   3 ; be part of the callee-saved register list in order to avoid spilling /
   4 ; reloading.
   5 ;
   6 ; RUN: llc -switch-peel-threshold=101 < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
   7
   8 %0 = type { %0*, %0*, %0*, i32, %1*, i64, i64, i64, i64, i64, i64, %2, %5, %7 } ; pointee type of @fun0's argument; @fun0 addresses its field 5 (i64)
   9 %1 = type { i32, i32, i32 (%1*, i64, i32)*, i32 (%1*, i64, i64, i32, i8**)*, i32 (%1*, i64, i64, i64, i32)*, i32 (%1*)*, void (i8*)*, i8*, i8* } ; type of the first argument of the indirect call in @fun0
  10 %2 = type { i64, i64, %3** } ; in the visible code, types %2 through %8 are only referenced as (nested) members of %0
  11 %3 = type { %4*, i64 }
  12 %4 = type { i64, i8* }
  13 %5 = type { i64, i64, %6** }
  14 %6 = type { i64, %4*, i32, i64, i8* }
  15 %7 = type { i64, i64, %8** }
  16 %8 = type { i64, i64*, i64*, %4*, i64, i32*, %5, i32, i64, i64 }
  17
  18 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) ; called from blocks %24 and %30 of @fun0
  19
  20 define void @fun0(%0*) {
     ; Nested-loop case. The outer loop (header %14, back edge from %31) makes an
     ; indirect call and the inner loop (%24) calls llvm.memcpy, so %4 and the
     ; other values used on both sides of those calls stay live across them.
     ; NOTE(review): the udiv in %14 is presumably what needs a GR128 register
     ; pair on SystemZ -- confirm against the backend.
     ; The FileCheck directives below locate the .LBB0_4 label and the depth-2
     ; inner-loop header, then reject any 16-byte folded spill or reload in the
     ; output that follows, up to the next label directive.
  21 ; CHECK-LABEL: .LBB0_4
  22 ; CHECK: =>  This Inner Loop Header: Depth=2
  23 ; CHECK-NOT: 16-byte Folded Spill
  24 ; CHECK-NOT: 16-byte Folded Reload
  25
     ; %4 = 128 rounded down to a multiple of %2; it is used as the memcpy length
     ; and as the divisor in the outer loop.
  26   %2 = load i64, i64* undef, align 8
  27   %3 = udiv i64 128, %2
  28   %4 = mul i64 %3, %2
  29   %5 = load i64, i64* undef, align 8
     ; Cases 1 and 2 lead to the loops via %12, cases 3-6 end in unreachable, and
     ; the default goes straight to the return in %36.
  30   switch i32 undef, label %36 [
  31     i32 1, label %6
  32     i32 2, label %7
  33     i32 3, label %8
  34     i32 4, label %9
  35     i32 5, label %10
  36     i32 6, label %11
  37   ]
  38
  39 ; <label>:6:                                      ; preds = %1
  40   br label %12
  41
  42 ; <label>:7:                                      ; preds = %1
  43   br label %12
  44
  45 ; <label>:8:                                      ; preds = %1
  46   unreachable
  47
  48 ; <label>:9:                                      ; preds = %1
  49   unreachable
  50
  51 ; <label>:10:                                     ; preds = %1
  52   unreachable
  53
  54 ; <label>:11:                                     ; preds = %1
  55   unreachable
  56
     ; Loop preheader: %13 is the address of field 5 of the argument, reloaded on
     ; every outer iteration.
  57 ; <label>:12:                                     ; preds = %7, %6
  58   %13 = getelementptr inbounds %0, %0* %0, i64 0, i32 5
  59   br label %14
  60
     ; Outer loop header: %19 = umin(%15, %17), and %22 = umax(%19 / %4, 1) is the
     ; starting value of the inner loop's counter (%23 subtracts 0, a no-op).
  61 ; <label>:14:                                     ; preds = %31, %12
  62   %15 = phi i64 [ undef, %31 ], [ %5, %12 ]
  63   %16 = phi i64 [ %35, %31 ], [ undef, %12 ]
  64   %17 = load i64, i64* %13, align 8
  65   %18 = icmp ult i64 %15, %17
  66   %19 = select i1 %18, i64 %15, i64 %17
  67   %20 = udiv i64 %19, %4
  68   %21 = icmp ugt i64 %20, 1
  69   %22 = select i1 %21, i64 %20, i64 1
  70   %23 = sub i64 %22, 0
  71   br label %24
  72
     ; Inner loop: one memcpy of %4 bytes per iteration; %25 counts down from %23
     ; in steps of 4 and the loop exits to %31 when the result is 0.
  73 ; <label>:24:                                     ; preds = %24, %14
  74   %25 = phi i64 [ %23, %14 ], [ %27, %24 ]
  75   call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* nonnull undef, i64 %4, i1 false)
  76   %26 = getelementptr inbounds i8, i8* null, i64 %4
  77   store i8* %26, i8** undef, align 8
  78   %27 = add i64 %25, -4
  79   %28 = icmp eq i64 %27, 0
  80   br i1 %28, label %31, label %24
  81
     ; NOTE(review): the branch ending %24 targets %31 and %24, not %29, so this
     ; block and %30 have no predecessor as written, and the "preds" annotations
     ; on %29 and %31 look stale -- confirm whether that is intended.
  82 ; <label>:29:                                     ; preds = %24
  83   br i1 undef, label %31, label %30
  84
  85 ; <label>:30:                                     ; preds = %29
  86   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %26, i8* nonnull undef, i64 %4, i1 false)
  87   br label %31
  88
     ; Outer loop tail: indirect call through an undef callee that takes %16, then
     ; either return via %36 or branch back to %14 with %35 as the next %16.
  89 ; <label>:31:                                     ; preds = %30, %29
  90   %32 = call signext i32 undef(%1* undef, i64 %16, i32 signext 8)
  91   %33 = icmp eq i64 undef, 0
  92   %34 = select i1 %33, i64 0, i64 %19
  93   %35 = add i64 %34, %16
  94   br i1 %33, label %36, label %14
  95
  96 ; <label>:36:                                     ; preds = %31, %1
  97   ret void
  98 }
  99
 100 declare fp128 @llvm.pow.f128(fp128, fp128) ; called inside the loop of @fun1 and once more after it
 101
 102 define void @fun1(fp128*) {
     ; fp128 case. %3 is an fp128 accumulator carried around the single loop %2;
     ; the loop calls llvm.pow.f128 before adding its result to %3, so the
     ; accumulator is live across the call.
     ; The FileCheck directives below locate the .LBB1_2 label and the depth-1
     ; loop header, then reject any 16-byte folded spill or reload in the output
     ; between that header and the .LBB1_3 label.
 103 ; CHECK-LABEL: .LBB1_2
 104 ; CHECK: =>This Inner Loop Header: Depth=1
 105 ; CHECK-NOT: 16-byte Folded Spill
 106 ; CHECK-NOT: 16-byte Folded Reload
 107 ; CHECK-LABEL: .LBB1_3
 108
 109   br i1 undef, label %7, label %2
 110
     ; Loop body: %5 = %3 + pow(0, 0), and %5 feeds back into %3 on the next trip.
     ; NOTE(review): attribute group #2 is not defined in the visible part of the
     ; file -- confirm it is either defined elsewhere or intentionally empty.
 111 ; <label>:2:                                      ; preds = %2, %1
 112   %3 = phi fp128 [ %5, %2 ], [ 0xL00000000000000000000000000000000, %1 ]
 113   %4 = tail call fp128 @llvm.pow.f128(fp128 0xL00000000000000000000000000000000, fp128 0xL00000000000000000000000000000000) #2
 114   %5 = fadd fp128 %3, %4
 115   %6 = icmp eq i64 undef, 0
 116   br i1 %6, label %7, label %2
 117
     ; Exit block: three fadds on the accumulated value (or on zero when the loop
     ; was skipped), a second pow call, and a store of the result through the
     ; argument pointer.
 118 ; <label>:7:                                      ; preds = %2, %1
 119   %8 = phi fp128 [ 0xL00000000000000000000000000000000, %1 ], [ %5, %2 ]
 120   %9 = fadd fp128 0xL00000000000000000000000000000000, %8
 121   %10 = fadd fp128 0xL00000000000000000000000000000000, %9
 122   %11 = fadd fp128 0xL00000000000000000000000000000000, %10
 123   %12 = tail call fp128 @llvm.pow.f128(fp128 %11, fp128 0xL00000000000000000000000000000000) #2
 124   store fp128 %12, fp128* %0, align 8
 125   ret void
 126 }