test/CodeGen/SystemZ/frame-14.ll

   1 ; Test the handling of base + displacement addresses for large frames,
   2 ; in cases where both 12-bit and 20-bit displacements are allowed.
   3 ; The tests here assume z10 register pressure, without the high words
   4 ; being available.
   5 ;
   6 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \
   7 ; RUN:   FileCheck -check-prefix=CHECK-NOFP %s
   8 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -frame-pointer=all | \
   9 ; RUN:   FileCheck -check-prefix=CHECK-FP %s
  10 ;
  11 ; This file tests what happens when a displacement is converted from
  12 ; being relative to the start of a frame object to being relative to
  13 ; the frame itself.  In some cases the test is only possible if two
  14 ; objects are allocated.
  15 ;
  16 ; Rather than rely on a particular order for those objects, the tests
  17 ; instead allocate two objects of the same size and apply the test to
  18 ; both of them.  For consistency, all tests follow this model, even if
  19 ; one object would actually be enough.
  20
  21 ; First check the highest offset that is in range of the 12-bit form.
  22 ;
  23 ; The last in-range doubleword offset is 4088.  Since the frame has two
  24 ; emergency spill slots at 160(%r15), the amount that we need to allocate
  25 ; in order to put another object at offset 4088 is 4088 - 176 = 3912 bytes.
   26 define void @f1() {
      ; Stores 42 to byte 7 of each of two 3912-byte stack objects.  With the
      ; sizing described above, one object starts at frame offset 4088, so its
      ; store has displacement 4088 + 7 = 4095, the largest the 12-bit form
      ; (MVI) can encode.  Only one matching MVI is required by each run.
   27 ; CHECK-NOFP-LABEL: f1:
   28 ; CHECK-NOFP: mvi 4095(%r15), 42
   29 ; CHECK-NOFP: br %r14
   30 ;
      ; Same expectation for the -frame-pointer=all run, where the base
      ; register is %r11 instead of %r15.
   31 ; CHECK-FP-LABEL: f1:
   32 ; CHECK-FP: mvi 4095(%r11), 42
   33 ; CHECK-FP: br %r14
        ; Two equally sized objects, so the test does not depend on which one
        ; the frame layout places at the offset of interest.
   34   %region1 = alloca [3912 x i8], align 8
   35   %region2 = alloca [3912 x i8], align 8
   36   %ptr1 = getelementptr inbounds [3912 x i8], [3912 x i8]* %region1, i64 0, i64 7
   37   %ptr2 = getelementptr inbounds [3912 x i8], [3912 x i8]* %region2, i64 0, i64 7
        ; Volatile so that neither store to the otherwise unused objects can
        ; be removed.
   38   store volatile i8 42, i8 *%ptr1
   39   store volatile i8 42, i8 *%ptr2
   40   ret void
   41 }
  42
  43 ; Test the first offset that is out-of-range of the 12-bit form.
   44 define void @f2() {
      ; Same 3912-byte objects as f1, but the store targets byte 8, giving a
      ; displacement of 4088 + 8 = 4096.  That is one past the 12-bit limit,
      ; so the 20-bit form (MVIY) is expected instead of MVI.
   45 ; CHECK-NOFP-LABEL: f2:
   46 ; CHECK-NOFP: mviy 4096(%r15), 42
   47 ; CHECK-NOFP: br %r14
   48 ;
      ; Same expectation for the -frame-pointer=all run, based on %r11.
   49 ; CHECK-FP-LABEL: f2:
   50 ; CHECK-FP: mviy 4096(%r11), 42
   51 ; CHECK-FP: br %r14
        ; Two equally sized objects keep the test independent of their order
        ; in the frame.
   52   %region1 = alloca [3912 x i8], align 8
   53   %region2 = alloca [3912 x i8], align 8
   54   %ptr1 = getelementptr inbounds [3912 x i8], [3912 x i8]* %region1, i64 0, i64 8
   55   %ptr2 = getelementptr inbounds [3912 x i8], [3912 x i8]* %region2, i64 0, i64 8
        ; Volatile so that neither store can be removed.
   56   store volatile i8 42, i8 *%ptr1
   57   store volatile i8 42, i8 *%ptr2
   58   ret void
   59 }
  60
  61 ; Test the last offset that is in range of the 20-bit form.
  62 ;
  63 ; The last in-range doubleword offset is 524280, so by the same reasoning
  64 ; as above, we need to allocate objects of 524280 - 176 = 524104 bytes.
   65 define void @f3() {
      ; Stores 42 to byte 7 of each of two 524104-byte objects.  One object
      ; starts at frame offset 524280, so its store has displacement
      ; 524280 + 7 = 524287, the largest the 20-bit form (MVIY) can encode.
   66 ; CHECK-NOFP-LABEL: f3:
   67 ; CHECK-NOFP: mviy 524287(%r15), 42
   68 ; CHECK-NOFP: br %r14
   69 ;
      ; Same expectation for the -frame-pointer=all run, based on %r11.
   70 ; CHECK-FP-LABEL: f3:
   71 ; CHECK-FP: mviy 524287(%r11), 42
   72 ; CHECK-FP: br %r14
        ; Two equally sized objects keep the test independent of their order
        ; in the frame.
   73   %region1 = alloca [524104 x i8], align 8
   74   %region2 = alloca [524104 x i8], align 8
   75   %ptr1 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region1, i64 0, i64 7
   76   %ptr2 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region2, i64 0, i64 7
        ; Volatile so that neither store can be removed.
   77   store volatile i8 42, i8 *%ptr1
   78   store volatile i8 42, i8 *%ptr2
   79   ret void
   80 }
  81
  82 ; Test the first out-of-range offset.  We can't use an index register here,
  83 ; and the offset is also out of LAY's range, so expect a constant load
  84 ; followed by an addition.
   85 define void @f4() {
      ; Same 524104-byte objects as f3, but the store targets byte 8, giving a
      ; frame offset of 524280 + 8 = 524288 = 8 * 0x10000, one past the 20-bit
      ; limit.  The expected code builds that constant with LLILH, adds the
      ; base register, and stores through the result with a zero displacement.
   86 ; CHECK-NOFP-LABEL: f4:
   87 ; CHECK-NOFP: llilh %r1, 8
   88 ; CHECK-NOFP: agr %r1, %r15
   89 ; CHECK-NOFP: mvi 0(%r1), 42
   90 ; CHECK-NOFP: br %r14
   91 ;
      ; Same sequence for the -frame-pointer=all run, adding %r11 instead of
      ; %r15.
   92 ; CHECK-FP-LABEL: f4:
   93 ; CHECK-FP: llilh %r1, 8
   94 ; CHECK-FP: agr %r1, %r11
   95 ; CHECK-FP: mvi 0(%r1), 42
   96 ; CHECK-FP: br %r14
        ; Two equally sized objects keep the test independent of their order
        ; in the frame.
   97   %region1 = alloca [524104 x i8], align 8
   98   %region2 = alloca [524104 x i8], align 8
   99   %ptr1 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region1, i64 0, i64 8
  100   %ptr2 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region2, i64 0, i64 8
        ; Volatile so that neither store can be removed.
  101   store volatile i8 42, i8 *%ptr1
  102   store volatile i8 42, i8 *%ptr2
  103   ret void
  104 }
 105
 106 ; Add 4095 to the previous offset, to test the other end of the MVI range.
 107 ; The instruction will actually be STCY before frame lowering.
  108 define void @f5() {
      ; Stores to byte 4103 of the 524104-byte objects: frame offset
      ; 524280 + 4103 = 524288 + 4095.  The expected code forms the same
      ; 8 * 0x10000 anchor as f4 and then uses the largest MVI displacement
      ; (4095) relative to it.
  109 ; CHECK-NOFP-LABEL: f5:
  110 ; CHECK-NOFP: llilh %r1, 8
  111 ; CHECK-NOFP: agr %r1, %r15
  112 ; CHECK-NOFP: mvi 4095(%r1), 42
  113 ; CHECK-NOFP: br %r14
  114 ;
      ; Same sequence for the -frame-pointer=all run, adding %r11 instead of
      ; %r15.
  115 ; CHECK-FP-LABEL: f5:
  116 ; CHECK-FP: llilh %r1, 8
  117 ; CHECK-FP: agr %r1, %r11
  118 ; CHECK-FP: mvi 4095(%r1), 42
  119 ; CHECK-FP: br %r14
        ; Two equally sized objects keep the test independent of their order
        ; in the frame.
  120   %region1 = alloca [524104 x i8], align 8
  121   %region2 = alloca [524104 x i8], align 8
  122   %ptr1 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region1, i64 0, i64 4103
  123   %ptr2 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region2, i64 0, i64 4103
        ; Volatile so that neither store can be removed.
  124   store volatile i8 42, i8 *%ptr1
  125   store volatile i8 42, i8 *%ptr2
  126   ret void
  127 }
 128
 129 ; Test the next offset after that, which uses MVIY instead of MVI.
  130 define void @f6() {
      ; Stores to byte 4104 of the 524104-byte objects: frame offset
      ; 524280 + 4104 = 524288 + 4096.  Relative to the 8 * 0x10000 anchor the
      ; displacement is 4096, one past the MVI range, so MVIY is expected.
  131 ; CHECK-NOFP-LABEL: f6:
  132 ; CHECK-NOFP: llilh %r1, 8
  133 ; CHECK-NOFP: agr %r1, %r15
  134 ; CHECK-NOFP: mviy 4096(%r1), 42
  135 ; CHECK-NOFP: br %r14
  136 ;
      ; Same sequence for the -frame-pointer=all run, adding %r11 instead of
      ; %r15.
  137 ; CHECK-FP-LABEL: f6:
  138 ; CHECK-FP: llilh %r1, 8
  139 ; CHECK-FP: agr %r1, %r11
  140 ; CHECK-FP: mviy 4096(%r1), 42
  141 ; CHECK-FP: br %r14
        ; Two equally sized objects keep the test independent of their order
        ; in the frame.
  142   %region1 = alloca [524104 x i8], align 8
  143   %region2 = alloca [524104 x i8], align 8
  144   %ptr1 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region1, i64 0, i64 4104
  145   %ptr2 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region2, i64 0, i64 4104
        ; Volatile so that neither store can be removed.
  146   store volatile i8 42, i8 *%ptr1
  147   store volatile i8 42, i8 *%ptr2
  148   ret void
  149 }
 150
 151 ; Now try an offset of 524287 from the start of the object, with the
 152 ; object being at offset 1048576 (1 << 20).  The backend prefers to create
 153 ; anchors 0x10000 bytes apart, so that the high part can be loaded using
 154 ; LLILH while still using MVI in more cases than 0x40000 anchors would.
  155 define void @f7() {
      ; Objects are 1048400 = 1048576 - 176 bytes, which puts one of them at
      ; frame offset 1048576.  Byte 524287 of that object is at
      ; 1048576 + 524287 = 1572863 = 23 * 0x10000 + 65535, hence the expected
      ; LLILH immediate of 23 and the MVIY displacement of 65535.
  156 ; CHECK-NOFP-LABEL: f7:
  157 ; CHECK-NOFP: llilh %r1, 23
  158 ; CHECK-NOFP: agr %r1, %r15
  159 ; CHECK-NOFP: mviy 65535(%r1), 42
  160 ; CHECK-NOFP: br %r14
  161 ;
      ; Same sequence for the -frame-pointer=all run, adding %r11 instead of
      ; %r15.
  162 ; CHECK-FP-LABEL: f7:
  163 ; CHECK-FP: llilh %r1, 23
  164 ; CHECK-FP: agr %r1, %r11
  165 ; CHECK-FP: mviy 65535(%r1), 42
  166 ; CHECK-FP: br %r14
        ; Two equally sized objects keep the test independent of their order
        ; in the frame.
  167   %region1 = alloca [1048400 x i8], align 8
  168   %region2 = alloca [1048400 x i8], align 8
  169   %ptr1 = getelementptr inbounds [1048400 x i8], [1048400 x i8]* %region1, i64 0, i64 524287
  170   %ptr2 = getelementptr inbounds [1048400 x i8], [1048400 x i8]* %region2, i64 0, i64 524287
        ; Volatile so that neither store can be removed.
  171   store volatile i8 42, i8 *%ptr1
  172   store volatile i8 42, i8 *%ptr2
  173   ret void
  174 }
 175
 176 ; Keep the object-relative offset the same but bump the size of the
 177 ; objects by one doubleword.
  178 define void @f8() {
      ; Objects are 8 bytes larger than in f7 (1048408), which moves the tested
      ; object to frame offset 1048584.  Byte 524287 is then at
      ; 1048584 + 524287 = 1572871 = 24 * 0x10000 + 7, so the anchor advances
      ; to 24 and the remaining displacement (7) fits MVI again.
  179 ; CHECK-NOFP-LABEL: f8:
  180 ; CHECK-NOFP: llilh %r1, 24
  181 ; CHECK-NOFP: agr %r1, %r15
  182 ; CHECK-NOFP: mvi 7(%r1), 42
  183 ; CHECK-NOFP: br %r14
  184 ;
      ; Same sequence for the -frame-pointer=all run, adding %r11 instead of
      ; %r15.
  185 ; CHECK-FP-LABEL: f8:
  186 ; CHECK-FP: llilh %r1, 24
  187 ; CHECK-FP: agr %r1, %r11
  188 ; CHECK-FP: mvi 7(%r1), 42
  189 ; CHECK-FP: br %r14
        ; Two equally sized objects keep the test independent of their order
        ; in the frame.
  190   %region1 = alloca [1048408 x i8], align 8
  191   %region2 = alloca [1048408 x i8], align 8
  192   %ptr1 = getelementptr inbounds [1048408 x i8], [1048408 x i8]* %region1, i64 0, i64 524287
  193   %ptr2 = getelementptr inbounds [1048408 x i8], [1048408 x i8]* %region2, i64 0, i64 524287
        ; Volatile so that neither store can be removed.
  194   store volatile i8 42, i8 *%ptr1
  195   store volatile i8 42, i8 *%ptr2
  196   ret void
  197 }
 198
 199 ; Check a case where the original displacement is out of range.  The backend
 200 ; should force separate address logic from the outset.  We don't yet do any
 201 ; kind of anchor optimization, so there should be no offset on the MVI itself.
 202 ;
 203 ; Before frame lowering this is an LA followed by the AGFI seen below.
 204 ; The LA then gets lowered into the LLILH/LA form.  The exact sequence
 205 ; isn't that important though.
  206 define void @f9() {
      ; Stores to byte 524288 of the 1048408-byte objects, so the displacement
      ; is already out of the 20-bit range relative to the object itself.
      ; The expected code first forms the object address (LLILH 16 gives
      ; 1048576, and LA adds 8 plus the base: 1048584 = 1048408 + 176), then
      ; adds the 524288 byte offset with AGFI and stores with displacement 0.
      ; The registers are captured by pattern rather than fixed.
  207 ; CHECK-NOFP-LABEL: f9:
  208 ; CHECK-NOFP: llilh [[R1:%r[1-5]]], 16
  209 ; CHECK-NOFP: la [[R2:%r[1-5]]], 8([[R1]],%r15)
  210 ; CHECK-NOFP: agfi [[R2]], 524288
  211 ; CHECK-NOFP: mvi 0([[R2]]), 42
  212 ; CHECK-NOFP: br %r14
  213 ;
      ; Same sequence for the -frame-pointer=all run, with %r11 as the base
      ; in the LA.
  214 ; CHECK-FP-LABEL: f9:
  215 ; CHECK-FP: llilh [[R1:%r[1-5]]], 16
  216 ; CHECK-FP: la [[R2:%r[1-5]]], 8([[R1]],%r11)
  217 ; CHECK-FP: agfi [[R2]], 524288
  218 ; CHECK-FP: mvi 0([[R2]]), 42
  219 ; CHECK-FP: br %r14
        ; Two equally sized objects keep the test independent of their order
        ; in the frame.
  220   %region1 = alloca [1048408 x i8], align 8
  221   %region2 = alloca [1048408 x i8], align 8
  222   %ptr1 = getelementptr inbounds [1048408 x i8], [1048408 x i8]* %region1, i64 0, i64 524288
  223   %ptr2 = getelementptr inbounds [1048408 x i8], [1048408 x i8]* %region2, i64 0, i64 524288
        ; Volatile so that neither store can be removed.
  224   store volatile i8 42, i8 *%ptr1
  225   store volatile i8 42, i8 *%ptr2
  226   ret void
  227 }
 228
 229 ; Repeat f4 in a case that needs the emergency spill slots (because all
 230 ; call-clobbered registers are live and no call-saved ones have been
 231 ; allocated).
  232 define void @f10(i32 *%vptr) {
      ; Same out-of-range store as f4 (frame offset 524288 = 8 * 0x10000), but
      ; with five i32 values loaded from %vptr kept live across it.  The
      ; expected code saves a register to one of the emergency slots (160 or
      ; 168), uses it as the address scratch register, and reloads it from the
      ; same slot afterwards.  The register and slot are captured by pattern.
  233 ; CHECK-NOFP-LABEL: f10:
  234 ; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r15)
  235 ; CHECK-NOFP: llilh [[REGISTER]], 8
  236 ; CHECK-NOFP: agr [[REGISTER]], %r15
  237 ; CHECK-NOFP: mvi 0([[REGISTER]]), 42
  238 ; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15)
  239 ; CHECK-NOFP: br %r14
  240 ;
      ; Same sequence for the -frame-pointer=all run, with %r11 as the base
      ; for the spill slot and the address computation.
  241 ; CHECK-FP-LABEL: f10:
  242 ; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r11)
  243 ; CHECK-FP: llilh [[REGISTER]], 8
  244 ; CHECK-FP: agr [[REGISTER]], %r11
  245 ; CHECK-FP: mvi 0([[REGISTER]]), 42
  246 ; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11)
  247 ; CHECK-FP: br %r14
        ; Five volatile loads whose results are only consumed after the byte
        ; stores, creating the register pressure (the names skip %i2).
  248   %i0 = load volatile i32, i32 *%vptr
  249   %i1 = load volatile i32, i32 *%vptr
  250   %i3 = load volatile i32, i32 *%vptr
  251   %i4 = load volatile i32, i32 *%vptr
  252   %i5 = load volatile i32, i32 *%vptr
        ; Two equally sized objects keep the test independent of their order
        ; in the frame.
  253   %region1 = alloca [524104 x i8], align 8
  254   %region2 = alloca [524104 x i8], align 8
  255   %ptr1 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region1, i64 0, i64 8
  256   %ptr2 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region2, i64 0, i64 8
        ; The stores under test.
  257   store volatile i8 42, i8 *%ptr1
  258   store volatile i8 42, i8 *%ptr2
        ; Consume the loaded values so they stay live across the stores above.
  259   store volatile i32 %i0, i32 *%vptr
  260   store volatile i32 %i1, i32 *%vptr
  261   store volatile i32 %i3, i32 *%vptr
  262   store volatile i32 %i4, i32 *%vptr
  263   store volatile i32 %i5, i32 *%vptr
  264   ret void
  265 }
 266
 267 ; And again with maximum register pressure.  The only spill slots that the
 268 ; NOFP case needs are the emergency ones, so the offsets are the same as for f4.
 269 ; The FP case needs to spill an extra register and is too dependent on
 270 ; register allocation heuristics for a stable test.
  271 define void @f11(i32 *%vptr) {
      ; Same out-of-range store as f4 and f10, now with fourteen i32 values
      ; kept live across it.  The expected code saves %r6-%r15 with STMG,
      ; spills a scratch register to an emergency slot (160 or 168) around the
      ; LLILH/AGR/MVI sequence, and restores %r6-%r15 with LMG.  Only the
      ; no-frame-pointer run has expectations for this function.
  272 ; CHECK-NOFP-LABEL: f11:
  273 ; CHECK-NOFP: stmg %r6, %r15,
  274 ; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r15)
  275 ; CHECK-NOFP: llilh [[REGISTER]], 8
  276 ; CHECK-NOFP: agr [[REGISTER]], %r15
  277 ; CHECK-NOFP: mvi 0([[REGISTER]]), 42
  278 ; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15)
  279 ; CHECK-NOFP: lmg %r6, %r15,
  280 ; CHECK-NOFP: br %r14
        ; Fourteen volatile loads whose results are only consumed after the
        ; byte stores, creating the register pressure (the names skip %i2).
  281   %i0 = load volatile i32, i32 *%vptr
  282   %i1 = load volatile i32, i32 *%vptr
  283   %i3 = load volatile i32, i32 *%vptr
  284   %i4 = load volatile i32, i32 *%vptr
  285   %i5 = load volatile i32, i32 *%vptr
  286   %i6 = load volatile i32, i32 *%vptr
  287   %i7 = load volatile i32, i32 *%vptr
  288   %i8 = load volatile i32, i32 *%vptr
  289   %i9 = load volatile i32, i32 *%vptr
  290   %i10 = load volatile i32, i32 *%vptr
  291   %i11 = load volatile i32, i32 *%vptr
  292   %i12 = load volatile i32, i32 *%vptr
  293   %i13 = load volatile i32, i32 *%vptr
  294   %i14 = load volatile i32, i32 *%vptr
        ; Two equally sized objects keep the test independent of their order
        ; in the frame.
  295   %region1 = alloca [524104 x i8], align 8
  296   %region2 = alloca [524104 x i8], align 8
  297   %ptr1 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region1, i64 0, i64 8
  298   %ptr2 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region2, i64 0, i64 8
        ; The stores under test.
  299   store volatile i8 42, i8 *%ptr1
  300   store volatile i8 42, i8 *%ptr2
        ; Consume the loaded values so they stay live across the stores above.
  301   store volatile i32 %i0, i32 *%vptr
  302   store volatile i32 %i1, i32 *%vptr
  303   store volatile i32 %i3, i32 *%vptr
  304   store volatile i32 %i4, i32 *%vptr
  305   store volatile i32 %i5, i32 *%vptr
  306   store volatile i32 %i6, i32 *%vptr
  307   store volatile i32 %i7, i32 *%vptr
  308   store volatile i32 %i8, i32 *%vptr
  309   store volatile i32 %i9, i32 *%vptr
  310   store volatile i32 %i10, i32 *%vptr
  311   store volatile i32 %i11, i32 *%vptr
  312   store volatile i32 %i12, i32 *%vptr
  313   store volatile i32 %i13, i32 *%vptr
  314   store volatile i32 %i14, i32 *%vptr
  315   ret void
  316 }