llvm/test/CodeGen/SystemZ/cond-store-02.ll

   1 ; Test 16-bit conditional stores that are presented as selects.  The volatile
   2 ; tests require z10, which uses a branch instead of a LOCR.
   3 ;
   4 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
   5
   6 declare void @foo(i16 *) ; external callee; @f22 passes it the address of its alloca
   7
   8 ; Test the simple case, with the loaded value first.
   9 define void @f1(i16 *%ptr, i16 %alt, i32 %limit) {
     ; %cond is true when %limit is below 420 (unsigned compare).  The select
     ; then keeps the value just loaded from %ptr; otherwise %alt is stored.
     ; Expected output: a conditional return (blr) ahead of a plain sth of %r3,
     ; with %r2 not mentioned anywhere before the store.
  10 ; CHECK-LABEL: f1:
  11 ; CHECK-NOT: %r2
  12 ; CHECK: blr %r14
  13 ; CHECK-NOT: %r2
  14 ; CHECK: sth %r3, 0(%r2)
  15 ; CHECK: br %r14
  16   %cond = icmp ult i32 %limit, 420
  17   %orig = load i16, i16 *%ptr
  18   %res = select i1 %cond, i16 %orig, i16 %alt
  19   store i16 %res, i16 *%ptr
  20   ret void
  21 }
  22
  23 ; ...and with the loaded value second
  24 define void @f2(i16 *%ptr, i16 %alt, i32 %limit) {
     ; Same as @f1 but with the select operands swapped: %alt is stored when
     ; %limit is below 420 and the loaded value is kept otherwise.  The
     ; expected early return therefore uses the opposite condition (bher).
  25 ; CHECK-LABEL: f2:
  26 ; CHECK-NOT: %r2
  27 ; CHECK: bher %r14
  28 ; CHECK-NOT: %r2
  29 ; CHECK: sth %r3, 0(%r2)
  30 ; CHECK: br %r14
  31   %cond = icmp ult i32 %limit, 420
  32   %orig = load i16, i16 *%ptr
  33   %res = select i1 %cond, i16 %alt, i16 %orig
  34   store i16 %res, i16 *%ptr
  35   ret void
  36 }
  37
  38 ; Test cases where the value is explicitly sign-extended to 32 bits, with the
  39 ; loaded value first.
  40 define void @f3(i16 *%ptr, i32 %alt, i32 %limit) {
     ; The loaded halfword is sign-extended to i32, selected against the i32
     ; %alt, and the result truncated back to i16 before the store.  The
     ; expected output is the same blr + sth pair as in the plain i16 case.
  41 ; CHECK-LABEL: f3:
  42 ; CHECK-NOT: %r2
  43 ; CHECK: blr %r14
  44 ; CHECK-NOT: %r2
  45 ; CHECK: sth %r3, 0(%r2)
  46 ; CHECK: br %r14
  47   %cond = icmp ult i32 %limit, 420
  48   %orig = load i16, i16 *%ptr
  49   %ext = sext i16 %orig to i32
  50   %res = select i1 %cond, i32 %ext, i32 %alt
  51   %trunc = trunc i32 %res to i16
  52   store i16 %trunc, i16 *%ptr
  53   ret void
  54 }
  55
  56 ; ...and with the loaded value second
  57 define void @f4(i16 *%ptr, i32 %alt, i32 %limit) {
     ; Sign-extension to i32 with the select operands swapped relative to @f3:
     ; %alt is chosen when %limit is below 420.  The expected early return
     ; uses bher instead of blr.
  58 ; CHECK-LABEL: f4:
  59 ; CHECK-NOT: %r2
  60 ; CHECK: bher %r14
  61 ; CHECK-NOT: %r2
  62 ; CHECK: sth %r3, 0(%r2)
  63 ; CHECK: br %r14
  64   %cond = icmp ult i32 %limit, 420
  65   %orig = load i16, i16 *%ptr
  66   %ext = sext i16 %orig to i32
  67   %res = select i1 %cond, i32 %alt, i32 %ext
  68   %trunc = trunc i32 %res to i16
  69   store i16 %trunc, i16 *%ptr
  70   ret void
  71 }
  72
  73 ; Test cases where the value is explicitly zero-extended to 32 bits, with the
  74 ; loaded value first.
  75 define void @f5(i16 *%ptr, i32 %alt, i32 %limit) {
     ; The loaded halfword is zero-extended to i32, selected against the i32
     ; %alt, and truncated back to i16 for the store.  The expected output is
     ; still a blr followed by sth, with no separate extension of the load.
  76 ; CHECK-LABEL: f5:
  77 ; CHECK-NOT: %r2
  78 ; CHECK: blr %r14
  79 ; CHECK-NOT: %r2
  80 ; CHECK: sth %r3, 0(%r2)
  81 ; CHECK: br %r14
  82   %cond = icmp ult i32 %limit, 420
  83   %orig = load i16, i16 *%ptr
  84   %ext = zext i16 %orig to i32
  85   %res = select i1 %cond, i32 %ext, i32 %alt
  86   %trunc = trunc i32 %res to i16
  87   store i16 %trunc, i16 *%ptr
  88   ret void
  89 }
  90
  91 ; ...and with the loaded value second
  92 define void @f6(i16 *%ptr, i32 %alt, i32 %limit) {
     ; Zero-extension to i32 with the select operands swapped relative to @f5:
     ; %alt is chosen when %limit is below 420, so the expected early return
     ; uses bher.
  93 ; CHECK-LABEL: f6:
  94 ; CHECK-NOT: %r2
  95 ; CHECK: bher %r14
  96 ; CHECK-NOT: %r2
  97 ; CHECK: sth %r3, 0(%r2)
  98 ; CHECK: br %r14
  99   %cond = icmp ult i32 %limit, 420
 100   %orig = load i16, i16 *%ptr
 101   %ext = zext i16 %orig to i32
 102   %res = select i1 %cond, i32 %alt, i32 %ext
 103   %trunc = trunc i32 %res to i16
 104   store i16 %trunc, i16 *%ptr
 105   ret void
 106 }
 107
 108 ; Test cases where the value is explicitly sign-extended to 64 bits, with the
 109 ; loaded value first.
 110 define void @f7(i16 *%ptr, i64 %alt, i32 %limit) {
     ; As @f3, but the select is performed at i64: the loaded halfword is
     ; sign-extended to i64 and the result truncated from i64 to i16.  The
     ; expected output is unchanged (blr, then sth of %r3).
 111 ; CHECK-LABEL: f7:
 112 ; CHECK-NOT: %r2
 113 ; CHECK: blr %r14
 114 ; CHECK-NOT: %r2
 115 ; CHECK: sth %r3, 0(%r2)
 116 ; CHECK: br %r14
 117   %cond = icmp ult i32 %limit, 420
 118   %orig = load i16, i16 *%ptr
 119   %ext = sext i16 %orig to i64
 120   %res = select i1 %cond, i64 %ext, i64 %alt
 121   %trunc = trunc i64 %res to i16
 122   store i16 %trunc, i16 *%ptr
 123   ret void
 124 }
 125
 126 ; ...and with the loaded value second
 127 define void @f8(i16 *%ptr, i64 %alt, i32 %limit) {
     ; Sign-extension to i64 with the select operands swapped relative to @f7:
     ; %alt is chosen when %limit is below 420, so the expected early return
     ; uses bher.
 128 ; CHECK-LABEL: f8:
 129 ; CHECK-NOT: %r2
 130 ; CHECK: bher %r14
 131 ; CHECK-NOT: %r2
 132 ; CHECK: sth %r3, 0(%r2)
 133 ; CHECK: br %r14
 134   %cond = icmp ult i32 %limit, 420
 135   %orig = load i16, i16 *%ptr
 136   %ext = sext i16 %orig to i64
 137   %res = select i1 %cond, i64 %alt, i64 %ext
 138   %trunc = trunc i64 %res to i16
 139   store i16 %trunc, i16 *%ptr
 140   ret void
 141 }
 142
 143 ; Test cases where the value is explicitly zero-extended to 64 bits, with the
 144 ; loaded value first.
 145 define void @f9(i16 *%ptr, i64 %alt, i32 %limit) {
     ; As @f5, but the select is performed at i64: the loaded halfword is
     ; zero-extended to i64 and the result truncated from i64 to i16.  The
     ; expected output is unchanged (blr, then sth of %r3).
 146 ; CHECK-LABEL: f9:
 147 ; CHECK-NOT: %r2
 148 ; CHECK: blr %r14
 149 ; CHECK-NOT: %r2
 150 ; CHECK: sth %r3, 0(%r2)
 151 ; CHECK: br %r14
 152   %cond = icmp ult i32 %limit, 420
 153   %orig = load i16, i16 *%ptr
 154   %ext = zext i16 %orig to i64
 155   %res = select i1 %cond, i64 %ext, i64 %alt
 156   %trunc = trunc i64 %res to i16
 157   store i16 %trunc, i16 *%ptr
 158   ret void
 159 }
 160
 161 ; ...and with the loaded value second
 162 define void @f10(i16 *%ptr, i64 %alt, i32 %limit) {
     ; Zero-extension to i64 with the select operands swapped relative to @f9:
     ; %alt is chosen when %limit is below 420, so the expected early return
     ; uses bher.
 163 ; CHECK-LABEL: f10:
 164 ; CHECK-NOT: %r2
 165 ; CHECK: bher %r14
 166 ; CHECK-NOT: %r2
 167 ; CHECK: sth %r3, 0(%r2)
 168 ; CHECK: br %r14
 169   %cond = icmp ult i32 %limit, 420
 170   %orig = load i16, i16 *%ptr
 171   %ext = zext i16 %orig to i64
 172   %res = select i1 %cond, i64 %alt, i64 %ext
 173   %trunc = trunc i64 %res to i16
 174   store i16 %trunc, i16 *%ptr
 175   ret void
 176 }
 177
 178 ; Check the high end of the aligned STH range.
 179 define void @f11(i16 *%base, i16 %alt, i32 %limit) {
     ; The address is %base plus 2047 halfwords, i.e. a byte displacement of
     ; 4094, which the expected sth encodes directly.
 180 ; CHECK-LABEL: f11:
 181 ; CHECK-NOT: %r2
 182 ; CHECK: blr %r14
 183 ; CHECK-NOT: %r2
 184 ; CHECK: sth %r3, 4094(%r2)
 185 ; CHECK: br %r14
 186   %ptr = getelementptr i16, i16 *%base, i64 2047
 187   %cond = icmp ult i32 %limit, 420
 188   %orig = load i16, i16 *%ptr
 189   %res = select i1 %cond, i16 %orig, i16 %alt
 190   store i16 %res, i16 *%ptr
 191   ret void
 192 }
 193
 194 ; Check the next halfword up, which should use STHY instead of STH.
 195 define void @f12(i16 *%base, i16 %alt, i32 %limit) {
     ; The address is %base plus 2048 halfwords, i.e. a byte displacement of
     ; 4096; the expected store switches from sth to sthy at this offset.
 196 ; CHECK-LABEL: f12:
 197 ; CHECK-NOT: %r2
 198 ; CHECK: blr %r14
 199 ; CHECK-NOT: %r2
 200 ; CHECK: sthy %r3, 4096(%r2)
 201 ; CHECK: br %r14
 202   %ptr = getelementptr i16, i16 *%base, i64 2048
 203   %cond = icmp ult i32 %limit, 420
 204   %orig = load i16, i16 *%ptr
 205   %res = select i1 %cond, i16 %orig, i16 %alt
 206   store i16 %res, i16 *%ptr
 207   ret void
 208 }
 209
 210 ; Check the high end of the aligned STHY range.
 211 define void @f13(i16 *%base, i16 %alt, i32 %limit) {
     ; The address is %base plus 262143 halfwords, i.e. a byte displacement of
     ; 524286, which the expected sthy still encodes directly.
 212 ; CHECK-LABEL: f13:
 213 ; CHECK-NOT: %r2
 214 ; CHECK: blr %r14
 215 ; CHECK-NOT: %r2
 216 ; CHECK: sthy %r3, 524286(%r2)
 217 ; CHECK: br %r14
 218   %ptr = getelementptr i16, i16 *%base, i64 262143
 219   %cond = icmp ult i32 %limit, 420
 220   %orig = load i16, i16 *%ptr
 221   %res = select i1 %cond, i16 %orig, i16 %alt
 222   store i16 %res, i16 *%ptr
 223   ret void
 224 }
 225
 226 ; Check the next halfword up, which needs separate address logic.
 227 ; Other sequences besides this one would be OK.
 228 define void @f14(i16 *%base, i16 %alt, i32 %limit) {
     ; The address is %base plus 262144 halfwords (524288 bytes).  The expected
     ; output adds that offset to %r2 with agfi after the conditional return
     ; and then stores through a zero displacement.
 229 ; CHECK-LABEL: f14:
 230 ; CHECK-NOT: %r2
 231 ; CHECK: blr %r14
 232 ; CHECK-NOT: %r2
 233 ; CHECK: agfi %r2, 524288
 234 ; CHECK: sth %r3, 0(%r2)
 235 ; CHECK: br %r14
 236   %ptr = getelementptr i16, i16 *%base, i64 262144
 237   %cond = icmp ult i32 %limit, 420
 238   %orig = load i16, i16 *%ptr
 239   %res = select i1 %cond, i16 %orig, i16 %alt
 240   store i16 %res, i16 *%ptr
 241   ret void
 242 }
 243
 244 ; Check the low end of the STHY range.
 245 define void @f15(i16 *%base, i16 %alt, i32 %limit) {
     ; The address is %base minus 262144 halfwords, i.e. a byte displacement of
     ; -524288, which the expected sthy encodes directly.
 246 ; CHECK-LABEL: f15:
 247 ; CHECK-NOT: %r2
 248 ; CHECK: blr %r14
 249 ; CHECK-NOT: %r2
 250 ; CHECK: sthy %r3, -524288(%r2)
 251 ; CHECK: br %r14
 252   %ptr = getelementptr i16, i16 *%base, i64 -262144
 253   %cond = icmp ult i32 %limit, 420
 254   %orig = load i16, i16 *%ptr
 255   %res = select i1 %cond, i16 %orig, i16 %alt
 256   store i16 %res, i16 *%ptr
 257   ret void
 258 }
 259
 260 ; Check the next halfword down, which needs separate address logic.
 261 ; Other sequences besides this one would be OK.
 262 define void @f16(i16 *%base, i16 %alt, i32 %limit) {
     ; The address is %base minus 262145 halfwords (-524290 bytes).  The
     ; expected output adds that offset to %r2 with agfi after the conditional
     ; return and then stores through a zero displacement.
 263 ; CHECK-LABEL: f16:
 264 ; CHECK-NOT: %r2
 265 ; CHECK: blr %r14
 266 ; CHECK-NOT: %r2
 267 ; CHECK: agfi %r2, -524290
 268 ; CHECK: sth %r3, 0(%r2)
 269 ; CHECK: br %r14
 270   %ptr = getelementptr i16, i16 *%base, i64 -262145
 271   %cond = icmp ult i32 %limit, 420
 272   %orig = load i16, i16 *%ptr
 273   %res = select i1 %cond, i16 %orig, i16 %alt
 274   store i16 %res, i16 *%ptr
 275   ret void
 276 }
 277
 278 ; Check that STHY allows an index.
 279 define void @f17(i64 %base, i64 %index, i16 %alt, i32 %limit) {
     ; The pointer is built from integers as %base + %index + 4096.  The
     ; expected sthy folds all three parts into one base+index+displacement
     ; operand, storing %r4 at 4096(%r3,%r2).
 280 ; CHECK-LABEL: f17:
 281 ; CHECK-NOT: %r2
 282 ; CHECK: blr %r14
 283 ; CHECK-NOT: %r2
 284 ; CHECK: sthy %r4, 4096(%r3,%r2)
 285 ; CHECK: br %r14
 286   %add1 = add i64 %base, %index
 287   %add2 = add i64 %add1, 4096
 288   %ptr = inttoptr i64 %add2 to i16 *
 289   %cond = icmp ult i32 %limit, 420
 290   %orig = load i16, i16 *%ptr
 291   %res = select i1 %cond, i16 %orig, i16 %alt
 292   store i16 %res, i16 *%ptr
 293   ret void
 294 }
 295
 296 ; Check that volatile loads are not matched.
 297 define void @f18(i16 *%ptr, i16 %alt, i32 %limit) {
     ; The load is volatile, so the expected output keeps an explicit lh from
     ; 0(%r2), then a jl or jnl to a local label, then an sth back to 0(%r2),
     ; rather than the conditional-return form used in @f1.
 298 ; CHECK-LABEL: f18:
 299 ; CHECK: lh {{%r[0-5]}}, 0(%r2)
 300 ; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
 301 ; CHECK: [[LABEL]]:
 302 ; CHECK: sth {{%r[0-5]}}, 0(%r2)
 303 ; CHECK: br %r14
 304   %cond = icmp ult i32 %limit, 420
 305   %orig = load volatile i16, i16 *%ptr
 306   %res = select i1 %cond, i16 %orig, i16 %alt
 307   store i16 %res, i16 *%ptr
 308   ret void
 309 }
 310
 311 ; ...likewise stores.  In this case we should have a conditional load into %r3.
 312 define void @f19(i16 *%ptr, i16 %alt, i32 %limit) {
     ; The store is volatile, so it must always be emitted.  The expected
     ; output branches over the lh with jhe, so the load into %r3 is what is
     ; conditional, and the sth of %r3 follows the label.
 313 ; CHECK-LABEL: f19:
 314 ; CHECK: jhe [[LABEL:[^ ]*]]
 315 ; CHECK: lh %r3, 0(%r2)
 316 ; CHECK: [[LABEL]]:
 317 ; CHECK: sth %r3, 0(%r2)
 318 ; CHECK: br %r14
 319   %cond = icmp ult i32 %limit, 420
 320   %orig = load i16, i16 *%ptr
 321   %res = select i1 %cond, i16 %orig, i16 %alt
 322   store volatile i16 %res, i16 *%ptr
 323   ret void
 324 }
 325
 326 ; Check that atomic loads are not matched.  The transformation is OK for
 327 ; the "unordered" case tested here, but since we don't try to handle atomic
 328 ; operations at all in this context, it seems better to assert that than
 329 ; to restrict the test to a stronger ordering.
 330 define void @f20(i16 *%ptr, i16 %alt, i32 %limit) {
     ; The load is atomic with "unordered" ordering and 2-byte alignment.  The
     ; expected output has the same shape as the volatile-load case: lh, a
     ; jl/jnl to a local label, then sth.
     ; NOTE(review): the FIXME below looks stale -- the expectations already
     ; use a plain lh rather than a compare-and-swap; confirm and drop it.
 331 ; FIXME: should use a normal load instead of CS.
 332 ; CHECK-LABEL: f20:
 333 ; CHECK: lh {{%r[0-9]+}}, 0(%r2)
 334 ; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
 335 ; CHECK: [[LABEL]]:
 336 ; CHECK: sth {{%r[0-9]+}}, 0(%r2)
 337 ; CHECK: br %r14
 338   %cond = icmp ult i32 %limit, 420
 339   %orig = load atomic i16, i16 *%ptr unordered, align 2
 340   %res = select i1 %cond, i16 %orig, i16 %alt
 341   store i16 %res, i16 *%ptr
 342   ret void
 343 }
 344
 345 ; ...likewise stores.
 346 define void @f21(i16 *%ptr, i16 %alt, i32 %limit) {
     ; The store is atomic with "unordered" ordering and 2-byte alignment.  The
     ; expected output has the same shape as the volatile-store case: jhe over
     ; an lh into %r3, then an sth of %r3 after the label.
     ; NOTE(review): the FIXME below looks stale -- the expectations already
     ; use a plain sth rather than a compare-and-swap; confirm and drop it.
 347 ; FIXME: should use a normal store instead of CS.
 348 ; CHECK-LABEL: f21:
 349 ; CHECK: jhe [[LABEL:[^ ]*]]
 350 ; CHECK: lh %r3, 0(%r2)
 351 ; CHECK: [[LABEL]]:
 352 ; CHECK: sth %r3, 0(%r2)
 353 ; CHECK: br %r14
 354   %cond = icmp ult i32 %limit, 420
 355   %orig = load i16, i16 *%ptr
 356   %res = select i1 %cond, i16 %orig, i16 %alt
 357   store atomic i16 %res, i16 *%ptr unordered, align 2
 358   ret void
 359 }
 360
 361 ; Try a frame index base.
 362 define void @f22(i16 %alt, i32 %limit) {
     ; The halfword lives in an alloca whose address is passed to @foo both
     ; before and after the select/store.  The expected output, between the
     ; two calls, is a jl over an sth to a %r15-relative slot, with no other
     ; mention of %r15 from the first call up to that store.
 363 ; CHECK-LABEL: f22:
 364 ; CHECK: brasl %r14, foo@PLT
 365 ; CHECK-NOT: %r15
 366 ; CHECK: jl [[LABEL:[^ ]*]]
 367 ; CHECK-NOT: %r15
 368 ; CHECK: sth {{%r[0-9]+}}, {{[0-9]+}}(%r15)
 369 ; CHECK: [[LABEL]]:
 370 ; CHECK: brasl %r14, foo@PLT
 371 ; CHECK: br %r14
 372   %ptr = alloca i16
 373   call void @foo(i16 *%ptr)
 374   %cond = icmp ult i32 %limit, 420
 375   %orig = load i16, i16 *%ptr
 376   %res = select i1 %cond, i16 %orig, i16 %alt
 377   store i16 %res, i16 *%ptr
 378   call void @foo(i16 *%ptr)
 379   ret void
 380 }