llvm/test/CodeGen/AArch64/load-combine-big-endian.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=aarch64_be-unknown | FileCheck %s
   3
   4 ; ptr p; // p is 4 byte aligned
   5 ; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
   6 define i32 @load_i32_by_i8_big_endian(ptr %arg) { ; bytes combined MSB-first; checks expect a single ldr with no rev
   7 ; CHECK-LABEL: load_i32_by_i8_big_endian:
   8 ; CHECK:       // %bb.0:
   9 ; CHECK-NEXT:    ldr w0, [x0]
  10 ; CHECK-NEXT:    ret
  11   %tmp1 = load i8, ptr %arg, align 4 ; p[0], the only load carrying the 4 byte alignment
  12   %tmp2 = zext i8 %tmp1 to i32
  13   %tmp3 = shl nuw nsw i32 %tmp2, 24
  14   %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
  15   %tmp5 = load i8, ptr %tmp4, align 1 ; p[1]
  16   %tmp6 = zext i8 %tmp5 to i32
  17   %tmp7 = shl nuw nsw i32 %tmp6, 16
  18   %tmp8 = or i32 %tmp7, %tmp3
  19   %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
  20   %tmp10 = load i8, ptr %tmp9, align 1 ; p[2]
  21   %tmp11 = zext i8 %tmp10 to i32
  22   %tmp12 = shl nuw nsw i32 %tmp11, 8
  23   %tmp13 = or i32 %tmp8, %tmp12
  24   %tmp14 = getelementptr inbounds i8, ptr %arg, i32 3
  25   %tmp15 = load i8, ptr %tmp14, align 1 ; p[3], lands in the low byte unshifted
  26   %tmp16 = zext i8 %tmp15 to i32
  27   %tmp17 = or i32 %tmp13, %tmp16
  28   ret i32 %tmp17
  29 }
  30
  31 ; ptr p; // p is 4 byte aligned
  32 ; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[2] << 8) | (i16) p[3])
  33 define i32 @load_i32_by_i16_by_i8_big_endian(ptr %arg) { ; two MSB-first i16 halves joined MSB-first; checks expect a single ldr
  34 ; CHECK-LABEL: load_i32_by_i16_by_i8_big_endian:
  35 ; CHECK:       // %bb.0:
  36 ; CHECK-NEXT:    ldr w0, [x0]
  37 ; CHECK-NEXT:    ret
  38   %tmp1 = load i8, ptr %arg, align 4 ; p[0]
  39   %tmp2 = zext i8 %tmp1 to i16
  40   %tmp3 = getelementptr inbounds i8, ptr %arg, i32 1
  41   %tmp4 = load i8, ptr %tmp3, align 1 ; p[1]
  42   %tmp5 = zext i8 %tmp4 to i16
  43   %tmp6 = shl nuw nsw i16 %tmp2, 8
  44   %tmp7 = or i16 %tmp6, %tmp5 ; high i16 half: (p[0] << 8) | p[1]
  45   %tmp8 = getelementptr inbounds i8, ptr %arg, i32 2
  46   %tmp9 = load i8, ptr %tmp8, align 1 ; p[2]
  47   %tmp10 = zext i8 %tmp9 to i16
  48   %tmp11 = getelementptr inbounds i8, ptr %arg, i32 3
  49   %tmp12 = load i8, ptr %tmp11, align 1 ; p[3]
  50   %tmp13 = zext i8 %tmp12 to i16
  51   %tmp14 = shl nuw nsw i16 %tmp10, 8
  52   %tmp15 = or i16 %tmp14, %tmp13 ; low i16 half: (p[2] << 8) | p[3]
  53   %tmp16 = zext i16 %tmp7 to i32
  54   %tmp17 = zext i16 %tmp15 to i32
  55   %tmp18 = shl nuw nsw i32 %tmp16, 16
  56   %tmp19 = or i32 %tmp18, %tmp17
  57   ret i32 %tmp19
  58 }
  59
  60 ; ptr p; // p is 4 byte aligned
  61 ; ((i32) p[0] << 16) | (i32) p[1]
  62 define i32 @load_i32_by_i16(ptr %arg) { ; p indexes i16 elements; p[0] forms the high half; checks expect a single ldr
  63 ; CHECK-LABEL: load_i32_by_i16:
  64 ; CHECK:       // %bb.0:
  65 ; CHECK-NEXT:    ldr w0, [x0]
  66 ; CHECK-NEXT:    ret
  67   %tmp1 = load i16, ptr %arg, align 4 ; p[0]
  68   %tmp2 = zext i16 %tmp1 to i32
  69   %tmp3 = getelementptr inbounds i16, ptr %arg, i32 1
  70   %tmp4 = load i16, ptr %tmp3, align 1 ; p[1]
  71   %tmp5 = zext i16 %tmp4 to i32
  72   %tmp6 = shl nuw nsw i32 %tmp2, 16
  73   %tmp7 = or i32 %tmp6, %tmp5
  74   ret i32 %tmp7
  75 }
  76
  77 ; ptr p_16; // p_16 is 4 byte aligned
  78 ; ptr p_8 = (ptr) p_16;
  79 ; ((i32) p_16[0] << 16) | ((i32) p_8[2] << 8) | (i32) p_8[3]
  80 define i32 @load_i32_by_i16_i8(ptr %arg) { ; one i16 load plus two i8 loads, MSB-first; checks expect a single ldr
  81 ; CHECK-LABEL: load_i32_by_i16_i8:
  82 ; CHECK:       // %bb.0:
  83 ; CHECK-NEXT:    ldr w0, [x0]
  84 ; CHECK-NEXT:    ret
  85   %tmp2 = load i16, ptr %arg, align 4 ; p_16[0]
  86   %tmp3 = zext i16 %tmp2 to i32
  87   %tmp4 = shl nuw nsw i32 %tmp3, 16
  88   %tmp5 = getelementptr inbounds i8, ptr %arg, i32 2
  89   %tmp6 = load i8, ptr %tmp5, align 1 ; p_8[2]
  90   %tmp7 = zext i8 %tmp6 to i32
  91   %tmp8 = shl nuw nsw i32 %tmp7, 8
  92   %tmp9 = getelementptr inbounds i8, ptr %arg, i32 3
  93   %tmp10 = load i8, ptr %tmp9, align 1 ; p_8[3]
  94   %tmp11 = zext i8 %tmp10 to i32
  95   %tmp12 = or i32 %tmp8, %tmp11
  96   %tmp13 = or i32 %tmp12, %tmp4
  97   ret i32 %tmp13
  98 }
  99
  100 ; ptr p; // p is 8 byte aligned
  101 ; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
  102 define i64 @load_i64_by_i8_bswap(ptr %arg) { ; bytes combined LSB-first; checks expect a 64-bit ldr followed by rev
  103 ; CHECK-LABEL: load_i64_by_i8_bswap:
  104 ; CHECK:       // %bb.0:
  105 ; CHECK-NEXT:    ldr x8, [x0]
  106 ; CHECK-NEXT:    rev x0, x8
  107 ; CHECK-NEXT:    ret
  108   %tmp1 = load i8, ptr %arg, align 8 ; p[0], lands in the low byte unshifted
  109   %tmp2 = zext i8 %tmp1 to i64
  110   %tmp3 = getelementptr inbounds i8, ptr %arg, i64 1
  111   %tmp4 = load i8, ptr %tmp3, align 1 ; p[1]
  112   %tmp5 = zext i8 %tmp4 to i64
  113   %tmp6 = shl nuw nsw i64 %tmp5, 8
  114   %tmp7 = or i64 %tmp6, %tmp2
  115   %tmp8 = getelementptr inbounds i8, ptr %arg, i64 2
  116   %tmp9 = load i8, ptr %tmp8, align 1 ; p[2]
  117   %tmp10 = zext i8 %tmp9 to i64
  118   %tmp11 = shl nuw nsw i64 %tmp10, 16
  119   %tmp12 = or i64 %tmp7, %tmp11
  120   %tmp13 = getelementptr inbounds i8, ptr %arg, i64 3
  121   %tmp14 = load i8, ptr %tmp13, align 1 ; p[3]
  122   %tmp15 = zext i8 %tmp14 to i64
  123   %tmp16 = shl nuw nsw i64 %tmp15, 24
  124   %tmp17 = or i64 %tmp12, %tmp16
  125   %tmp18 = getelementptr inbounds i8, ptr %arg, i64 4
  126   %tmp19 = load i8, ptr %tmp18, align 1 ; p[4]
  127   %tmp20 = zext i8 %tmp19 to i64
  128   %tmp21 = shl nuw nsw i64 %tmp20, 32
  129   %tmp22 = or i64 %tmp17, %tmp21
  130   %tmp23 = getelementptr inbounds i8, ptr %arg, i64 5
  131   %tmp24 = load i8, ptr %tmp23, align 1 ; p[5]
  132   %tmp25 = zext i8 %tmp24 to i64
  133   %tmp26 = shl nuw nsw i64 %tmp25, 40
  134   %tmp27 = or i64 %tmp22, %tmp26
  135   %tmp28 = getelementptr inbounds i8, ptr %arg, i64 6
  136   %tmp29 = load i8, ptr %tmp28, align 1 ; p[6]
  137   %tmp30 = zext i8 %tmp29 to i64
  138   %tmp31 = shl nuw nsw i64 %tmp30, 48
  139   %tmp32 = or i64 %tmp27, %tmp31
  140   %tmp33 = getelementptr inbounds i8, ptr %arg, i64 7
  141   %tmp34 = load i8, ptr %tmp33, align 1 ; p[7]
  142   %tmp35 = zext i8 %tmp34 to i64
  143   %tmp36 = shl nuw i64 %tmp35, 56
  144   %tmp37 = or i64 %tmp32, %tmp36
  145   ret i64 %tmp37
  146 }
 147
  148 ; ptr p; // p is 8 byte aligned
  149 ; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
  150 define i64 @load_i64_by_i8(ptr %arg) { ; bytes combined MSB-first; checks expect a single 64-bit ldr with no rev
  151 ; CHECK-LABEL: load_i64_by_i8:
  152 ; CHECK:       // %bb.0:
  153 ; CHECK-NEXT:    ldr x0, [x0]
  154 ; CHECK-NEXT:    ret
  155   %tmp1 = load i8, ptr %arg, align 8 ; p[0]
  156   %tmp2 = zext i8 %tmp1 to i64
  157   %tmp3 = shl nuw i64 %tmp2, 56
  158   %tmp4 = getelementptr inbounds i8, ptr %arg, i64 1
  159   %tmp5 = load i8, ptr %tmp4, align 1 ; p[1]
  160   %tmp6 = zext i8 %tmp5 to i64
  161   %tmp7 = shl nuw nsw i64 %tmp6, 48
  162   %tmp8 = or i64 %tmp7, %tmp3
  163   %tmp9 = getelementptr inbounds i8, ptr %arg, i64 2
  164   %tmp10 = load i8, ptr %tmp9, align 1 ; p[2]
  165   %tmp11 = zext i8 %tmp10 to i64
  166   %tmp12 = shl nuw nsw i64 %tmp11, 40
  167   %tmp13 = or i64 %tmp8, %tmp12
  168   %tmp14 = getelementptr inbounds i8, ptr %arg, i64 3
  169   %tmp15 = load i8, ptr %tmp14, align 1 ; p[3]
  170   %tmp16 = zext i8 %tmp15 to i64
  171   %tmp17 = shl nuw nsw i64 %tmp16, 32
  172   %tmp18 = or i64 %tmp13, %tmp17
  173   %tmp19 = getelementptr inbounds i8, ptr %arg, i64 4
  174   %tmp20 = load i8, ptr %tmp19, align 1 ; p[4]
  175   %tmp21 = zext i8 %tmp20 to i64
  176   %tmp22 = shl nuw nsw i64 %tmp21, 24
  177   %tmp23 = or i64 %tmp18, %tmp22
  178   %tmp24 = getelementptr inbounds i8, ptr %arg, i64 5
  179   %tmp25 = load i8, ptr %tmp24, align 1 ; p[5]
  180   %tmp26 = zext i8 %tmp25 to i64
  181   %tmp27 = shl nuw nsw i64 %tmp26, 16
  182   %tmp28 = or i64 %tmp23, %tmp27
  183   %tmp29 = getelementptr inbounds i8, ptr %arg, i64 6
  184   %tmp30 = load i8, ptr %tmp29, align 1 ; p[6]
  185   %tmp31 = zext i8 %tmp30 to i64
  186   %tmp32 = shl nuw nsw i64 %tmp31, 8
  187   %tmp33 = or i64 %tmp28, %tmp32
  188   %tmp34 = getelementptr inbounds i8, ptr %arg, i64 7
  189   %tmp35 = load i8, ptr %tmp34, align 1 ; p[7], lands in the low byte unshifted
  190   %tmp36 = zext i8 %tmp35 to i64
  191   %tmp37 = or i64 %tmp33, %tmp36
  192   ret i64 %tmp37
  193 }
 194
  195 ; ptr p; // p[1] is 4 byte aligned
  196 ; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
  197 define i32 @load_i32_by_i8_nonzero_offset(ptr %arg) { ; LSB-first bytes starting at p[1]; checks expect ldur at offset 1 followed by rev
  198 ; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
  199 ; CHECK:       // %bb.0:
  200 ; CHECK-NEXT:    ldur w8, [x0, #1]
  201 ; CHECK-NEXT:    rev w0, w8
  202 ; CHECK-NEXT:    ret
  203   %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
  204   %tmp2 = load i8, ptr %tmp1, align 4 ; p[1], the lowest address and the aligned one
  205   %tmp3 = zext i8 %tmp2 to i32
  206   %tmp4 = getelementptr inbounds i8, ptr %arg, i32 2
  207   %tmp5 = load i8, ptr %tmp4, align 1 ; p[2]
  208   %tmp6 = zext i8 %tmp5 to i32
  209   %tmp7 = shl nuw nsw i32 %tmp6, 8
  210   %tmp8 = or i32 %tmp7, %tmp3
  211   %tmp9 = getelementptr inbounds i8, ptr %arg, i32 3
  212   %tmp10 = load i8, ptr %tmp9, align 1 ; p[3]
  213   %tmp11 = zext i8 %tmp10 to i32
  214   %tmp12 = shl nuw nsw i32 %tmp11, 16
  215   %tmp13 = or i32 %tmp8, %tmp12
  216   %tmp14 = getelementptr inbounds i8, ptr %arg, i32 4
  217   %tmp15 = load i8, ptr %tmp14, align 1 ; p[4]
  218   %tmp16 = zext i8 %tmp15 to i32
  219   %tmp17 = shl nuw nsw i32 %tmp16, 24
  220   %tmp18 = or i32 %tmp13, %tmp17
  221   ret i32 %tmp18
  222 }
 223
  224 ; ptr p; // p[-4] is 4 byte aligned
  225 ; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
  226 define i32 @load_i32_by_i8_neg_offset(ptr %arg) { ; LSB-first bytes starting at p[-4]; checks expect ldur at offset -4 followed by rev
  227 ; CHECK-LABEL: load_i32_by_i8_neg_offset:
  228 ; CHECK:       // %bb.0:
  229 ; CHECK-NEXT:    ldur w8, [x0, #-4]
  230 ; CHECK-NEXT:    rev w0, w8
  231 ; CHECK-NEXT:    ret
  232   %tmp1 = getelementptr inbounds i8, ptr %arg, i32 -4
  233   %tmp2 = load i8, ptr %tmp1, align 4 ; p[-4], the lowest address and the aligned one
  234   %tmp3 = zext i8 %tmp2 to i32
  235   %tmp4 = getelementptr inbounds i8, ptr %arg, i32 -3
  236   %tmp5 = load i8, ptr %tmp4, align 1 ; p[-3]
  237   %tmp6 = zext i8 %tmp5 to i32
  238   %tmp7 = shl nuw nsw i32 %tmp6, 8
  239   %tmp8 = or i32 %tmp7, %tmp3
  240   %tmp9 = getelementptr inbounds i8, ptr %arg, i32 -2
  241   %tmp10 = load i8, ptr %tmp9, align 1 ; p[-2]
  242   %tmp11 = zext i8 %tmp10 to i32
  243   %tmp12 = shl nuw nsw i32 %tmp11, 16
  244   %tmp13 = or i32 %tmp8, %tmp12
  245   %tmp14 = getelementptr inbounds i8, ptr %arg, i32 -1
  246   %tmp15 = load i8, ptr %tmp14, align 1 ; p[-1]
  247   %tmp16 = zext i8 %tmp15 to i32
  248   %tmp17 = shl nuw nsw i32 %tmp16, 24
  249   %tmp18 = or i32 %tmp13, %tmp17
  250   ret i32 %tmp18
  251 }
 252
  253 ; ptr p; // p[1] is 4 byte aligned
  254 ; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
  255 define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) { ; p[1] is the most significant byte; checks expect a single ldur at offset 1, no rev
  256 ; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
  257 ; CHECK:       // %bb.0:
  258 ; CHECK-NEXT:    ldur w0, [x0, #1]
  259 ; CHECK-NEXT:    ret
  260   %tmp1 = getelementptr inbounds i8, ptr %arg, i32 4
  261   %tmp2 = load i8, ptr %tmp1, align 1 ; p[4], lands in the low byte unshifted
  262   %tmp3 = zext i8 %tmp2 to i32
  263   %tmp4 = getelementptr inbounds i8, ptr %arg, i32 3
  264   %tmp5 = load i8, ptr %tmp4, align 1 ; p[3]
  265   %tmp6 = zext i8 %tmp5 to i32
  266   %tmp7 = shl nuw nsw i32 %tmp6, 8
  267   %tmp8 = or i32 %tmp7, %tmp3
  268   %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
  269   %tmp10 = load i8, ptr %tmp9, align 1 ; p[2]
  270   %tmp11 = zext i8 %tmp10 to i32
  271   %tmp12 = shl nuw nsw i32 %tmp11, 16
  272   %tmp13 = or i32 %tmp8, %tmp12
  273   %tmp14 = getelementptr inbounds i8, ptr %arg, i32 1
  274   %tmp15 = load i8, ptr %tmp14, align 4 ; p[1], the lowest address and the aligned one
  275   %tmp16 = zext i8 %tmp15 to i32
  276   %tmp17 = shl nuw nsw i32 %tmp16, 24
  277   %tmp18 = or i32 %tmp13, %tmp17
  278   ret i32 %tmp18
  279 }
 280
  281 ; ptr p; // p[-4] is 4 byte aligned
  282 ; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
  283 define i32 @load_i32_by_i8_neg_offset_bswap(ptr %arg) { ; p[-4] is the most significant byte; checks expect a single ldur at offset -4, no rev
  284 ; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
  285 ; CHECK:       // %bb.0:
  286 ; CHECK-NEXT:    ldur w0, [x0, #-4]
  287 ; CHECK-NEXT:    ret
  288   %tmp1 = getelementptr inbounds i8, ptr %arg, i32 -1
  289   %tmp2 = load i8, ptr %tmp1, align 1 ; p[-1], lands in the low byte unshifted
  290   %tmp3 = zext i8 %tmp2 to i32
  291   %tmp4 = getelementptr inbounds i8, ptr %arg, i32 -2
  292   %tmp5 = load i8, ptr %tmp4, align 1 ; p[-2]
  293   %tmp6 = zext i8 %tmp5 to i32
  294   %tmp7 = shl nuw nsw i32 %tmp6, 8
  295   %tmp8 = or i32 %tmp7, %tmp3
  296   %tmp9 = getelementptr inbounds i8, ptr %arg, i32 -3
  297   %tmp10 = load i8, ptr %tmp9, align 1 ; p[-3]
  298   %tmp11 = zext i8 %tmp10 to i32
  299   %tmp12 = shl nuw nsw i32 %tmp11, 16
  300   %tmp13 = or i32 %tmp8, %tmp12
  301   %tmp14 = getelementptr inbounds i8, ptr %arg, i32 -4
  302   %tmp15 = load i8, ptr %tmp14, align 4 ; p[-4], the lowest address and the aligned one
  303   %tmp16 = zext i8 %tmp15 to i32
  304   %tmp17 = shl nuw nsw i32 %tmp16, 24
  305   %tmp18 = or i32 %tmp13, %tmp17
  306   ret i32 %tmp18
  307 }
 308
 309 declare i16 @llvm.bswap.i16(i16)
 310
  311 ; ptr p; // p is 4 byte aligned
  312 ; (i32) bswap(p[0]) | ((i32) bswap(p[1]) << 16)
  313 define i32 @load_i32_by_bswap_i16(ptr %arg) { ; byte-swapped i16 halves with p[0] in the low half; checks expect ldr followed by rev
  314 ; CHECK-LABEL: load_i32_by_bswap_i16:
  315 ; CHECK:       // %bb.0:
  316 ; CHECK-NEXT:    ldr w8, [x0]
  317 ; CHECK-NEXT:    rev w0, w8
  318 ; CHECK-NEXT:    ret
  319   %tmp1 = load i16, ptr %arg, align 4 ; p[0]
  320   %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
  321   %tmp2 = zext i16 %tmp11 to i32
  322   %tmp3 = getelementptr inbounds i16, ptr %arg, i32 1
  323   %tmp4 = load i16, ptr %tmp3, align 1 ; p[1]
  324   %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
  325   %tmp5 = zext i16 %tmp41 to i32
  326   %tmp6 = shl nuw nsw i32 %tmp5, 16 ; the shift is applied after the bswap and the zext
  327   %tmp7 = or i32 %tmp6, %tmp2
  328   ret i32 %tmp7
  329 }
 330
  331 ; ptr p; // p is 4 byte aligned
  332 ; (i32) p[1] | ((sext p[0] to i32) << 16)
  333 define i32 @load_i32_by_sext_i16(ptr %arg) { ; high half is sign-extended before the shift; checks still expect a single ldr
  334 ; CHECK-LABEL: load_i32_by_sext_i16:
  335 ; CHECK:       // %bb.0:
  336 ; CHECK-NEXT:    ldr w0, [x0]
  337 ; CHECK-NEXT:    ret
  338   %tmp1 = load i16, ptr %arg, align 4 ; p[0]
  339   %tmp2 = sext i16 %tmp1 to i32 ; sext here, unlike the zext used for p[1] below
  340   %tmp3 = getelementptr inbounds i16, ptr %arg, i32 1
  341   %tmp4 = load i16, ptr %tmp3, align 1 ; p[1]
  342   %tmp5 = zext i16 %tmp4 to i32
  343   %tmp6 = shl nuw nsw i32 %tmp2, 16 ; the shift moves the sign-extension bits out of the i32
  344   %tmp7 = or i32 %tmp6, %tmp5
  345   ret i32 %tmp7
  346 }
 347
  348 ; ptr arg; i32 i;
  349 ; p = arg + 12;
  350 ; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
  351 define i32 @load_i32_by_i8_base_offset_index(ptr %arg, i32 %i) { ; LSB-first bytes at base + 12 + zext(i); checks expect add, ldr at #12, rev
  352 ; CHECK-LABEL: load_i32_by_i8_base_offset_index:
  353 ; CHECK:       // %bb.0:
  354 ; CHECK-NEXT:    add x8, x0, w1, uxtw
  355 ; CHECK-NEXT:    ldr w8, [x8, #12]
  356 ; CHECK-NEXT:    rev w0, w8
  357 ; CHECK-NEXT:    ret
  358   %tmp = add nuw nsw i32 %i, 3 ; i + 3
  359   %tmp2 = add nuw nsw i32 %i, 2 ; i + 2
  360   %tmp3 = add nuw nsw i32 %i, 1 ; i + 1
  361   %tmp4 = getelementptr inbounds i8, ptr %arg, i64 12 ; p = arg + 12
  362   %tmp5 = zext i32 %i to i64
  363   %tmp6 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp5
  364   %tmp7 = load i8, ptr %tmp6, align 4 ; p[i], lands in the low byte unshifted
  365   %tmp8 = zext i8 %tmp7 to i32
  366   %tmp9 = zext i32 %tmp3 to i64
  367   %tmp10 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp9
  368   %tmp11 = load i8, ptr %tmp10, align 1 ; p[i + 1]
  369   %tmp12 = zext i8 %tmp11 to i32
  370   %tmp13 = shl nuw nsw i32 %tmp12, 8
  371   %tmp14 = or i32 %tmp13, %tmp8
  372   %tmp15 = zext i32 %tmp2 to i64
  373   %tmp16 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp15
  374   %tmp17 = load i8, ptr %tmp16, align 1 ; p[i + 2]
  375   %tmp18 = zext i8 %tmp17 to i32
  376   %tmp19 = shl nuw nsw i32 %tmp18, 16
  377   %tmp20 = or i32 %tmp14, %tmp19
  378   %tmp21 = zext i32 %tmp to i64
  379   %tmp22 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp21
  380   %tmp23 = load i8, ptr %tmp22, align 1 ; p[i + 3]
  381   %tmp24 = zext i8 %tmp23 to i32
  382   %tmp25 = shl nuw i32 %tmp24, 24
  383   %tmp26 = or i32 %tmp20, %tmp25
  384   ret i32 %tmp26
  385 }
 386
  387 ; ptr arg; i32 i;
  388 ; p = arg + 12;
  389 ; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
  390 define i32 @load_i32_by_i8_base_offset_index_2(ptr %arg, i32 %i) { ; as the previous test but starting at p[i + 1]; checks expect add, ldur at #13, rev
  391 ; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
  392 ; CHECK:       // %bb.0:
  393 ; CHECK-NEXT:    add x8, x0, w1, uxtw
  394 ; CHECK-NEXT:    ldur w8, [x8, #13]
  395 ; CHECK-NEXT:    rev w0, w8
  396 ; CHECK-NEXT:    ret
  397   %tmp = add nuw nsw i32 %i, 4 ; i + 4
  398   %tmp2 = add nuw nsw i32 %i, 3 ; i + 3
  399   %tmp3 = add nuw nsw i32 %i, 2 ; i + 2
  400   %tmp4 = getelementptr inbounds i8, ptr %arg, i64 12 ; p = arg + 12
  401   %tmp5 = add nuw nsw i32 %i, 1 ; i + 1
  402   %tmp27 = zext i32 %tmp5 to i64
  403   %tmp28 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp27
  404   %tmp29 = load i8, ptr %tmp28, align 4 ; p[i + 1], lands in the low byte unshifted
  405   %tmp30 = zext i8 %tmp29 to i32
  406   %tmp31 = zext i32 %tmp3 to i64
  407   %tmp32 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp31
  408   %tmp33 = load i8, ptr %tmp32, align 1 ; p[i + 2]
  409   %tmp34 = zext i8 %tmp33 to i32
  410   %tmp35 = shl nuw nsw i32 %tmp34, 8
  411   %tmp36 = or i32 %tmp35, %tmp30
  412   %tmp37 = zext i32 %tmp2 to i64
  413   %tmp38 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp37
  414   %tmp39 = load i8, ptr %tmp38, align 1 ; p[i + 3]
  415   %tmp40 = zext i8 %tmp39 to i32
  416   %tmp41 = shl nuw nsw i32 %tmp40, 16
  417   %tmp42 = or i32 %tmp36, %tmp41
  418   %tmp43 = zext i32 %tmp to i64
  419   %tmp44 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp43
  420   %tmp45 = load i8, ptr %tmp44, align 1 ; p[i + 4]
  421   %tmp46 = zext i8 %tmp45 to i32
  422   %tmp47 = shl nuw i32 %tmp46, 24
  423   %tmp48 = or i32 %tmp42, %tmp47
  424   ret i32 %tmp48
  425 }
  426 ; ptr p; // p is 2 byte aligned
  427 ; (i32) p[0] | ((i32) p[1] << 8)
  428 define i32 @zext_load_i32_by_i8(ptr %arg) { ; only two bytes combined, LSB-first, into an i32; checks expect ldrh followed by rev16
  429 ; CHECK-LABEL: zext_load_i32_by_i8:
  430 ; CHECK:       // %bb.0:
  431 ; CHECK-NEXT:    ldrh w8, [x0]
  432 ; CHECK-NEXT:    rev16 w0, w8
  433 ; CHECK-NEXT:    ret
  434   %tmp2 = load i8, ptr %arg, align 2 ; p[0]
  435   %tmp3 = zext i8 %tmp2 to i32
  436   %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
  437   %tmp5 = load i8, ptr %tmp4, align 1 ; p[1]
  438   %tmp6 = zext i8 %tmp5 to i32
  439   %tmp7 = shl nuw nsw i32 %tmp6, 8
  440   %tmp8 = or i32 %tmp7, %tmp3
  441   ret i32 %tmp8
  442 }
 443
  444 ; ptr p; // p is 2 byte aligned
  445 ; ((i32) p[0] << 8) | ((i32) p[1] << 16)
  446 define i32 @zext_load_i32_by_i8_shl_8(ptr %arg) { ; two-byte pattern shifted up by 8; checks expect the two ldrb loads to stay separate
  447 ; CHECK-LABEL: zext_load_i32_by_i8_shl_8:
  448 ; CHECK:       // %bb.0:
  449 ; CHECK-NEXT:    ldrb w8, [x0]
  450 ; CHECK-NEXT:    ldrb w9, [x0, #1]
  451 ; CHECK-NEXT:    lsl w8, w8, #8
  452 ; CHECK-NEXT:    orr w0, w8, w9, lsl #16
  453 ; CHECK-NEXT:    ret
  454   %tmp2 = load i8, ptr %arg, align 2 ; p[0]
  455   %tmp3 = zext i8 %tmp2 to i32
  456   %tmp30 = shl nuw nsw i32 %tmp3, 8
  457   %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
  458   %tmp5 = load i8, ptr %tmp4, align 1 ; p[1]
  459   %tmp6 = zext i8 %tmp5 to i32
  460   %tmp7 = shl nuw nsw i32 %tmp6, 16
  461   %tmp8 = or i32 %tmp7, %tmp30
  462   ret i32 %tmp8
  463 }
 464
  465 ; ptr p; // p is 2 byte aligned
  466 ; ((i32) p[0] << 16) | ((i32) p[1] << 24)
  467 define i32 @zext_load_i32_by_i8_shl_16(ptr %arg) { ; two-byte pattern shifted up by 16; checks expect the two ldrb loads to stay separate
  468 ; CHECK-LABEL: zext_load_i32_by_i8_shl_16:
  469 ; CHECK:       // %bb.0:
  470 ; CHECK-NEXT:    ldrb w8, [x0]
  471 ; CHECK-NEXT:    ldrb w9, [x0, #1]
  472 ; CHECK-NEXT:    lsl w8, w8, #16
  473 ; CHECK-NEXT:    orr w0, w8, w9, lsl #24
  474 ; CHECK-NEXT:    ret
  475   %tmp2 = load i8, ptr %arg, align 2 ; p[0]
  476   %tmp3 = zext i8 %tmp2 to i32
  477   %tmp30 = shl nuw nsw i32 %tmp3, 16
  478   %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
  479   %tmp5 = load i8, ptr %tmp4, align 1 ; p[1]
  480   %tmp6 = zext i8 %tmp5 to i32
  481   %tmp7 = shl nuw nsw i32 %tmp6, 24
  482   %tmp8 = or i32 %tmp7, %tmp30
  483   ret i32 %tmp8
  484 }
  485 ; ptr p; // p is 2 byte aligned
  486 ; (i32) p[1] | ((i32) p[0] << 8)
  487 define i32 @zext_load_i32_by_i8_bswap(ptr %arg) { ; two bytes combined MSB-first into an i32; checks expect a single ldrh, no rev16
  488 ; CHECK-LABEL: zext_load_i32_by_i8_bswap:
  489 ; CHECK:       // %bb.0:
  490 ; CHECK-NEXT:    ldrh w0, [x0]
  491 ; CHECK-NEXT:    ret
  492   %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
  493   %tmp2 = load i8, ptr %tmp1, align 1 ; p[1]
  494   %tmp3 = zext i8 %tmp2 to i32
  495   %tmp5 = load i8, ptr %arg, align 2 ; p[0]
  496   %tmp6 = zext i8 %tmp5 to i32
  497   %tmp7 = shl nuw nsw i32 %tmp6, 8
  498   %tmp8 = or i32 %tmp7, %tmp3
  499   ret i32 %tmp8
  500 }
 501
  502 ; ptr p; // p is 2 byte aligned
  503 ; ((i32) p[1] << 8) | ((i32) p[0] << 16)
  504 define i32 @zext_load_i32_by_i8_bswap_shl_8(ptr %arg) { ; MSB-first two-byte pattern shifted up by 8; checks expect the two ldrb loads to stay separate
  505 ; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
  506 ; CHECK:       // %bb.0:
  507 ; CHECK-NEXT:    ldrb w8, [x0, #1]
  508 ; CHECK-NEXT:    ldrb w9, [x0]
  509 ; CHECK-NEXT:    lsl w8, w8, #8
  510 ; CHECK-NEXT:    orr w0, w8, w9, lsl #16
  511 ; CHECK-NEXT:    ret
  512   %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
  513   %tmp2 = load i8, ptr %tmp1, align 1 ; p[1]
  514   %tmp3 = zext i8 %tmp2 to i32
  515   %tmp30 = shl nuw nsw i32 %tmp3, 8
  516   %tmp5 = load i8, ptr %arg, align 2 ; p[0]
  517   %tmp6 = zext i8 %tmp5 to i32
  518   %tmp7 = shl nuw nsw i32 %tmp6, 16
  519   %tmp8 = or i32 %tmp7, %tmp30
  520   ret i32 %tmp8
  521 }
 522
  523 ; ptr p; // p is 2 byte aligned
  524 ; ((i32) p[1] << 16) | ((i32) p[0] << 24)
  525 define i32 @zext_load_i32_by_i8_bswap_shl_16(ptr %arg) { ; MSB-first two-byte pattern shifted up by 16; checks expect the two ldrb loads to stay separate
  526 ; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
  527 ; CHECK:       // %bb.0:
  528 ; CHECK-NEXT:    ldrb w8, [x0, #1]
  529 ; CHECK-NEXT:    ldrb w9, [x0]
  530 ; CHECK-NEXT:    lsl w8, w8, #16
  531 ; CHECK-NEXT:    orr w0, w8, w9, lsl #24
  532 ; CHECK-NEXT:    ret
  533   %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
  534   %tmp2 = load i8, ptr %tmp1, align 1 ; p[1]
  535   %tmp3 = zext i8 %tmp2 to i32
  536   %tmp30 = shl nuw nsw i32 %tmp3, 16
  537   %tmp5 = load i8, ptr %arg, align 2 ; p[0]
  538   %tmp6 = zext i8 %tmp5 to i32
  539   %tmp7 = shl nuw nsw i32 %tmp6, 24
  540   %tmp8 = or i32 %tmp7, %tmp30
  541   ret i32 %tmp8
  542 }
 543
  544 ; ptr p;
  545 ; ptr p1.i16 = (ptr) p;
  546 ; (p1.i16[0] << 8) | ((i16) p[2])
  547 ;
  548 ; This is essentially an i16 load from p[1], but we don't fold the pattern now
  549 ; because in the original DAG we don't have p[1] address available
  550 define i16 @load_i16_from_nonzero_offset(ptr %p) { ; checks expect the ldrh and ldrb to stay separate, joined by one orr
  551 ; CHECK-LABEL: load_i16_from_nonzero_offset:
  552 ; CHECK:       // %bb.0:
  553 ; CHECK-NEXT:    ldrh w8, [x0]
  554 ; CHECK-NEXT:    ldrb w9, [x0, #2]
  555 ; CHECK-NEXT:    orr w0, w9, w8, lsl #8
  556 ; CHECK-NEXT:    ret
  557   %p2.i8 = getelementptr i8, ptr %p, i64 2
  558   %v1 = load i16, ptr %p ; p1.i16[0]
  559   %v2.i8 = load i8, ptr %p2.i8 ; p[2]
  560   %v2 = zext i8 %v2.i8 to i16
  561   %v1.shl = shl i16 %v1, 8 ; shifts the first byte of the i16 out, keeping only the byte at p[1]
  562   %res = or i16 %v1.shl, %v2
  563   ret i16 %res
  564 }