llvm/test/CodeGen/AArch64/load-combine.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=arm64-unknown | FileCheck %s
   3
   4 ; i8* p; // p is 1 byte aligned; the four byte loads (all align 1) are still expected to merge into a single 32-bit ldr
   5 ; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
   6 define i32 @load_i32_by_i8_unaligned(i32* %arg) {
   7 ; CHECK-LABEL: load_i32_by_i8_unaligned:
   8 ; CHECK:       // %bb.0:
   9 ; CHECK-NEXT:    ldr w0, [x0]
  10 ; CHECK-NEXT:    ret
  11   %tmp = bitcast i32* %arg to i8*
  12   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
  13   %tmp2 = load i8, i8* %tmp1, align 1
  14   %tmp3 = zext i8 %tmp2 to i32
  15   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  16   %tmp5 = load i8, i8* %tmp4, align 1
  17   %tmp6 = zext i8 %tmp5 to i32
  18   %tmp7 = shl nuw nsw i32 %tmp6, 8
  19   %tmp8 = or i32 %tmp7, %tmp3
  20   %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  21   %tmp10 = load i8, i8* %tmp9, align 1
  22   %tmp11 = zext i8 %tmp10 to i32
  23   %tmp12 = shl nuw nsw i32 %tmp11, 16
  24   %tmp13 = or i32 %tmp8, %tmp12
  25   %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  26   %tmp15 = load i8, i8* %tmp14, align 1
  27   %tmp16 = zext i8 %tmp15 to i32
  28   %tmp17 = shl nuw nsw i32 %tmp16, 24
  29   %tmp18 = or i32 %tmp13, %tmp17
  30   ret i32 %tmp18
  31 }
  32
  33 ; i8* p; // p is 4 byte aligned (the p[0] load is align 4); bytes are assembled low byte first, so a plain 32-bit ldr is expected
  34 ; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
  35 define i32 @load_i32_by_i8_aligned(i32* %arg) {
  36 ; CHECK-LABEL: load_i32_by_i8_aligned:
  37 ; CHECK:       // %bb.0:
  38 ; CHECK-NEXT:    ldr w0, [x0]
  39 ; CHECK-NEXT:    ret
  40   %tmp = bitcast i32* %arg to i8*
  41   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
  42   %tmp2 = load i8, i8* %tmp1, align 4
  43   %tmp3 = zext i8 %tmp2 to i32
  44   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  45   %tmp5 = load i8, i8* %tmp4, align 1
  46   %tmp6 = zext i8 %tmp5 to i32
  47   %tmp7 = shl nuw nsw i32 %tmp6, 8
  48   %tmp8 = or i32 %tmp7, %tmp3
  49   %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  50   %tmp10 = load i8, i8* %tmp9, align 1
  51   %tmp11 = zext i8 %tmp10 to i32
  52   %tmp12 = shl nuw nsw i32 %tmp11, 16
  53   %tmp13 = or i32 %tmp8, %tmp12
  54   %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  55   %tmp15 = load i8, i8* %tmp14, align 1
  56   %tmp16 = zext i8 %tmp15 to i32
  57   %tmp17 = shl nuw nsw i32 %tmp16, 24
  58   %tmp18 = or i32 %tmp13, %tmp17
  59   ret i32 %tmp18
  60 }
  61
  62 ; i8* p; // p is 4 byte aligned; bytes are assembled most-significant first, so a 32-bit ldr followed by rev is expected
  63 ; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
  64 define i32 @load_i32_by_i8_bswap(i32* %arg) {
  65 ; CHECK-LABEL: load_i32_by_i8_bswap:
  66 ; CHECK:       // %bb.0:
  67 ; CHECK-NEXT:    ldr w8, [x0]
  68 ; CHECK-NEXT:    rev w0, w8
  69 ; CHECK-NEXT:    ret
  70   %tmp = bitcast i32* %arg to i8*
  71   %tmp1 = load i8, i8* %tmp, align 4
  72   %tmp2 = zext i8 %tmp1 to i32
  73   %tmp3 = shl nuw nsw i32 %tmp2, 24
  74   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  75   %tmp5 = load i8, i8* %tmp4, align 1
  76   %tmp6 = zext i8 %tmp5 to i32
  77   %tmp7 = shl nuw nsw i32 %tmp6, 16
  78   %tmp8 = or i32 %tmp7, %tmp3
  79   %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  80   %tmp10 = load i8, i8* %tmp9, align 1
  81   %tmp11 = zext i8 %tmp10 to i32
  82   %tmp12 = shl nuw nsw i32 %tmp11, 8
  83   %tmp13 = or i32 %tmp8, %tmp12
  84   %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  85   %tmp15 = load i8, i8* %tmp14, align 1
  86   %tmp16 = zext i8 %tmp15 to i32
  87   %tmp17 = or i32 %tmp13, %tmp16
  88   ret i32 %tmp17
  89 }
  90
  91 ; i8* p; // p is 8 byte aligned; eight byte loads assembled low byte first, expected to merge into a single 64-bit ldr
  92 ; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
  93 define i64 @load_i64_by_i8(i64* %arg) {
  94 ; CHECK-LABEL: load_i64_by_i8:
  95 ; CHECK:       // %bb.0:
  96 ; CHECK-NEXT:    ldr x0, [x0]
  97 ; CHECK-NEXT:    ret
  98   %tmp = bitcast i64* %arg to i8*
  99   %tmp1 = load i8, i8* %tmp, align 8
 100   %tmp2 = zext i8 %tmp1 to i64
 101   %tmp3 = getelementptr inbounds i8, i8* %tmp, i64 1
 102   %tmp4 = load i8, i8* %tmp3, align 1
 103   %tmp5 = zext i8 %tmp4 to i64
 104   %tmp6 = shl nuw nsw i64 %tmp5, 8
 105   %tmp7 = or i64 %tmp6, %tmp2
 106   %tmp8 = getelementptr inbounds i8, i8* %tmp, i64 2
 107   %tmp9 = load i8, i8* %tmp8, align 1
 108   %tmp10 = zext i8 %tmp9 to i64
 109   %tmp11 = shl nuw nsw i64 %tmp10, 16
 110   %tmp12 = or i64 %tmp7, %tmp11
 111   %tmp13 = getelementptr inbounds i8, i8* %tmp, i64 3
 112   %tmp14 = load i8, i8* %tmp13, align 1
 113   %tmp15 = zext i8 %tmp14 to i64
 114   %tmp16 = shl nuw nsw i64 %tmp15, 24
 115   %tmp17 = or i64 %tmp12, %tmp16
 116   %tmp18 = getelementptr inbounds i8, i8* %tmp, i64 4
 117   %tmp19 = load i8, i8* %tmp18, align 1
 118   %tmp20 = zext i8 %tmp19 to i64
 119   %tmp21 = shl nuw nsw i64 %tmp20, 32
 120   %tmp22 = or i64 %tmp17, %tmp21
 121   %tmp23 = getelementptr inbounds i8, i8* %tmp, i64 5
 122   %tmp24 = load i8, i8* %tmp23, align 1
 123   %tmp25 = zext i8 %tmp24 to i64
 124   %tmp26 = shl nuw nsw i64 %tmp25, 40
 125   %tmp27 = or i64 %tmp22, %tmp26
 126   %tmp28 = getelementptr inbounds i8, i8* %tmp, i64 6
 127   %tmp29 = load i8, i8* %tmp28, align 1
 128   %tmp30 = zext i8 %tmp29 to i64
 129   %tmp31 = shl nuw nsw i64 %tmp30, 48
 130   %tmp32 = or i64 %tmp27, %tmp31
 131   %tmp33 = getelementptr inbounds i8, i8* %tmp, i64 7
 132   %tmp34 = load i8, i8* %tmp33, align 1
 133   %tmp35 = zext i8 %tmp34 to i64
 134   %tmp36 = shl nuw i64 %tmp35, 56
 135   %tmp37 = or i64 %tmp32, %tmp36
 136   ret i64 %tmp37
 137 }
 138
 139 ; i8* p; // p is 8 byte aligned; bytes are assembled most-significant first, so a 64-bit ldr followed by rev is expected
 140 ; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
 141 define i64 @load_i64_by_i8_bswap(i64* %arg) {
 142 ; CHECK-LABEL: load_i64_by_i8_bswap:
 143 ; CHECK:       // %bb.0:
 144 ; CHECK-NEXT:    ldr x8, [x0]
 145 ; CHECK-NEXT:    rev x0, x8
 146 ; CHECK-NEXT:    ret
 147   %tmp = bitcast i64* %arg to i8*
 148   %tmp1 = load i8, i8* %tmp, align 8
 149   %tmp2 = zext i8 %tmp1 to i64
 150   %tmp3 = shl nuw i64 %tmp2, 56
 151   %tmp4 = getelementptr inbounds i8, i8* %tmp, i64 1
 152   %tmp5 = load i8, i8* %tmp4, align 1
 153   %tmp6 = zext i8 %tmp5 to i64
 154   %tmp7 = shl nuw nsw i64 %tmp6, 48
 155   %tmp8 = or i64 %tmp7, %tmp3
 156   %tmp9 = getelementptr inbounds i8, i8* %tmp, i64 2
 157   %tmp10 = load i8, i8* %tmp9, align 1
 158   %tmp11 = zext i8 %tmp10 to i64
 159   %tmp12 = shl nuw nsw i64 %tmp11, 40
 160   %tmp13 = or i64 %tmp8, %tmp12
 161   %tmp14 = getelementptr inbounds i8, i8* %tmp, i64 3
 162   %tmp15 = load i8, i8* %tmp14, align 1
 163   %tmp16 = zext i8 %tmp15 to i64
 164   %tmp17 = shl nuw nsw i64 %tmp16, 32
 165   %tmp18 = or i64 %tmp13, %tmp17
 166   %tmp19 = getelementptr inbounds i8, i8* %tmp, i64 4
 167   %tmp20 = load i8, i8* %tmp19, align 1
 168   %tmp21 = zext i8 %tmp20 to i64
 169   %tmp22 = shl nuw nsw i64 %tmp21, 24
 170   %tmp23 = or i64 %tmp18, %tmp22
 171   %tmp24 = getelementptr inbounds i8, i8* %tmp, i64 5
 172   %tmp25 = load i8, i8* %tmp24, align 1
 173   %tmp26 = zext i8 %tmp25 to i64
 174   %tmp27 = shl nuw nsw i64 %tmp26, 16
 175   %tmp28 = or i64 %tmp23, %tmp27
 176   %tmp29 = getelementptr inbounds i8, i8* %tmp, i64 6
 177   %tmp30 = load i8, i8* %tmp29, align 1
 178   %tmp31 = zext i8 %tmp30 to i64
 179   %tmp32 = shl nuw nsw i64 %tmp31, 8
 180   %tmp33 = or i64 %tmp28, %tmp32
 181   %tmp34 = getelementptr inbounds i8, i8* %tmp, i64 7
 182   %tmp35 = load i8, i8* %tmp34, align 1
 183   %tmp36 = zext i8 %tmp35 to i64
 184   %tmp37 = or i64 %tmp33, %tmp36
 185   ret i64 %tmp37
 186 }
 187
 188 ; i8* p; // p[1] is 4 byte aligned; the merged 32-bit load starts at byte offset 1, so ldur with immediate #1 is expected
 189 ; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
 190 define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
 191 ; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
 192 ; CHECK:       // %bb.0:
 193 ; CHECK-NEXT:    ldur w0, [x0, #1]
 194 ; CHECK-NEXT:    ret
 195
 196   %tmp = bitcast i32* %arg to i8*
 197   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
 198   %tmp2 = load i8, i8* %tmp1, align 4
 199   %tmp3 = zext i8 %tmp2 to i32
 200   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
 201   %tmp5 = load i8, i8* %tmp4, align 1
 202   %tmp6 = zext i8 %tmp5 to i32
 203   %tmp7 = shl nuw nsw i32 %tmp6, 8
 204   %tmp8 = or i32 %tmp7, %tmp3
 205   %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
 206   %tmp10 = load i8, i8* %tmp9, align 1
 207   %tmp11 = zext i8 %tmp10 to i32
 208   %tmp12 = shl nuw nsw i32 %tmp11, 16
 209   %tmp13 = or i32 %tmp8, %tmp12
 210   %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
 211   %tmp15 = load i8, i8* %tmp14, align 1
 212   %tmp16 = zext i8 %tmp15 to i32
 213   %tmp17 = shl nuw nsw i32 %tmp16, 24
 214   %tmp18 = or i32 %tmp13, %tmp17
 215   ret i32 %tmp18
 216 }
 217
 218 ; i8* p; // p[-4] is 4 byte aligned; the merged 32-bit load starts at byte offset -4, so ldur with immediate #-4 is expected
 219 ; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
 220 define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
 221 ; CHECK-LABEL: load_i32_by_i8_neg_offset:
 222 ; CHECK:       // %bb.0:
 223 ; CHECK-NEXT:    ldur w0, [x0, #-4]
 224 ; CHECK-NEXT:    ret
 225
 226   %tmp = bitcast i32* %arg to i8*
 227   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
 228   %tmp2 = load i8, i8* %tmp1, align 4
 229   %tmp3 = zext i8 %tmp2 to i32
 230   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
 231   %tmp5 = load i8, i8* %tmp4, align 1
 232   %tmp6 = zext i8 %tmp5 to i32
 233   %tmp7 = shl nuw nsw i32 %tmp6, 8
 234   %tmp8 = or i32 %tmp7, %tmp3
 235   %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
 236   %tmp10 = load i8, i8* %tmp9, align 1
 237   %tmp11 = zext i8 %tmp10 to i32
 238   %tmp12 = shl nuw nsw i32 %tmp11, 16
 239   %tmp13 = or i32 %tmp8, %tmp12
 240   %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
 241   %tmp15 = load i8, i8* %tmp14, align 1
 242   %tmp16 = zext i8 %tmp15 to i32
 243   %tmp17 = shl nuw nsw i32 %tmp16, 24
 244   %tmp18 = or i32 %tmp13, %tmp17
 245   ret i32 %tmp18
 246 }
 247
 248 ; i8* p; // p[1] is 4 byte aligned; bytes p[1]..p[4] are assembled most-significant first, so ldur with #1 followed by rev is expected
 249 ; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
 250 define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
 251 ; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
 252 ; CHECK:       // %bb.0:
 253 ; CHECK-NEXT:    ldur w8, [x0, #1]
 254 ; CHECK-NEXT:    rev w0, w8
 255 ; CHECK-NEXT:    ret
 256
 257   %tmp = bitcast i32* %arg to i8*
 258   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
 259   %tmp2 = load i8, i8* %tmp1, align 1
 260   %tmp3 = zext i8 %tmp2 to i32
 261   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
 262   %tmp5 = load i8, i8* %tmp4, align 1
 263   %tmp6 = zext i8 %tmp5 to i32
 264   %tmp7 = shl nuw nsw i32 %tmp6, 8
 265   %tmp8 = or i32 %tmp7, %tmp3
 266   %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
 267   %tmp10 = load i8, i8* %tmp9, align 1
 268   %tmp11 = zext i8 %tmp10 to i32
 269   %tmp12 = shl nuw nsw i32 %tmp11, 16
 270   %tmp13 = or i32 %tmp8, %tmp12
 271   %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
 272   %tmp15 = load i8, i8* %tmp14, align 4
 273   %tmp16 = zext i8 %tmp15 to i32
 274   %tmp17 = shl nuw nsw i32 %tmp16, 24
 275   %tmp18 = or i32 %tmp13, %tmp17
 276   ret i32 %tmp18
 277 }
 278
 279 ; i8* p; // p[-4] is 4 byte aligned; bytes p[-4]..p[-1] are assembled most-significant first, so ldur with #-4 followed by rev is expected
 280 ; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
 281 define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
 282 ; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
 283 ; CHECK:       // %bb.0:
 284 ; CHECK-NEXT:    ldur w8, [x0, #-4]
 285 ; CHECK-NEXT:    rev w0, w8
 286 ; CHECK-NEXT:    ret
 287
 288   %tmp = bitcast i32* %arg to i8*
 289   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
 290   %tmp2 = load i8, i8* %tmp1, align 1
 291   %tmp3 = zext i8 %tmp2 to i32
 292   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
 293   %tmp5 = load i8, i8* %tmp4, align 1
 294   %tmp6 = zext i8 %tmp5 to i32
 295   %tmp7 = shl nuw nsw i32 %tmp6, 8
 296   %tmp8 = or i32 %tmp7, %tmp3
 297   %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
 298   %tmp10 = load i8, i8* %tmp9, align 1
 299   %tmp11 = zext i8 %tmp10 to i32
 300   %tmp12 = shl nuw nsw i32 %tmp11, 16
 301   %tmp13 = or i32 %tmp8, %tmp12
 302   %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
 303   %tmp15 = load i8, i8* %tmp14, align 4
 304   %tmp16 = zext i8 %tmp15 to i32
 305   %tmp17 = shl nuw nsw i32 %tmp16, 24
 306   %tmp18 = or i32 %tmp13, %tmp17
 307   ret i32 %tmp18
 308 }
 309
 310 declare i16 @llvm.bswap.i16(i16) ; i16 byte-swap intrinsic, called by @load_i32_by_bswap_i16
 311
 312 ; i16* p; // p is 4 byte aligned; each i16 half is byte-swapped and p[0] lands in the upper half, so ldr followed by rev is expected
 313 ; (i32) bswap(p[1]) | ((i32) bswap(p[0]) << 16)
 314 define i32 @load_i32_by_bswap_i16(i32* %arg) {
 315 ; CHECK-LABEL: load_i32_by_bswap_i16:
 316 ; CHECK:       // %bb.0:
 317 ; CHECK-NEXT:    ldr w8, [x0]
 318 ; CHECK-NEXT:    rev w0, w8
 319 ; CHECK-NEXT:    ret
 320
 321   %tmp = bitcast i32* %arg to i16*
 322   %tmp1 = load i16, i16* %tmp, align 4
 323   %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
 324   %tmp2 = zext i16 %tmp11 to i32
 325   %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
 326   %tmp4 = load i16, i16* %tmp3, align 1
 327   %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
 328   %tmp5 = zext i16 %tmp41 to i32
 329   %tmp6 = shl nuw nsw i32 %tmp2, 16
 330   %tmp7 = or i32 %tmp6, %tmp5
 331   ret i32 %tmp7
 332 }
 333
 334 ; i16* p; // p is 4 byte aligned; p[1] is sign-extended and then shifted into the upper half, and a plain 32-bit ldr is expected
 335 ; (i32) p[0] | ((sext p[1] to i32) << 16)
 336 define i32 @load_i32_by_sext_i16(i32* %arg) {
 337 ; CHECK-LABEL: load_i32_by_sext_i16:
 338 ; CHECK:       // %bb.0:
 339 ; CHECK-NEXT:    ldr w0, [x0]
 340 ; CHECK-NEXT:    ret
 341   %tmp = bitcast i32* %arg to i16*
 342   %tmp1 = load i16, i16* %tmp, align 4
 343   %tmp2 = zext i16 %tmp1 to i32
 344   %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
 345   %tmp4 = load i16, i16* %tmp3, align 1
 346   %tmp5 = sext i16 %tmp4 to i32
 347   %tmp6 = shl nuw nsw i32 %tmp5, 16
 348   %tmp7 = or i32 %tmp6, %tmp2
 349   ret i32 %tmp7
 350 }
 351
 352 ; i8* arg; i32 i; // the p[i] load is align 4; expected: one add of the zero-extended index to arg, then a single ldr with immediate #12
 353 ; p = arg + 12;
 354 ; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
 355 define i32 @load_i32_by_i8_base_offset_index(i8* %arg, i32 %i) {
 356 ; CHECK-LABEL: load_i32_by_i8_base_offset_index:
 357 ; CHECK:       // %bb.0:
 358 ; CHECK-NEXT:    add x8, x0, w1, uxtw
 359 ; CHECK-NEXT:    ldr w0, [x8, #12]
 360 ; CHECK-NEXT:    ret
 361   %tmp = add nuw nsw i32 %i, 3
 362   %tmp2 = add nuw nsw i32 %i, 2
 363   %tmp3 = add nuw nsw i32 %i, 1
 364   %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
 365   %tmp5 = zext i32 %i to i64
 366   %tmp6 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp5
 367   %tmp7 = load i8, i8* %tmp6, align 4
 368   %tmp8 = zext i8 %tmp7 to i32
 369   %tmp9 = zext i32 %tmp3 to i64
 370   %tmp10 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp9
 371   %tmp11 = load i8, i8* %tmp10, align 1
 372   %tmp12 = zext i8 %tmp11 to i32
 373   %tmp13 = shl nuw nsw i32 %tmp12, 8
 374   %tmp14 = or i32 %tmp13, %tmp8
 375   %tmp15 = zext i32 %tmp2 to i64
 376   %tmp16 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp15
 377   %tmp17 = load i8, i8* %tmp16, align 1
 378   %tmp18 = zext i8 %tmp17 to i32
 379   %tmp19 = shl nuw i32 %tmp18, 16
 380   %tmp20 = or i32 %tmp14, %tmp19
 381   %tmp21 = zext i32 %tmp to i64
 382   %tmp22 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp21
 383   %tmp23 = load i8, i8* %tmp22, align 1
 384   %tmp24 = zext i8 %tmp23 to i32
 385   %tmp25 = shl nuw i32 %tmp24, 24
 386   %tmp26 = or i32 %tmp20, %tmp25
 387   ret i32 %tmp26
 388 }
 389
 390 ; i8* arg; i32 i; // the p[i + 1] load is align 4; expected: one add of the zero-extended index to arg, then a single ldur with immediate #13
 391 ; p = arg + 12;
 392 ; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
 393 define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
 394 ; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
 395 ; CHECK:       // %bb.0:
 396 ; CHECK-NEXT:    add x8, x0, w1, uxtw
 397 ; CHECK-NEXT:    ldur w0, [x8, #13]
 398 ; CHECK-NEXT:    ret
 399   %tmp = add nuw nsw i32 %i, 4
 400   %tmp2 = add nuw nsw i32 %i, 3
 401   %tmp3 = add nuw nsw i32 %i, 2
 402   %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
 403   %tmp5 = add nuw nsw i32 %i, 1
 404   %tmp27 = zext i32 %tmp5 to i64
 405   %tmp28 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp27
 406   %tmp29 = load i8, i8* %tmp28, align 4
 407   %tmp30 = zext i8 %tmp29 to i32
 408   %tmp31 = zext i32 %tmp3 to i64
 409   %tmp32 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp31
 410   %tmp33 = load i8, i8* %tmp32, align 1
 411   %tmp34 = zext i8 %tmp33 to i32
 412   %tmp35 = shl nuw nsw i32 %tmp34, 8
 413   %tmp36 = or i32 %tmp35, %tmp30
 414   %tmp37 = zext i32 %tmp2 to i64
 415   %tmp38 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp37
 416   %tmp39 = load i8, i8* %tmp38, align 1
 417   %tmp40 = zext i8 %tmp39 to i32
 418   %tmp41 = shl nuw nsw i32 %tmp40, 16
 419   %tmp42 = or i32 %tmp36, %tmp41
 420   %tmp43 = zext i32 %tmp to i64
 421   %tmp44 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp43
 422   %tmp45 = load i8, i8* %tmp44, align 1
 423   %tmp46 = zext i8 %tmp45 to i32
 424   %tmp47 = shl nuw i32 %tmp46, 24
 425   %tmp48 = or i32 %tmp42, %tmp47
 426   ret i32 %tmp48
 427 }
 428
 429 ; i8* p; // p is 2 byte aligned; only two bytes are combined into the i32 result, so a single 16-bit ldrh is expected
 430 ; (i32) p[0] | ((i32) p[1] << 8)
 431 define i32 @zext_load_i32_by_i8(i32* %arg) {
 432 ; CHECK-LABEL: zext_load_i32_by_i8:
 433 ; CHECK:       // %bb.0:
 434 ; CHECK-NEXT:    ldrh w0, [x0]
 435 ; CHECK-NEXT:    ret
 436
 437   %tmp = bitcast i32* %arg to i8*
 438   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
 439   %tmp2 = load i8, i8* %tmp1, align 2
 440   %tmp3 = zext i8 %tmp2 to i32
 441   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
 442   %tmp5 = load i8, i8* %tmp4, align 1
 443   %tmp6 = zext i8 %tmp5 to i32
 444   %tmp7 = shl nuw nsw i32 %tmp6, 8
 445   %tmp8 = or i32 %tmp7, %tmp3
 446   ret i32 %tmp8
 447 }
 448
 449 ; i8* p; // p is 2 byte aligned; the byte pair sits 8 bits up in the result, and the CHECK lines expect two separate ldrb plus lsl/bfi (no merged load)
 450 ; ((i32) p[0] << 8) | ((i32) p[1] << 16)
 451 define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) {
 452 ; CHECK-LABEL: zext_load_i32_by_i8_shl_8:
 453 ; CHECK:       // %bb.0:
 454 ; CHECK-NEXT:    ldrb w8, [x0]
 455 ; CHECK-NEXT:    ldrb w9, [x0, #1]
 456 ; CHECK-NEXT:    lsl w0, w8, #8
 457 ; CHECK-NEXT:    bfi w0, w9, #16, #8
 458 ; CHECK-NEXT:    ret
 459
 460   %tmp = bitcast i32* %arg to i8*
 461   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
 462   %tmp2 = load i8, i8* %tmp1, align 2
 463   %tmp3 = zext i8 %tmp2 to i32
 464   %tmp30 = shl nuw nsw i32 %tmp3, 8
 465   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
 466   %tmp5 = load i8, i8* %tmp4, align 1
 467   %tmp6 = zext i8 %tmp5 to i32
 468   %tmp7 = shl nuw nsw i32 %tmp6, 16
 469   %tmp8 = or i32 %tmp7, %tmp30
 470   ret i32 %tmp8
 471 }
 472
 473 ; i8* p; // p is 2 byte aligned; the byte pair sits 16 bits up in the result, and the CHECK lines expect two separate ldrb plus lsl/bfi (no merged load)
 474 ; ((i32) p[0] << 16) | ((i32) p[1] << 24)
 475 define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) {
 476 ; CHECK-LABEL: zext_load_i32_by_i8_shl_16:
 477 ; CHECK:       // %bb.0:
 478 ; CHECK-NEXT:    ldrb w8, [x0]
 479 ; CHECK-NEXT:    ldrb w9, [x0, #1]
 480 ; CHECK-NEXT:    lsl w0, w8, #16
 481 ; CHECK-NEXT:    bfi w0, w9, #24, #8
 482 ; CHECK-NEXT:    ret
 483
 484   %tmp = bitcast i32* %arg to i8*
 485   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
 486   %tmp2 = load i8, i8* %tmp1, align 2
 487   %tmp3 = zext i8 %tmp2 to i32
 488   %tmp30 = shl nuw nsw i32 %tmp3, 16
 489   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
 490   %tmp5 = load i8, i8* %tmp4, align 1
 491   %tmp6 = zext i8 %tmp5 to i32
 492   %tmp7 = shl nuw nsw i32 %tmp6, 24
 493   %tmp8 = or i32 %tmp7, %tmp30
 494   ret i32 %tmp8
 495 }
 496 ; i8* p; // p is 2 byte aligned; the two bytes are combined in reversed order, and the CHECK lines expect ldrh, then lsl #16 and rev
 497 ; (i32) p[1] | ((i32) p[0] << 8)
 498 define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
 499 ; CHECK-LABEL: zext_load_i32_by_i8_bswap:
 500 ; CHECK:       // %bb.0:
 501 ; CHECK-NEXT:    ldrh w8, [x0]
 502 ; CHECK-NEXT:    lsl w8, w8, #16
 503 ; CHECK-NEXT:    rev w0, w8
 504 ; CHECK-NEXT:    ret
 505
 506   %tmp = bitcast i32* %arg to i8*
 507   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
 508   %tmp2 = load i8, i8* %tmp1, align 1
 509   %tmp3 = zext i8 %tmp2 to i32
 510   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
 511   %tmp5 = load i8, i8* %tmp4, align 2
 512   %tmp6 = zext i8 %tmp5 to i32
 513   %tmp7 = shl nuw nsw i32 %tmp6, 8
 514   %tmp8 = or i32 %tmp7, %tmp3
 515   ret i32 %tmp8
 516 }
 517
 518 ; i8* p; // p is 2 byte aligned; reversed byte pair sitting 8 bits up in the result, and the CHECK lines expect two separate ldrb plus lsl/bfi (no merged load)
 519 ; ((i32) p[1] << 8) | ((i32) p[0] << 16)
 520 define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) {
 521 ; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
 522 ; CHECK:       // %bb.0:
 523 ; CHECK-NEXT:    ldrb w8, [x0, #1]
 524 ; CHECK-NEXT:    ldrb w9, [x0]
 525 ; CHECK-NEXT:    lsl w0, w8, #8
 526 ; CHECK-NEXT:    bfi w0, w9, #16, #8
 527 ; CHECK-NEXT:    ret
 528
 529   %tmp = bitcast i32* %arg to i8*
 530   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
 531   %tmp2 = load i8, i8* %tmp1, align 1
 532   %tmp3 = zext i8 %tmp2 to i32
 533   %tmp30 = shl nuw nsw i32 %tmp3, 8
 534   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
 535   %tmp5 = load i8, i8* %tmp4, align 2
 536   %tmp6 = zext i8 %tmp5 to i32
 537   %tmp7 = shl nuw nsw i32 %tmp6, 16
 538   %tmp8 = or i32 %tmp7, %tmp30
 539   ret i32 %tmp8
 540 }
 541
 542 ; i8* p; // p is 2 byte aligned; reversed byte pair sitting 16 bits up in the result, and the CHECK lines expect two separate ldrb plus lsl/bfi (no merged load)
 543 ; ((i32) p[1] << 16) | ((i32) p[0] << 24)
 544 define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) {
 545 ; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
 546 ; CHECK:       // %bb.0:
 547 ; CHECK-NEXT:    ldrb w8, [x0, #1]
 548 ; CHECK-NEXT:    ldrb w9, [x0]
 549 ; CHECK-NEXT:    lsl w0, w8, #16
 550 ; CHECK-NEXT:    bfi w0, w9, #24, #8
 551 ; CHECK-NEXT:    ret
 552
 553   %tmp = bitcast i32* %arg to i8*
 554   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
 555   %tmp2 = load i8, i8* %tmp1, align 1
 556   %tmp3 = zext i8 %tmp2 to i32
 557   %tmp30 = shl nuw nsw i32 %tmp3, 16
 558   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
 559   %tmp5 = load i8, i8* %tmp4, align 2
 560   %tmp6 = zext i8 %tmp5 to i32
 561   %tmp7 = shl nuw nsw i32 %tmp6, 24
 562   %tmp8 = or i32 %tmp7, %tmp30
 563   ret i32 %tmp8
 564 }