test/CodeGen/X86/load-combine.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=BSWAP
   3 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+movbe | FileCheck %s --check-prefix=CHECK --check-prefix=MOVBE
   4 ; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK64 --check-prefix=BSWAP64
   5 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+movbe | FileCheck %s --check-prefix=CHECK64 --check-prefix=MOVBE64
   6
   7 ; i8* p;
   8 ; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
   9 define i32 @load_i32_by_i8(i32* %arg) {
  10 ; CHECK-LABEL: load_i32_by_i8:
  11 ; CHECK:       # %bb.0:
  12 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
  13 ; CHECK-NEXT:    movl (%eax), %eax
  14 ; CHECK-NEXT:    retl
  15 ;
  16 ; CHECK64-LABEL: load_i32_by_i8:
  17 ; CHECK64:       # %bb.0:
  18 ; CHECK64-NEXT:    movl (%rdi), %eax
  19 ; CHECK64-NEXT:    retq
  20   %tmp = bitcast i32* %arg to i8* ; address the word byte by byte; the assertions above expect one 32-bit load
  21   %tmp1 = load i8, i8* %tmp, align 1 ; p[0] supplies bits 0-7
  22   %tmp2 = zext i8 %tmp1 to i32
  23   %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  24   %tmp4 = load i8, i8* %tmp3, align 1 ; p[1] supplies bits 8-15
  25   %tmp5 = zext i8 %tmp4 to i32
  26   %tmp6 = shl nuw nsw i32 %tmp5, 8
  27   %tmp7 = or i32 %tmp6, %tmp2
  28   %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  29   %tmp9 = load i8, i8* %tmp8, align 1 ; p[2] supplies bits 16-23
  30   %tmp10 = zext i8 %tmp9 to i32
  31   %tmp11 = shl nuw nsw i32 %tmp10, 16
  32   %tmp12 = or i32 %tmp7, %tmp11
  33   %tmp13 = getelementptr inbounds i8, i8* %tmp, i32 3
  34   %tmp14 = load i8, i8* %tmp13, align 1 ; p[3] supplies bits 24-31
  35   %tmp15 = zext i8 %tmp14 to i32
  36   %tmp16 = shl nuw i32 %tmp15, 24 ; no nsw: a byte >= 0x80 reaches the sign bit, which nsw would turn into poison
  37   %tmp17 = or i32 %tmp12, %tmp16
  38   ret i32 %tmp17
  39 }
  40
  41 ; i8* p;
  42 ; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
  43 define i32 @load_i32_by_i8_bswap(i32* %arg) {
  44 ; BSWAP-LABEL: load_i32_by_i8_bswap:
  45 ; BSWAP:       # %bb.0:
  46 ; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
  47 ; BSWAP-NEXT:    movl (%eax), %eax
  48 ; BSWAP-NEXT:    bswapl %eax
  49 ; BSWAP-NEXT:    retl
  50 ;
  51 ; MOVBE-LABEL: load_i32_by_i8_bswap:
  52 ; MOVBE:       # %bb.0:
  53 ; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
  54 ; MOVBE-NEXT:    movbel (%eax), %eax
  55 ; MOVBE-NEXT:    retl
  56 ;
  57 ; BSWAP64-LABEL: load_i32_by_i8_bswap:
  58 ; BSWAP64:       # %bb.0:
  59 ; BSWAP64-NEXT:    movl (%rdi), %eax
  60 ; BSWAP64-NEXT:    bswapl %eax
  61 ; BSWAP64-NEXT:    retq
  62 ;
  63 ; MOVBE64-LABEL: load_i32_by_i8_bswap:
  64 ; MOVBE64:       # %bb.0:
  65 ; MOVBE64-NEXT:    movbel (%rdi), %eax
  66 ; MOVBE64-NEXT:    retq
  67   %tmp = bitcast i32* %arg to i8* ; address the word byte by byte; the assertions above expect a 32-bit load plus byte swap (or movbe)
  68   %tmp1 = load i8, i8* %tmp, align 1 ; p[0] supplies bits 24-31
  69   %tmp2 = zext i8 %tmp1 to i32
  70   %tmp3 = shl nuw i32 %tmp2, 24 ; no nsw: a byte >= 0x80 reaches the sign bit, which nsw would turn into poison
  71   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  72   %tmp5 = load i8, i8* %tmp4, align 1 ; p[1] supplies bits 16-23
  73   %tmp6 = zext i8 %tmp5 to i32
  74   %tmp7 = shl nuw nsw i32 %tmp6, 16
  75   %tmp8 = or i32 %tmp7, %tmp3
  76   %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  77   %tmp10 = load i8, i8* %tmp9, align 1 ; p[2] supplies bits 8-15
  78   %tmp11 = zext i8 %tmp10 to i32
  79   %tmp12 = shl nuw nsw i32 %tmp11, 8
  80   %tmp13 = or i32 %tmp8, %tmp12
  81   %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  82   %tmp15 = load i8, i8* %tmp14, align 1 ; p[3] supplies bits 0-7
  83   %tmp16 = zext i8 %tmp15 to i32
  84   %tmp17 = or i32 %tmp13, %tmp16
  85   ret i32 %tmp17
  86 }
  87
  88 ; i16* p;
  89 ; (i32) p[0] | ((i32) p[1] << 16)
  90 define i32 @load_i32_by_i16(i32* %arg) {
  91 ; CHECK-LABEL: load_i32_by_i16:
  92 ; CHECK:       # %bb.0:
  93 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
  94 ; CHECK-NEXT:    movl (%eax), %eax
  95 ; CHECK-NEXT:    retl
  96 ;
  97 ; CHECK64-LABEL: load_i32_by_i16:
  98 ; CHECK64:       # %bb.0:
  99 ; CHECK64-NEXT:    movl (%rdi), %eax
 100 ; CHECK64-NEXT:    retq
 101   %tmp = bitcast i32* %arg to i16* ; address the word as two halfwords; the assertions above expect one 32-bit load
 102   %tmp1 = load i16, i16* %tmp, align 1 ; p[0] supplies bits 0-15
 103   %tmp2 = zext i16 %tmp1 to i32
 104   %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
 105   %tmp4 = load i16, i16* %tmp3, align 1 ; p[1] supplies bits 16-31
 106   %tmp5 = zext i16 %tmp4 to i32
 107   %tmp6 = shl nuw i32 %tmp5, 16 ; no nsw: a halfword >= 0x8000 reaches the sign bit, which nsw would turn into poison
 108   %tmp7 = or i32 %tmp6, %tmp2
 109   ret i32 %tmp7
 110 }
 111
 112 ; i16* p_16;
 113 ; i8* p_8 = (i8*) p_16;
 114 ; (i32) p_16[0] | ((i32) p_8[2] << 16) | ((i32) p_8[3] << 24)
 115 define i32 @load_i32_by_i16_i8(i32* %arg) {
 116 ; CHECK-LABEL: load_i32_by_i16_i8:
 117 ; CHECK:       # %bb.0:
 118 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 119 ; CHECK-NEXT:    movl (%eax), %eax
 120 ; CHECK-NEXT:    retl
 121 ;
 122 ; CHECK64-LABEL: load_i32_by_i16_i8:
 123 ; CHECK64:       # %bb.0:
 124 ; CHECK64-NEXT:    movl (%rdi), %eax
 125 ; CHECK64-NEXT:    retq
 126   %tmp = bitcast i32* %arg to i16* ; halfword view, used for the low 16 bits
 127   %tmp1 = bitcast i32* %arg to i8* ; byte view, used for the upper two bytes
 128   %tmp2 = load i16, i16* %tmp, align 1 ; halfword 0 supplies bits 0-15
 129   %tmp3 = zext i16 %tmp2 to i32
 130   %tmp4 = getelementptr inbounds i8, i8* %tmp1, i32 2
 131   %tmp5 = load i8, i8* %tmp4, align 1 ; byte 2 supplies bits 16-23
 132   %tmp6 = zext i8 %tmp5 to i32
 133   %tmp7 = shl nuw nsw i32 %tmp6, 16
 134   %tmp8 = getelementptr inbounds i8, i8* %tmp1, i32 3
 135   %tmp9 = load i8, i8* %tmp8, align 1 ; byte 3 supplies bits 24-31
 136   %tmp10 = zext i8 %tmp9 to i32
 137   %tmp11 = shl nuw i32 %tmp10, 24 ; no nsw: a byte >= 0x80 reaches the sign bit, which nsw would turn into poison
 138   %tmp12 = or i32 %tmp7, %tmp11
 139   %tmp13 = or i32 %tmp12, %tmp3 ; mixed-width pieces; the assertions above expect one 32-bit load
 140   ret i32 %tmp13
 141 }
 142
 143
 144 ; i8* p;
 145 ; (i32) ((i16) p[0] | ((i16) p[1] << 8)) | ((i32) ((i16) p[2] | ((i16) p[3] << 8)) << 16)
 146 define i32 @load_i32_by_i16_by_i8(i32* %arg) {
 147 ; CHECK-LABEL: load_i32_by_i16_by_i8:
 148 ; CHECK:       # %bb.0:
 149 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 150 ; CHECK-NEXT:    movl (%eax), %eax
 151 ; CHECK-NEXT:    retl
 152 ;
 153 ; CHECK64-LABEL: load_i32_by_i16_by_i8:
 154 ; CHECK64:       # %bb.0:
 155 ; CHECK64-NEXT:    movl (%rdi), %eax
 156 ; CHECK64-NEXT:    retq
 157   %tmp = bitcast i32* %arg to i8* ; bytes are first paired into i16 values, then the i16 values into an i32
 158   %tmp1 = load i8, i8* %tmp, align 1 ; p[0]: low byte of the low halfword
 159   %tmp2 = zext i8 %tmp1 to i16
 160   %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
 161   %tmp4 = load i8, i8* %tmp3, align 1 ; p[1]: high byte of the low halfword
 162   %tmp5 = zext i8 %tmp4 to i16
 163   %tmp6 = shl nuw i16 %tmp5, 8 ; no nsw: a byte >= 0x80 reaches the i16 sign bit, which nsw would turn into poison
 164   %tmp7 = or i16 %tmp6, %tmp2
 165   %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
 166   %tmp9 = load i8, i8* %tmp8, align 1 ; p[2]: low byte of the high halfword
 167   %tmp10 = zext i8 %tmp9 to i16
 168   %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
 169   %tmp12 = load i8, i8* %tmp11, align 1 ; p[3]: high byte of the high halfword
 170   %tmp13 = zext i8 %tmp12 to i16
 171   %tmp14 = shl nuw i16 %tmp13, 8 ; no nsw, same reason as the low halfword
 172   %tmp15 = or i16 %tmp14, %tmp10
 173   %tmp16 = zext i16 %tmp7 to i32
 174   %tmp17 = zext i16 %tmp15 to i32
 175   %tmp18 = shl nuw i32 %tmp17, 16 ; no nsw: a halfword >= 0x8000 reaches the i32 sign bit
 176   %tmp19 = or i32 %tmp18, %tmp16 ; the assertions above expect one 32-bit load
 177   ret i32 %tmp19
 178 }
 179
 180 ; i8* p;
 181 ; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[2] << 8) | (i16) p[3])
 182 define i32 @load_i32_by_i16_by_i8_bswap(i32* %arg) {
 183 ; BSWAP-LABEL: load_i32_by_i16_by_i8_bswap:
 184 ; BSWAP:       # %bb.0:
 185 ; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
 186 ; BSWAP-NEXT:    movl (%eax), %eax
 187 ; BSWAP-NEXT:    bswapl %eax
 188 ; BSWAP-NEXT:    retl
 189 ;
 190 ; MOVBE-LABEL: load_i32_by_i16_by_i8_bswap:
 191 ; MOVBE:       # %bb.0:
 192 ; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 193 ; MOVBE-NEXT:    movbel (%eax), %eax
 194 ; MOVBE-NEXT:    retl
 195 ;
 196 ; BSWAP64-LABEL: load_i32_by_i16_by_i8_bswap:
 197 ; BSWAP64:       # %bb.0:
 198 ; BSWAP64-NEXT:    movl (%rdi), %eax
 199 ; BSWAP64-NEXT:    bswapl %eax
 200 ; BSWAP64-NEXT:    retq
 201 ;
 202 ; MOVBE64-LABEL: load_i32_by_i16_by_i8_bswap:
 203 ; MOVBE64:       # %bb.0:
 204 ; MOVBE64-NEXT:    movbel (%rdi), %eax
 205 ; MOVBE64-NEXT:    retq
 206   %tmp = bitcast i32* %arg to i8* ; bytes are first paired into i16 values, then the i16 values into an i32
 207   %tmp1 = load i8, i8* %tmp, align 1 ; p[0]: high byte of the high halfword
 208   %tmp2 = zext i8 %tmp1 to i16
 209   %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
 210   %tmp4 = load i8, i8* %tmp3, align 1 ; p[1]: low byte of the high halfword
 211   %tmp5 = zext i8 %tmp4 to i16
 212   %tmp6 = shl nuw i16 %tmp2, 8 ; no nsw: a byte >= 0x80 reaches the i16 sign bit, which nsw would turn into poison
 213   %tmp7 = or i16 %tmp6, %tmp5
 214   %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
 215   %tmp9 = load i8, i8* %tmp8, align 1 ; p[2]: high byte of the low halfword
 216   %tmp10 = zext i8 %tmp9 to i16
 217   %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
 218   %tmp12 = load i8, i8* %tmp11, align 1 ; p[3]: low byte of the low halfword
 219   %tmp13 = zext i8 %tmp12 to i16
 220   %tmp14 = shl nuw i16 %tmp10, 8 ; no nsw, same reason as the high halfword
 221   %tmp15 = or i16 %tmp14, %tmp13
 222   %tmp16 = zext i16 %tmp7 to i32
 223   %tmp17 = zext i16 %tmp15 to i32
 224   %tmp18 = shl nuw i32 %tmp16, 16 ; no nsw: a halfword >= 0x8000 reaches the i32 sign bit
 225   %tmp19 = or i32 %tmp18, %tmp17 ; the assertions above expect a 32-bit load plus byte swap (or movbe)
 226   ret i32 %tmp19
 227 }
 228
 229 ; i8* p;
 230 ; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
 231 define i64 @load_i64_by_i8(i64* %arg) {
 232 ; CHECK-LABEL: load_i64_by_i8:
 233 ; CHECK:       # %bb.0:
 234 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 235 ; CHECK-NEXT:    movl (%ecx), %eax
 236 ; CHECK-NEXT:    movl 4(%ecx), %edx
 237 ; CHECK-NEXT:    retl
 238 ;
 239 ; CHECK64-LABEL: load_i64_by_i8:
 240 ; CHECK64:       # %bb.0:
 241 ; CHECK64-NEXT:    movq (%rdi), %rax
 242 ; CHECK64-NEXT:    retq
 243   %bytes = bitcast i64* %arg to i8* ; naming: aN = address, bN = byte, wN = widened byte, sN = shifted, accN = running or
 244   %b0 = load i8, i8* %bytes, align 1
 245   %w0 = zext i8 %b0 to i64
 246   %a1 = getelementptr inbounds i8, i8* %bytes, i64 1
 247   %b1 = load i8, i8* %a1, align 1
 248   %w1 = zext i8 %b1 to i64
 249   %s1 = shl nuw nsw i64 %w1, 8
 250   %acc1 = or i64 %s1, %w0
 251   %a2 = getelementptr inbounds i8, i8* %bytes, i64 2
 252   %b2 = load i8, i8* %a2, align 1
 253   %w2 = zext i8 %b2 to i64
 254   %s2 = shl nuw nsw i64 %w2, 16
 255   %acc2 = or i64 %acc1, %s2
 256   %a3 = getelementptr inbounds i8, i8* %bytes, i64 3
 257   %b3 = load i8, i8* %a3, align 1
 258   %w3 = zext i8 %b3 to i64
 259   %s3 = shl nuw nsw i64 %w3, 24
 260   %acc3 = or i64 %acc2, %s3
 261   %a4 = getelementptr inbounds i8, i8* %bytes, i64 4
 262   %b4 = load i8, i8* %a4, align 1
 263   %w4 = zext i8 %b4 to i64
 264   %s4 = shl nuw nsw i64 %w4, 32
 265   %acc4 = or i64 %acc3, %s4
 266   %a5 = getelementptr inbounds i8, i8* %bytes, i64 5
 267   %b5 = load i8, i8* %a5, align 1
 268   %w5 = zext i8 %b5 to i64
 269   %s5 = shl nuw nsw i64 %w5, 40
 270   %acc5 = or i64 %acc4, %s5
 271   %a6 = getelementptr inbounds i8, i8* %bytes, i64 6
 272   %b6 = load i8, i8* %a6, align 1
 273   %w6 = zext i8 %b6 to i64
 274   %s6 = shl nuw nsw i64 %w6, 48
 275   %acc6 = or i64 %acc5, %s6
 276   %a7 = getelementptr inbounds i8, i8* %bytes, i64 7
 277   %b7 = load i8, i8* %a7, align 1
 278   %w7 = zext i8 %b7 to i64
 279   %s7 = shl nuw i64 %w7, 56 ; top byte: nuw only, since it can reach the sign bit
 280   %acc7 = or i64 %acc6, %s7 ; byte k sits at bits 8k..8k+7; the assertions above expect plain loads with no byte swap
 281   ret i64 %acc7
 282 }
 283
 284 ; i8* p;
 285 ; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
 286 define i64 @load_i64_by_i8_bswap(i64* %arg) {
 287 ; BSWAP-LABEL: load_i64_by_i8_bswap:
 288 ; BSWAP:       # %bb.0:
 289 ; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
 290 ; BSWAP-NEXT:    movl (%eax), %edx
 291 ; BSWAP-NEXT:    movl 4(%eax), %eax
 292 ; BSWAP-NEXT:    bswapl %eax
 293 ; BSWAP-NEXT:    bswapl %edx
 294 ; BSWAP-NEXT:    retl
 295 ;
 296 ; MOVBE-LABEL: load_i64_by_i8_bswap:
 297 ; MOVBE:       # %bb.0:
 298 ; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 299 ; MOVBE-NEXT:    movbel 4(%ecx), %eax
 300 ; MOVBE-NEXT:    movbel (%ecx), %edx
 301 ; MOVBE-NEXT:    retl
 302 ;
 303 ; BSWAP64-LABEL: load_i64_by_i8_bswap:
 304 ; BSWAP64:       # %bb.0:
 305 ; BSWAP64-NEXT:    movq (%rdi), %rax
 306 ; BSWAP64-NEXT:    bswapq %rax
 307 ; BSWAP64-NEXT:    retq
 308 ;
 309 ; MOVBE64-LABEL: load_i64_by_i8_bswap:
 310 ; MOVBE64:       # %bb.0:
 311 ; MOVBE64-NEXT:    movbeq (%rdi), %rax
 312 ; MOVBE64-NEXT:    retq
 313   %bytes = bitcast i64* %arg to i8* ; naming: aN = address, bN = byte, wN = widened byte, sN = shifted, accN = running or
 314   %b0 = load i8, i8* %bytes, align 1
 315   %w0 = zext i8 %b0 to i64
 316   %s0 = shl nuw i64 %w0, 56 ; top byte: nuw only, since it can reach the sign bit
 317   %a1 = getelementptr inbounds i8, i8* %bytes, i64 1
 318   %b1 = load i8, i8* %a1, align 1
 319   %w1 = zext i8 %b1 to i64
 320   %s1 = shl nuw nsw i64 %w1, 48
 321   %acc1 = or i64 %s1, %s0
 322   %a2 = getelementptr inbounds i8, i8* %bytes, i64 2
 323   %b2 = load i8, i8* %a2, align 1
 324   %w2 = zext i8 %b2 to i64
 325   %s2 = shl nuw nsw i64 %w2, 40
 326   %acc2 = or i64 %acc1, %s2
 327   %a3 = getelementptr inbounds i8, i8* %bytes, i64 3
 328   %b3 = load i8, i8* %a3, align 1
 329   %w3 = zext i8 %b3 to i64
 330   %s3 = shl nuw nsw i64 %w3, 32
 331   %acc3 = or i64 %acc2, %s3
 332   %a4 = getelementptr inbounds i8, i8* %bytes, i64 4
 333   %b4 = load i8, i8* %a4, align 1
 334   %w4 = zext i8 %b4 to i64
 335   %s4 = shl nuw nsw i64 %w4, 24
 336   %acc4 = or i64 %acc3, %s4
 337   %a5 = getelementptr inbounds i8, i8* %bytes, i64 5
 338   %b5 = load i8, i8* %a5, align 1
 339   %w5 = zext i8 %b5 to i64
 340   %s5 = shl nuw nsw i64 %w5, 16
 341   %acc5 = or i64 %acc4, %s5
 342   %a6 = getelementptr inbounds i8, i8* %bytes, i64 6
 343   %b6 = load i8, i8* %a6, align 1
 344   %w6 = zext i8 %b6 to i64
 345   %s6 = shl nuw nsw i64 %w6, 8
 346   %acc6 = or i64 %acc5, %s6
 347   %a7 = getelementptr inbounds i8, i8* %bytes, i64 7
 348   %b7 = load i8, i8* %a7, align 1
 349   %w7 = zext i8 %b7 to i64
 350   %acc7 = or i64 %acc6, %w7 ; byte k sits at bits 56-8k..63-8k; the assertions above expect loads plus byte swaps (or movbe)
 351   ret i64 %acc7
 352 }
 353
 354 ; Part of the load by bytes pattern is used outside of the pattern
 355 ; i8* p;
 356 ; i32 x = (i32) p[1]
 357 ; res = ((i32) p[0] << 24) | (x << 16) | ((i32) p[2] << 8) | (i32) p[3]
 358 ; x | res
 359 define i32 @load_i32_by_i8_bswap_uses(i32* %arg) {
 360 ; CHECK-LABEL: load_i32_by_i8_bswap_uses:
 361 ; CHECK:       # %bb.0:
 362 ; CHECK-NEXT:    pushl %esi
 363 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 364 ; CHECK-NEXT:    .cfi_offset %esi, -8
 365 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 366 ; CHECK-NEXT:    movzbl (%eax), %ecx
 367 ; CHECK-NEXT:    shll $24, %ecx
 368 ; CHECK-NEXT:    movzbl 1(%eax), %edx
 369 ; CHECK-NEXT:    movl %edx, %esi
 370 ; CHECK-NEXT:    shll $16, %esi
 371 ; CHECK-NEXT:    orl %ecx, %esi
 372 ; CHECK-NEXT:    movzbl 2(%eax), %ecx
 373 ; CHECK-NEXT:    shll $8, %ecx
 374 ; CHECK-NEXT:    orl %esi, %ecx
 375 ; CHECK-NEXT:    movzbl 3(%eax), %eax
 376 ; CHECK-NEXT:    orl %ecx, %eax
 377 ; CHECK-NEXT:    orl %edx, %eax
 378 ; CHECK-NEXT:    popl %esi
 379 ; CHECK-NEXT:    .cfi_def_cfa_offset 4
 380 ; CHECK-NEXT:    retl
 381 ;
 382 ; CHECK64-LABEL: load_i32_by_i8_bswap_uses:
 383 ; CHECK64:       # %bb.0:
 384 ; CHECK64-NEXT:    movzbl (%rdi), %eax
 385 ; CHECK64-NEXT:    shll $24, %eax
 386 ; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
 387 ; CHECK64-NEXT:    movl %ecx, %edx
 388 ; CHECK64-NEXT:    shll $16, %edx
 389 ; CHECK64-NEXT:    orl %eax, %edx
 390 ; CHECK64-NEXT:    movzbl 2(%rdi), %esi
 391 ; CHECK64-NEXT:    shll $8, %esi
 392 ; CHECK64-NEXT:    orl %edx, %esi
 393 ; CHECK64-NEXT:    movzbl 3(%rdi), %eax
 394 ; CHECK64-NEXT:    orl %esi, %eax
 395 ; CHECK64-NEXT:    orl %ecx, %eax
 396 ; CHECK64-NEXT:    retq
 397   %tmp = bitcast i32* %arg to i8* ; negative test: the assertions above keep the four separate byte loads
 398   %tmp1 = load i8, i8* %tmp, align 1 ; p[0] supplies bits 24-31
 399   %tmp2 = zext i8 %tmp1 to i32
 400   %tmp3 = shl nuw i32 %tmp2, 24 ; no nsw: a byte >= 0x80 reaches the sign bit, which nsw would turn into poison
 401   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
 402   %tmp5 = load i8, i8* %tmp4, align 1 ; p[1] supplies bits 16-23
 403   %tmp6 = zext i8 %tmp5 to i32 ; this widened byte is also consumed directly by %tmp18 below
 404   %tmp7 = shl nuw nsw i32 %tmp6, 16
 405   %tmp8 = or i32 %tmp7, %tmp3
 406   %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
 407   %tmp10 = load i8, i8* %tmp9, align 1 ; p[2] supplies bits 8-15
 408   %tmp11 = zext i8 %tmp10 to i32
 409   %tmp12 = shl nuw nsw i32 %tmp11, 8
 410   %tmp13 = or i32 %tmp8, %tmp12
 411   %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
 412   %tmp15 = load i8, i8* %tmp14, align 1 ; p[3] supplies bits 0-7
 413   %tmp16 = zext i8 %tmp15 to i32
 414   %tmp17 = or i32 %tmp13, %tmp16
 415   ; Use individual part of the pattern outside of the pattern
 416   %tmp18 = or i32 %tmp6, %tmp17
 417   ret i32 %tmp18
 418 }
 419
 420 ; One of the loads is volatile
 421 ; i8* p;
 422 ; p0 = volatile *p;
 423 ; ((i32) p0 << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
 424 define i32 @load_i32_by_i8_bswap_volatile(i32* %arg) {
 425 ; CHECK-LABEL: load_i32_by_i8_bswap_volatile:
 426 ; CHECK:       # %bb.0:
 427 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 428 ; CHECK-NEXT:    movzbl (%eax), %ecx
 429 ; CHECK-NEXT:    shll $24, %ecx
 430 ; CHECK-NEXT:    movzbl 1(%eax), %edx
 431 ; CHECK-NEXT:    shll $16, %edx
 432 ; CHECK-NEXT:    orl %ecx, %edx
 433 ; CHECK-NEXT:    movzbl 2(%eax), %ecx
 434 ; CHECK-NEXT:    shll $8, %ecx
 435 ; CHECK-NEXT:    orl %edx, %ecx
 436 ; CHECK-NEXT:    movzbl 3(%eax), %eax
 437 ; CHECK-NEXT:    orl %ecx, %eax
 438 ; CHECK-NEXT:    retl
 439 ;
 440 ; CHECK64-LABEL: load_i32_by_i8_bswap_volatile:
 441 ; CHECK64:       # %bb.0:
 442 ; CHECK64-NEXT:    movzbl (%rdi), %eax
 443 ; CHECK64-NEXT:    shll $24, %eax
 444 ; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
 445 ; CHECK64-NEXT:    shll $16, %ecx
 446 ; CHECK64-NEXT:    orl %eax, %ecx
 447 ; CHECK64-NEXT:    movzbl 2(%rdi), %edx
 448 ; CHECK64-NEXT:    shll $8, %edx
 449 ; CHECK64-NEXT:    orl %ecx, %edx
 450 ; CHECK64-NEXT:    movzbl 3(%rdi), %eax
 451 ; CHECK64-NEXT:    orl %edx, %eax
 452 ; CHECK64-NEXT:    retq
 453   %tmp = bitcast i32* %arg to i8* ; negative test: the assertions above keep the four separate byte loads
 454   %tmp1 = load volatile i8, i8* %tmp, align 1 ; the only volatile access in the pattern; p[0] supplies bits 24-31
 455   %tmp2 = zext i8 %tmp1 to i32
 456   %tmp3 = shl nuw i32 %tmp2, 24 ; no nsw: a byte >= 0x80 reaches the sign bit, which nsw would turn into poison
 457   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
 458   %tmp5 = load i8, i8* %tmp4, align 1 ; p[1] supplies bits 16-23
 459   %tmp6 = zext i8 %tmp5 to i32
 460   %tmp7 = shl nuw nsw i32 %tmp6, 16
 461   %tmp8 = or i32 %tmp7, %tmp3
 462   %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
 463   %tmp10 = load i8, i8* %tmp9, align 1 ; p[2] supplies bits 8-15
 464   %tmp11 = zext i8 %tmp10 to i32
 465   %tmp12 = shl nuw nsw i32 %tmp11, 8
 466   %tmp13 = or i32 %tmp8, %tmp12
 467   %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
 468   %tmp15 = load i8, i8* %tmp14, align 1 ; p[3] supplies bits 0-7
 469   %tmp16 = zext i8 %tmp15 to i32
 470   %tmp17 = or i32 %tmp13, %tmp16
 471   ret i32 %tmp17
 472 }
 473
 474 ; There is a store in between individual loads
 475 ; i8* p, q;
 476 ; res1 = ((i32) p[0] << 24) | ((i32) p[1] << 16)
 477 ; *q = 0;
 478 ; res2 = ((i32) p[2] << 8) | (i32) p[3]
 479 ; res1 | res2
 480 define i32 @load_i32_by_i8_bswap_store_in_between(i32* %arg, i32* %arg1) {
 481 ; CHECK-LABEL: load_i32_by_i8_bswap_store_in_between:
 482 ; CHECK:       # %bb.0:
 483 ; CHECK-NEXT:    pushl %esi
 484 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 485 ; CHECK-NEXT:    .cfi_offset %esi, -8
 486 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 487 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 488 ; CHECK-NEXT:    movzbl (%ecx), %edx
 489 ; CHECK-NEXT:    shll $24, %edx
 490 ; CHECK-NEXT:    movzbl 1(%ecx), %esi
 491 ; CHECK-NEXT:    movl $0, (%eax)
 492 ; CHECK-NEXT:    shll $16, %esi
 493 ; CHECK-NEXT:    orl %edx, %esi
 494 ; CHECK-NEXT:    movzbl 2(%ecx), %edx
 495 ; CHECK-NEXT:    shll $8, %edx
 496 ; CHECK-NEXT:    orl %esi, %edx
 497 ; CHECK-NEXT:    movzbl 3(%ecx), %eax
 498 ; CHECK-NEXT:    orl %edx, %eax
 499 ; CHECK-NEXT:    popl %esi
 500 ; CHECK-NEXT:    .cfi_def_cfa_offset 4
 501 ; CHECK-NEXT:    retl
 502 ;
 503 ; CHECK64-LABEL: load_i32_by_i8_bswap_store_in_between:
 504 ; CHECK64:       # %bb.0:
 505 ; CHECK64-NEXT:    movzbl (%rdi), %eax
 506 ; CHECK64-NEXT:    shll $24, %eax
 507 ; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
 508 ; CHECK64-NEXT:    movl $0, (%rsi)
 509 ; CHECK64-NEXT:    shll $16, %ecx
 510 ; CHECK64-NEXT:    orl %eax, %ecx
 511 ; CHECK64-NEXT:    movzbl 2(%rdi), %edx
 512 ; CHECK64-NEXT:    shll $8, %edx
 513 ; CHECK64-NEXT:    orl %ecx, %edx
 514 ; CHECK64-NEXT:    movzbl 3(%rdi), %eax
 515 ; CHECK64-NEXT:    orl %edx, %eax
 516 ; CHECK64-NEXT:    retq
 517   %tmp = bitcast i32* %arg to i8* ; negative test: the assertions above keep the four separate byte loads
 518   %tmp2 = load i8, i8* %tmp, align 1 ; p[0] supplies bits 24-31
 519   %tmp3 = zext i8 %tmp2 to i32
 520   %tmp4 = shl nuw i32 %tmp3, 24 ; no nsw: a byte >= 0x80 reaches the sign bit, which nsw would turn into poison
 521   %tmp5 = getelementptr inbounds i8, i8* %tmp, i32 1
 522   %tmp6 = load i8, i8* %tmp5, align 1 ; p[1] supplies bits 16-23
 523   ; This store will prevent folding of the pattern
 524   store i32 0, i32* %arg1
 525   %tmp7 = zext i8 %tmp6 to i32
 526   %tmp8 = shl nuw nsw i32 %tmp7, 16
 527   %tmp9 = or i32 %tmp8, %tmp4
 528   %tmp10 = getelementptr inbounds i8, i8* %tmp, i32 2
 529   %tmp11 = load i8, i8* %tmp10, align 1 ; p[2] supplies bits 8-15, loaded after the store
 530   %tmp12 = zext i8 %tmp11 to i32
 531   %tmp13 = shl nuw nsw i32 %tmp12, 8
 532   %tmp14 = or i32 %tmp9, %tmp13
 533   %tmp15 = getelementptr inbounds i8, i8* %tmp, i32 3
 534   %tmp16 = load i8, i8* %tmp15, align 1 ; p[3] supplies bits 0-7, loaded after the store
 535   %tmp17 = zext i8 %tmp16 to i32
 536   %tmp18 = or i32 %tmp14, %tmp17
 537   ret i32 %tmp18
 538 }
 539
 540 ; One of the loads is from an unrelated location
 541 ; i8* p, q;
 542 ; ((i32) p[0] << 24) | ((i32) q[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
 543 define i32 @load_i32_by_i8_bswap_unrelated_load(i32* %arg, i32* %arg1) {
 544 ; CHECK-LABEL: load_i32_by_i8_bswap_unrelated_load:
 545 ; CHECK:       # %bb.0:
 546 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 547 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 548 ; CHECK-NEXT:    movzbl (%ecx), %edx
 549 ; CHECK-NEXT:    shll $24, %edx
 550 ; CHECK-NEXT:    movzbl 1(%eax), %eax
 551 ; CHECK-NEXT:    shll $16, %eax
 552 ; CHECK-NEXT:    orl %edx, %eax
 553 ; CHECK-NEXT:    movzbl 2(%ecx), %edx
 554 ; CHECK-NEXT:    shll $8, %edx
 555 ; CHECK-NEXT:    orl %eax, %edx
 556 ; CHECK-NEXT:    movzbl 3(%ecx), %eax
 557 ; CHECK-NEXT:    orl %edx, %eax
 558 ; CHECK-NEXT:    retl
 559 ;
 560 ; CHECK64-LABEL: load_i32_by_i8_bswap_unrelated_load:
 561 ; CHECK64:       # %bb.0:
 562 ; CHECK64-NEXT:    movzbl (%rdi), %eax
 563 ; CHECK64-NEXT:    shll $24, %eax
 564 ; CHECK64-NEXT:    movzbl 1(%rsi), %ecx
 565 ; CHECK64-NEXT:    shll $16, %ecx
 566 ; CHECK64-NEXT:    orl %eax, %ecx
 567 ; CHECK64-NEXT:    movzbl 2(%rdi), %edx
 568 ; CHECK64-NEXT:    shll $8, %edx
 569 ; CHECK64-NEXT:    orl %ecx, %edx
 570 ; CHECK64-NEXT:    movzbl 3(%rdi), %eax
 571 ; CHECK64-NEXT:    orl %edx, %eax
 572 ; CHECK64-NEXT:    retq
 573   %tmp = bitcast i32* %arg to i8* ; byte view of p; negative test, the assertions above keep the four separate byte loads
 574   %tmp2 = bitcast i32* %arg1 to i8* ; byte view of q
 575   %tmp3 = load i8, i8* %tmp, align 1 ; p[0] supplies bits 24-31
 576   %tmp4 = zext i8 %tmp3 to i32
 577   %tmp5 = shl nuw i32 %tmp4, 24 ; no nsw: a byte >= 0x80 reaches the sign bit, which nsw would turn into poison
 578   ; Load from an unrelated address
 579   %tmp6 = getelementptr inbounds i8, i8* %tmp2, i32 1
 580   %tmp7 = load i8, i8* %tmp6, align 1 ; q[1] supplies bits 16-23
 581   %tmp8 = zext i8 %tmp7 to i32
 582   %tmp9 = shl nuw nsw i32 %tmp8, 16
 583   %tmp10 = or i32 %tmp9, %tmp5
 584   %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 2
 585   %tmp12 = load i8, i8* %tmp11, align 1 ; p[2] supplies bits 8-15
 586   %tmp13 = zext i8 %tmp12 to i32
 587   %tmp14 = shl nuw nsw i32 %tmp13, 8
 588   %tmp15 = or i32 %tmp10, %tmp14
 589   %tmp16 = getelementptr inbounds i8, i8* %tmp, i32 3
 590   %tmp17 = load i8, i8* %tmp16, align 1 ; p[3] supplies bits 0-7
 591   %tmp18 = zext i8 %tmp17 to i32
 592   %tmp19 = or i32 %tmp15, %tmp18
 593   ret i32 %tmp19
 594 }
 595
 596 ; i8* p;
 597 ; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
 598 define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
 599 ; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
 600 ; CHECK:       # %bb.0:
 601 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 602 ; CHECK-NEXT:    movl 1(%eax), %eax
 603 ; CHECK-NEXT:    retl
 604 ;
 605 ; CHECK64-LABEL: load_i32_by_i8_nonzero_offset:
 606 ; CHECK64:       # %bb.0:
 607 ; CHECK64-NEXT:    movl 1(%rdi), %eax
 608 ; CHECK64-NEXT:    retq
 609   %tmp = bitcast i32* %arg to i8* ; the assertions above expect one 32-bit load at displacement 1
 610   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
 611   %tmp2 = load i8, i8* %tmp1, align 1 ; p[1] supplies bits 0-7
 612   %tmp3 = zext i8 %tmp2 to i32
 613   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
 614   %tmp5 = load i8, i8* %tmp4, align 1 ; p[2] supplies bits 8-15
 615   %tmp6 = zext i8 %tmp5 to i32
 616   %tmp7 = shl nuw nsw i32 %tmp6, 8
 617   %tmp8 = or i32 %tmp7, %tmp3
 618   %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
 619   %tmp10 = load i8, i8* %tmp9, align 1 ; p[3] supplies bits 16-23
 620   %tmp11 = zext i8 %tmp10 to i32
 621   %tmp12 = shl nuw nsw i32 %tmp11, 16
 622   %tmp13 = or i32 %tmp8, %tmp12
 623   %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
 624   %tmp15 = load i8, i8* %tmp14, align 1 ; p[4] supplies bits 24-31
 625   %tmp16 = zext i8 %tmp15 to i32
 626   %tmp17 = shl nuw i32 %tmp16, 24 ; no nsw: a byte >= 0x80 reaches the sign bit, which nsw would turn into poison
 627   %tmp18 = or i32 %tmp13, %tmp17
 628   ret i32 %tmp18
 629 }
 630
 631 ; i8* p;
 632 ; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
 633 define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
 634 ; CHECK-LABEL: load_i32_by_i8_neg_offset:
 635 ; CHECK:       # %bb.0:
 636 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 637 ; CHECK-NEXT:    movl -4(%eax), %eax
 638 ; CHECK-NEXT:    retl
 639 ;
 640 ; CHECK64-LABEL: load_i32_by_i8_neg_offset:
 641 ; CHECK64:       # %bb.0:
 642 ; CHECK64-NEXT:    movl -4(%rdi), %eax
 643 ; CHECK64-NEXT:    retq
 644   %tmp = bitcast i32* %arg to i8* ; the assertions above expect one 32-bit load at displacement -4
 645   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
 646   %tmp2 = load i8, i8* %tmp1, align 1 ; p[-4] supplies bits 0-7
 647   %tmp3 = zext i8 %tmp2 to i32
 648   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
 649   %tmp5 = load i8, i8* %tmp4, align 1 ; p[-3] supplies bits 8-15
 650   %tmp6 = zext i8 %tmp5 to i32
 651   %tmp7 = shl nuw nsw i32 %tmp6, 8
 652   %tmp8 = or i32 %tmp7, %tmp3
 653   %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
 654   %tmp10 = load i8, i8* %tmp9, align 1 ; p[-2] supplies bits 16-23
 655   %tmp11 = zext i8 %tmp10 to i32
 656   %tmp12 = shl nuw nsw i32 %tmp11, 16
 657   %tmp13 = or i32 %tmp8, %tmp12
 658   %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
 659   %tmp15 = load i8, i8* %tmp14, align 1 ; p[-1] supplies bits 24-31
 660   %tmp16 = zext i8 %tmp15 to i32
 661   %tmp17 = shl nuw i32 %tmp16, 24 ; no nsw: a byte >= 0x80 reaches the sign bit, which nsw would turn into poison
 662   %tmp18 = or i32 %tmp13, %tmp17
 663   ret i32 %tmp18
 664 }
 665
 666 ; i8* p;
 667 ; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
 668 define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
 669 ; BSWAP-LABEL: load_i32_by_i8_nonzero_offset_bswap:
 670 ; BSWAP:       # %bb.0:
 671 ; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
 672 ; BSWAP-NEXT:    movl 1(%eax), %eax
 673 ; BSWAP-NEXT:    bswapl %eax
 674 ; BSWAP-NEXT:    retl
 675 ;
 676 ; MOVBE-LABEL: load_i32_by_i8_nonzero_offset_bswap:
 677 ; MOVBE:       # %bb.0:
 678 ; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 679 ; MOVBE-NEXT:    movbel 1(%eax), %eax
 680 ; MOVBE-NEXT:    retl
 681 ;
 682 ; BSWAP64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
 683 ; BSWAP64:       # %bb.0:
 684 ; BSWAP64-NEXT:    movl 1(%rdi), %eax
 685 ; BSWAP64-NEXT:    bswapl %eax
 686 ; BSWAP64-NEXT:    retq
 687 ;
 688 ; MOVBE64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
 689 ; MOVBE64:       # %bb.0:
 690 ; MOVBE64-NEXT:    movbel 1(%rdi), %eax
 691 ; MOVBE64-NEXT:    retq
 692   %tmp = bitcast i32* %arg to i8* ; the assertions above expect a byte-swapped 32-bit load at displacement 1
 693   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
 694   %tmp2 = load i8, i8* %tmp1, align 1 ; p[4] supplies bits 0-7
 695   %tmp3 = zext i8 %tmp2 to i32
 696   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
 697   %tmp5 = load i8, i8* %tmp4, align 1 ; p[3] supplies bits 8-15
 698   %tmp6 = zext i8 %tmp5 to i32
 699   %tmp7 = shl nuw nsw i32 %tmp6, 8
 700   %tmp8 = or i32 %tmp7, %tmp3
 701   %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
 702   %tmp10 = load i8, i8* %tmp9, align 1 ; p[2] supplies bits 16-23
 703   %tmp11 = zext i8 %tmp10 to i32
 704   %tmp12 = shl nuw nsw i32 %tmp11, 16
 705   %tmp13 = or i32 %tmp8, %tmp12
 706   %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
 707   %tmp15 = load i8, i8* %tmp14, align 1 ; p[1] supplies bits 24-31
 708   %tmp16 = zext i8 %tmp15 to i32
 709   %tmp17 = shl nuw i32 %tmp16, 24 ; no nsw: a byte >= 0x80 reaches the sign bit, which nsw would turn into poison
 710   %tmp18 = or i32 %tmp13, %tmp17
 711   ret i32 %tmp18
 712 }
 713
 714 ; i8* p;
 715 ; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
 716 define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
 717 ; BSWAP-LABEL: load_i32_by_i8_neg_offset_bswap:
 718 ; BSWAP:       # %bb.0:
 719 ; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
 720 ; BSWAP-NEXT:    movl -4(%eax), %eax
 721 ; BSWAP-NEXT:    bswapl %eax
 722 ; BSWAP-NEXT:    retl
 723 ;
 724 ; MOVBE-LABEL: load_i32_by_i8_neg_offset_bswap:
 725 ; MOVBE:       # %bb.0:
 726 ; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 727 ; MOVBE-NEXT:    movbel -4(%eax), %eax
 728 ; MOVBE-NEXT:    retl
 729 ;
 730 ; BSWAP64-LABEL: load_i32_by_i8_neg_offset_bswap:
 731 ; BSWAP64:       # %bb.0:
 732 ; BSWAP64-NEXT:    movl -4(%rdi), %eax
 733 ; BSWAP64-NEXT:    bswapl %eax
 734 ; BSWAP64-NEXT:    retq
 735 ;
 736 ; MOVBE64-LABEL: load_i32_by_i8_neg_offset_bswap:
 737 ; MOVBE64:       # %bb.0:
 738 ; MOVBE64-NEXT:    movbel -4(%rdi), %eax
 739 ; MOVBE64-NEXT:    retq
 740   %tmp = bitcast i32* %arg to i8* ; the assertions above expect a byte-swapped 32-bit load at displacement -4
 741   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
 742   %tmp2 = load i8, i8* %tmp1, align 1 ; p[-1] supplies bits 0-7
 743   %tmp3 = zext i8 %tmp2 to i32
 744   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
 745   %tmp5 = load i8, i8* %tmp4, align 1 ; p[-2] supplies bits 8-15
 746   %tmp6 = zext i8 %tmp5 to i32
 747   %tmp7 = shl nuw nsw i32 %tmp6, 8
 748   %tmp8 = or i32 %tmp7, %tmp3
 749   %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
 750   %tmp10 = load i8, i8* %tmp9, align 1 ; p[-3] supplies bits 16-23
 751   %tmp11 = zext i8 %tmp10 to i32
 752   %tmp12 = shl nuw nsw i32 %tmp11, 16
 753   %tmp13 = or i32 %tmp8, %tmp12
 754   %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
 755   %tmp15 = load i8, i8* %tmp14, align 1 ; p[-4] supplies bits 24-31
 756   %tmp16 = zext i8 %tmp15 to i32
 757   %tmp17 = shl nuw i32 %tmp16, 24 ; no nsw: a byte >= 0x80 reaches the sign bit, which nsw would turn into poison
 758   %tmp18 = or i32 %tmp13, %tmp17
 759   ret i32 %tmp18
 760 }
 761
 762 ; i8* p; i32 i;
 763 ; ((i32) p[i] << 24) | ((i32) p[i + 1] << 16) | ((i32) p[i + 2] << 8) | (i32) p[i + 3]
 764 define i32 @load_i32_by_i8_bswap_base_index_offset(i32* %arg, i32 %arg1) {
 765 ; BSWAP-LABEL: load_i32_by_i8_bswap_base_index_offset:
 766 ; BSWAP:       # %bb.0:
 767 ; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
 768 ; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 769 ; BSWAP-NEXT:    movl (%ecx,%eax), %eax
 770 ; BSWAP-NEXT:    bswapl %eax
 771 ; BSWAP-NEXT:    retl
 772 ;
 773 ; MOVBE-LABEL: load_i32_by_i8_bswap_base_index_offset:
 774 ; MOVBE:       # %bb.0:
 775 ; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 776 ; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 777 ; MOVBE-NEXT:    movbel (%ecx,%eax), %eax
 778 ; MOVBE-NEXT:    retl
 779 ;
 780 ; BSWAP64-LABEL: load_i32_by_i8_bswap_base_index_offset:
 781 ; BSWAP64:       # %bb.0:
 782 ; BSWAP64-NEXT:    movslq %esi, %rax
 783 ; BSWAP64-NEXT:    movl (%rdi,%rax), %eax
 784 ; BSWAP64-NEXT:    bswapl %eax
 785 ; BSWAP64-NEXT:    retq
 786 ;
 787 ; MOVBE64-LABEL: load_i32_by_i8_bswap_base_index_offset:
 788 ; MOVBE64:       # %bb.0:
 789 ; MOVBE64-NEXT:    movslq %esi, %rax
 790 ; MOVBE64-NEXT:    movbel (%rdi,%rax), %eax
 791 ; MOVBE64-NEXT:    retq
 792   %tmp = bitcast i32* %arg to i8* ; the assertions above expect one byte-swapped 32-bit load using base+index addressing
 793   %tmp2 = getelementptr inbounds i8, i8* %tmp, i32 %arg1
 794   %tmp3 = load i8, i8* %tmp2, align 1 ; p[i] supplies bits 24-31
 795   %tmp4 = zext i8 %tmp3 to i32
 796   %tmp5 = shl nuw i32 %tmp4, 24 ; no nsw: a byte >= 0x80 reaches the sign bit, which nsw would turn into poison
 797   %tmp6 = add nuw nsw i32 %arg1, 1
 798   %tmp7 = getelementptr inbounds i8, i8* %tmp, i32 %tmp6
 799   %tmp8 = load i8, i8* %tmp7, align 1 ; p[i + 1] supplies bits 16-23
 800   %tmp9 = zext i8 %tmp8 to i32
 801   %tmp10 = shl nuw nsw i32 %tmp9, 16
 802   %tmp11 = or i32 %tmp10, %tmp5
 803   %tmp12 = add nuw nsw i32 %arg1, 2
 804   %tmp13 = getelementptr inbounds i8, i8* %tmp, i32 %tmp12
 805   %tmp14 = load i8, i8* %tmp13, align 1 ; p[i + 2] supplies bits 8-15
 806   %tmp15 = zext i8 %tmp14 to i32
 807   %tmp16 = shl nuw nsw i32 %tmp15, 8
 808   %tmp17 = or i32 %tmp11, %tmp16
 809   %tmp18 = add nuw nsw i32 %arg1, 3
 810   %tmp19 = getelementptr inbounds i8, i8* %tmp, i32 %tmp18
 811   %tmp20 = load i8, i8* %tmp19, align 1 ; p[i + 3] supplies bits 0-7
 812   %tmp21 = zext i8 %tmp20 to i32
 813   %tmp22 = or i32 %tmp17, %tmp21
 814   ret i32 %tmp22
 815 }
 816
 817 ; Verify that we don't crash handling shl i32 %conv57, 32
 818 define void @shift_i32_by_32(i8* %src1, i8* %src2, i64* %dst) {
 819 ; CHECK-LABEL: shift_i32_by_32:
 820 ; CHECK:       # %bb.0: # %entry
 821 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 822 ; CHECK-NEXT:    movl $-1, 4(%eax)
 823 ; CHECK-NEXT:    movl $-1, (%eax)
 824 ; CHECK-NEXT:    retl
 825 ;
 826 ; CHECK64-LABEL: shift_i32_by_32:
 827 ; CHECK64:       # %bb.0: # %entry
 828 ; CHECK64-NEXT:    movq $-1, (%rdx)
 829 ; CHECK64-NEXT:    retq
 830 entry:
 831   %load1 = load i8, i8* %src1, align 1
 832   %conv46 = zext i8 %load1 to i32
 833   %shl47 = shl i32 %conv46, 56
 834   %or55 = or i32 %shl47, 0
 835   %load2 = load i8, i8* %src2, align 1
 836   %conv57 = zext i8 %load2 to i32
 837   %shl58 = shl i32 %conv57, 32
 838   %or59 = or i32 %or55, %shl58
 839   %or74 = or i32 %or59, 0
 840   %conv75 = sext i32 %or74 to i64
 841   store i64 %conv75, i64* %dst, align 8
 842   ret void
 843 }
 844
 845 declare i16 @llvm.bswap.i16(i16)
 846
 847 ; i16* p;
 848 ; (i32) bswap(p[1]) | ((i32) bswap(p[0]) << 16)
 849 define i32 @load_i32_by_bswap_i16(i32* %arg) {
 850 ; BSWAP-LABEL: load_i32_by_bswap_i16:
 851 ; BSWAP:       # %bb.0:
 852 ; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
 853 ; BSWAP-NEXT:    movl (%eax), %eax
 854 ; BSWAP-NEXT:    bswapl %eax
 855 ; BSWAP-NEXT:    retl
 856 ;
 857 ; MOVBE-LABEL: load_i32_by_bswap_i16:
 858 ; MOVBE:       # %bb.0:
 859 ; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 860 ; MOVBE-NEXT:    movbel (%eax), %eax
 861 ; MOVBE-NEXT:    retl
 862 ;
 863 ; BSWAP64-LABEL: load_i32_by_bswap_i16:
 864 ; BSWAP64:       # %bb.0:
 865 ; BSWAP64-NEXT:    movl (%rdi), %eax
 866 ; BSWAP64-NEXT:    bswapl %eax
 867 ; BSWAP64-NEXT:    retq
 868 ;
 869 ; MOVBE64-LABEL: load_i32_by_bswap_i16:
 870 ; MOVBE64:       # %bb.0:
 871 ; MOVBE64-NEXT:    movbel (%rdi), %eax
 872 ; MOVBE64-NEXT:    retq
 873   %tmp = bitcast i32* %arg to i16*
 874   %tmp1 = load i16, i16* %tmp, align 4
 875   %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
 876   %tmp2 = zext i16 %tmp11 to i32
 877   %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
 878   %tmp4 = load i16, i16* %tmp3, align 1
 879   %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
 880   %tmp5 = zext i16 %tmp41 to i32
 881   %tmp6 = shl nuw nsw i32 %tmp2, 16
 882   %tmp7 = or i32 %tmp6, %tmp5
 883   ret i32 %tmp7
 884 }
 885
 886 ; i16* p;
 887 ; (i32) p[0] | ((sext p[1] to i32) << 16)
 888 define i32 @load_i32_by_sext_i16(i32* %arg) {
 889 ; CHECK-LABEL: load_i32_by_sext_i16:
 890 ; CHECK:       # %bb.0:
 891 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 892 ; CHECK-NEXT:    movl (%eax), %eax
 893 ; CHECK-NEXT:    retl
 894 ;
 895 ; CHECK64-LABEL: load_i32_by_sext_i16:
 896 ; CHECK64:       # %bb.0:
 897 ; CHECK64-NEXT:    movl (%rdi), %eax
 898 ; CHECK64-NEXT:    retq
 899   %tmp = bitcast i32* %arg to i16*
 900   %tmp1 = load i16, i16* %tmp, align 1
 901   %tmp2 = zext i16 %tmp1 to i32
 902   %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
 903   %tmp4 = load i16, i16* %tmp3, align 1
 904   %tmp5 = sext i16 %tmp4 to i32
 905   %tmp6 = shl nuw nsw i32 %tmp5, 16
 906   %tmp7 = or i32 %tmp6, %tmp2
 907   ret i32 %tmp7
 908 }
 909
 910 ; i8* arg; i32 i;
 911 ; p = arg + 12;
 912 ; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
 913 define i32 @load_i32_by_i8_base_offset_index(i8* %arg, i32 %i) {
 914 ; CHECK-LABEL: load_i32_by_i8_base_offset_index:
 915 ; CHECK:       # %bb.0:
 916 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 917 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 918 ; CHECK-NEXT:    movl 12(%eax,%ecx), %eax
 919 ; CHECK-NEXT:    retl
 920 ;
 921 ; CHECK64-LABEL: load_i32_by_i8_base_offset_index:
 922 ; CHECK64:       # %bb.0:
 923 ; CHECK64-NEXT:    movl %esi, %eax
 924 ; CHECK64-NEXT:    movl 12(%rdi,%rax), %eax
 925 ; CHECK64-NEXT:    retq
 926   %tmp = add nuw nsw i32 %i, 3
 927   %tmp2 = add nuw nsw i32 %i, 2
 928   %tmp3 = add nuw nsw i32 %i, 1
 929   %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
 930   %tmp5 = zext i32 %i to i64
 931   %tmp6 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp5
 932   %tmp7 = load i8, i8* %tmp6, align 1
 933   %tmp8 = zext i8 %tmp7 to i32
 934   %tmp9 = zext i32 %tmp3 to i64
 935   %tmp10 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp9
 936   %tmp11 = load i8, i8* %tmp10, align 1
 937   %tmp12 = zext i8 %tmp11 to i32
 938   %tmp13 = shl nuw nsw i32 %tmp12, 8
 939   %tmp14 = or i32 %tmp13, %tmp8
 940   %tmp15 = zext i32 %tmp2 to i64
 941   %tmp16 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp15
 942   %tmp17 = load i8, i8* %tmp16, align 1
 943   %tmp18 = zext i8 %tmp17 to i32
 944   %tmp19 = shl nuw nsw i32 %tmp18, 16
 945   %tmp20 = or i32 %tmp14, %tmp19
 946   %tmp21 = zext i32 %tmp to i64
 947   %tmp22 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp21
 948   %tmp23 = load i8, i8* %tmp22, align 1
 949   %tmp24 = zext i8 %tmp23 to i32
 950   %tmp25 = shl nuw i32 %tmp24, 24
 951   %tmp26 = or i32 %tmp20, %tmp25
 952   ret i32 %tmp26
 953 }
 954
 955 ; i8* arg; i32 i;
 956 ; p = arg + 12;
 957 ; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
 958 define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
 959 ; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
 960 ; CHECK:       # %bb.0:
 961 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 962 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 963 ; CHECK-NEXT:    movl 13(%eax,%ecx), %eax
 964 ; CHECK-NEXT:    retl
 965 ;
 966 ; CHECK64-LABEL: load_i32_by_i8_base_offset_index_2:
 967 ; CHECK64:       # %bb.0:
 968 ; CHECK64-NEXT:    movl %esi, %eax
 969 ; CHECK64-NEXT:    movl 13(%rax,%rdi), %eax
 970 ; CHECK64-NEXT:    retq
 971   %tmp = add nuw nsw i32 %i, 4
 972   %tmp2 = add nuw nsw i32 %i, 3
 973   %tmp3 = add nuw nsw i32 %i, 2
 974   %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
 975   %tmp5 = add nuw nsw i32 %i, 1
 976   %tmp27 = zext i32 %tmp5 to i64
 977   %tmp28 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp27
 978   %tmp29 = load i8, i8* %tmp28, align 1
 979   %tmp30 = zext i8 %tmp29 to i32
 980   %tmp31 = zext i32 %tmp3 to i64
 981   %tmp32 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp31
 982   %tmp33 = load i8, i8* %tmp32, align 1
 983   %tmp34 = zext i8 %tmp33 to i32
 984   %tmp35 = shl nuw nsw i32 %tmp34, 8
 985   %tmp36 = or i32 %tmp35, %tmp30
 986   %tmp37 = zext i32 %tmp2 to i64
 987   %tmp38 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp37
 988   %tmp39 = load i8, i8* %tmp38, align 1
 989   %tmp40 = zext i8 %tmp39 to i32
 990   %tmp41 = shl nuw nsw i32 %tmp40, 16
 991   %tmp42 = or i32 %tmp36, %tmp41
 992   %tmp43 = zext i32 %tmp to i64
 993   %tmp44 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp43
 994   %tmp45 = load i8, i8* %tmp44, align 1
 995   %tmp46 = zext i8 %tmp45 to i32
 996   %tmp47 = shl nuw i32 %tmp46, 24
 997   %tmp48 = or i32 %tmp42, %tmp47
 998   ret i32 %tmp48
 999 }
1000
1001 ; i8* arg; i32 i;
1002 ;
1003 ; p0 = arg + i;
1004 ; p1 = arg + i + 1;
1005 ; p2 = arg + i + 2;
1006 ; p3 = arg + i + 3;
1007 ;
1008 ; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24)
1009 ;
1010 ; This test exercises zero and any extend loads as a part of load combine pattern.
1011 ; In order to fold the pattern above we need to reassociate the address computation
1012 ; first. By the time the address computation is reassociated loads are combined to
1013 ; zext and aext loads.
1014 define i32 @load_i32_by_i8_zaext_loads(i8* %arg, i32 %arg1) {
1015 ; CHECK-LABEL: load_i32_by_i8_zaext_loads:
1016 ; CHECK:       # %bb.0:
1017 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
1018 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1019 ; CHECK-NEXT:    movl 12(%eax,%ecx), %eax
1020 ; CHECK-NEXT:    retl
1021 ;
1022 ; CHECK64-LABEL: load_i32_by_i8_zaext_loads:
1023 ; CHECK64:       # %bb.0:
1024 ; CHECK64-NEXT:    movl %esi, %eax
1025 ; CHECK64-NEXT:    movl 12(%rdi,%rax), %eax
1026 ; CHECK64-NEXT:    retq
1027   %tmp = add nuw nsw i32 %arg1, 3
1028   %tmp2 = add nuw nsw i32 %arg1, 2
1029   %tmp3 = add nuw nsw i32 %arg1, 1
1030   %tmp4 = zext i32 %tmp to i64
1031   %tmp5 = zext i32 %tmp2 to i64
1032   %tmp6 = zext i32 %tmp3 to i64
1033   %tmp24 = getelementptr inbounds i8, i8* %arg, i64 %tmp4
1034   %tmp30 = getelementptr inbounds i8, i8* %arg, i64 %tmp5
1035   %tmp31 = getelementptr inbounds i8, i8* %arg, i64 %tmp6
1036   %tmp32 = getelementptr inbounds i8, i8* %arg, i64 12
1037   %tmp33 = zext i32 %arg1 to i64
1038   %tmp34 = getelementptr inbounds i8, i8* %tmp32, i64 %tmp33
1039   %tmp35 = load i8, i8* %tmp34, align 1
1040   %tmp36 = zext i8 %tmp35 to i32
1041   %tmp37 = getelementptr inbounds i8, i8* %tmp31, i64 12
1042   %tmp38 = load i8, i8* %tmp37, align 1
1043   %tmp39 = zext i8 %tmp38 to i32
1044   %tmp40 = shl nuw nsw i32 %tmp39, 8
1045   %tmp41 = or i32 %tmp40, %tmp36
1046   %tmp42 = getelementptr inbounds i8, i8* %tmp30, i64 12
1047   %tmp43 = load i8, i8* %tmp42, align 1
1048   %tmp44 = zext i8 %tmp43 to i32
1049   %tmp45 = shl nuw nsw i32 %tmp44, 16
1050   %tmp46 = or i32 %tmp41, %tmp45
1051   %tmp47 = getelementptr inbounds i8, i8* %tmp24, i64 12
1052   %tmp48 = load i8, i8* %tmp47, align 1
1053   %tmp49 = zext i8 %tmp48 to i32
1054   %tmp50 = shl nuw i32 %tmp49, 24
1055   %tmp51 = or i32 %tmp46, %tmp50
1056   ret i32 %tmp51
1057 }
1058
1059 ; The same as load_i32_by_i8_zaext_loads but the last load is combined to
1060 ; a sext load.
1061 ;
1062 ; i8* arg; i32 i;
1063 ;
1064 ; p0 = arg + i;
1065 ; p1 = arg + i + 1;
1066 ; p2 = arg + i + 2;
1067 ; p3 = arg + i + 3;
1068 ;
1069 ; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24)
1070 define i32 @load_i32_by_i8_zsext_loads(i8* %arg, i32 %arg1) {
1071 ; CHECK-LABEL: load_i32_by_i8_zsext_loads:
1072 ; CHECK:       # %bb.0:
1073 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
1074 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1075 ; CHECK-NEXT:    movl 12(%eax,%ecx), %eax
1076 ; CHECK-NEXT:    retl
1077 ;
1078 ; CHECK64-LABEL: load_i32_by_i8_zsext_loads:
1079 ; CHECK64:       # %bb.0:
1080 ; CHECK64-NEXT:    movl %esi, %eax
1081 ; CHECK64-NEXT:    movl 12(%rdi,%rax), %eax
1082 ; CHECK64-NEXT:    retq
1083   %tmp = add nuw nsw i32 %arg1, 3
1084   %tmp2 = add nuw nsw i32 %arg1, 2
1085   %tmp3 = add nuw nsw i32 %arg1, 1
1086   %tmp4 = zext i32 %tmp to i64
1087   %tmp5 = zext i32 %tmp2 to i64
1088   %tmp6 = zext i32 %tmp3 to i64
1089   %tmp24 = getelementptr inbounds i8, i8* %arg, i64 %tmp4
1090   %tmp30 = getelementptr inbounds i8, i8* %arg, i64 %tmp5
1091   %tmp31 = getelementptr inbounds i8, i8* %arg, i64 %tmp6
1092   %tmp32 = getelementptr inbounds i8, i8* %arg, i64 12
1093   %tmp33 = zext i32 %arg1 to i64
1094   %tmp34 = getelementptr inbounds i8, i8* %tmp32, i64 %tmp33
1095   %tmp35 = load i8, i8* %tmp34, align 1
1096   %tmp36 = zext i8 %tmp35 to i32
1097   %tmp37 = getelementptr inbounds i8, i8* %tmp31, i64 12
1098   %tmp38 = load i8, i8* %tmp37, align 1
1099   %tmp39 = zext i8 %tmp38 to i32
1100   %tmp40 = shl nuw nsw i32 %tmp39, 8
1101   %tmp41 = or i32 %tmp40, %tmp36
1102   %tmp42 = getelementptr inbounds i8, i8* %tmp30, i64 12
1103   %tmp43 = load i8, i8* %tmp42, align 1
1104   %tmp44 = zext i8 %tmp43 to i32
1105   %tmp45 = shl nuw nsw i32 %tmp44, 16
1106   %tmp46 = or i32 %tmp41, %tmp45
1107   %tmp47 = getelementptr inbounds i8, i8* %tmp24, i64 12
1108   %tmp48 = load i8, i8* %tmp47, align 1
1109   %tmp49 = sext i8 %tmp48 to i16
1110   %tmp50 = zext i16 %tmp49 to i32
1111   %tmp51 = shl nuw i32 %tmp50, 24
1112   %tmp52 = or i32 %tmp46, %tmp51
1113   ret i32 %tmp52
1114 }
1115
1116 ; i8* p;
1117 ; (i32) p[0] | ((i32) p[1] << 8)
1118 define i32 @zext_load_i32_by_i8(i32* %arg) {
1119 ; CHECK-LABEL: zext_load_i32_by_i8:
1120 ; CHECK:       # %bb.0:
1121 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
1122 ; CHECK-NEXT:    movzbl (%eax), %ecx
1123 ; CHECK-NEXT:    movzbl 1(%eax), %eax
1124 ; CHECK-NEXT:    shll $8, %eax
1125 ; CHECK-NEXT:    orl %ecx, %eax
1126 ; CHECK-NEXT:    retl
1127 ;
1128 ; CHECK64-LABEL: zext_load_i32_by_i8:
1129 ; CHECK64:       # %bb.0:
1130 ; CHECK64-NEXT:    movzbl (%rdi), %ecx
1131 ; CHECK64-NEXT:    movzbl 1(%rdi), %eax
1132 ; CHECK64-NEXT:    shll $8, %eax
1133 ; CHECK64-NEXT:    orl %ecx, %eax
1134 ; CHECK64-NEXT:    retq
1135   %tmp = bitcast i32* %arg to i8*
1136   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
1137   %tmp2 = load i8, i8* %tmp1, align 1
1138   %tmp3 = zext i8 %tmp2 to i32
1139   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
1140   %tmp5 = load i8, i8* %tmp4, align 1
1141   %tmp6 = zext i8 %tmp5 to i32
1142   %tmp7 = shl nuw nsw i32 %tmp6, 8
1143   %tmp8 = or i32 %tmp7, %tmp3
1144   ret i32 %tmp8
1145 }
1146
1147 ; i8* p;
1148 ; ((i32) p[0] << 8) | ((i32) p[1] << 16)
1149 define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) {
1150 ; CHECK-LABEL: zext_load_i32_by_i8_shl_8:
1151 ; CHECK:       # %bb.0:
1152 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
1153 ; CHECK-NEXT:    movzbl (%eax), %ecx
1154 ; CHECK-NEXT:    shll $8, %ecx
1155 ; CHECK-NEXT:    movzbl 1(%eax), %eax
1156 ; CHECK-NEXT:    shll $16, %eax
1157 ; CHECK-NEXT:    orl %ecx, %eax
1158 ; CHECK-NEXT:    retl
1159 ;
1160 ; CHECK64-LABEL: zext_load_i32_by_i8_shl_8:
1161 ; CHECK64:       # %bb.0:
1162 ; CHECK64-NEXT:    movzbl (%rdi), %ecx
1163 ; CHECK64-NEXT:    shll $8, %ecx
1164 ; CHECK64-NEXT:    movzbl 1(%rdi), %eax
1165 ; CHECK64-NEXT:    shll $16, %eax
1166 ; CHECK64-NEXT:    orl %ecx, %eax
1167 ; CHECK64-NEXT:    retq
1168   %tmp = bitcast i32* %arg to i8*
1169   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
1170   %tmp2 = load i8, i8* %tmp1, align 1
1171   %tmp3 = zext i8 %tmp2 to i32
1172   %tmp30 = shl nuw nsw i32 %tmp3, 8
1173   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
1174   %tmp5 = load i8, i8* %tmp4, align 1
1175   %tmp6 = zext i8 %tmp5 to i32
1176   %tmp7 = shl nuw nsw i32 %tmp6, 16
1177   %tmp8 = or i32 %tmp7, %tmp30
1178   ret i32 %tmp8
1179 }
1180
1181 ; i8* p;
1182 ; ((i32) p[0] << 16) | ((i32) p[1] << 24)
1183 define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) {
1184 ; CHECK-LABEL: zext_load_i32_by_i8_shl_16:
1185 ; CHECK:       # %bb.0:
1186 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
1187 ; CHECK-NEXT:    movzbl (%eax), %ecx
1188 ; CHECK-NEXT:    shll $16, %ecx
1189 ; CHECK-NEXT:    movzbl 1(%eax), %eax
1190 ; CHECK-NEXT:    shll $24, %eax
1191 ; CHECK-NEXT:    orl %ecx, %eax
1192 ; CHECK-NEXT:    retl
1193 ;
1194 ; CHECK64-LABEL: zext_load_i32_by_i8_shl_16:
1195 ; CHECK64:       # %bb.0:
1196 ; CHECK64-NEXT:    movzbl (%rdi), %ecx
1197 ; CHECK64-NEXT:    shll $16, %ecx
1198 ; CHECK64-NEXT:    movzbl 1(%rdi), %eax
1199 ; CHECK64-NEXT:    shll $24, %eax
1200 ; CHECK64-NEXT:    orl %ecx, %eax
1201 ; CHECK64-NEXT:    retq
1202   %tmp = bitcast i32* %arg to i8*
1203   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
1204   %tmp2 = load i8, i8* %tmp1, align 1
1205   %tmp3 = zext i8 %tmp2 to i32
1206   %tmp30 = shl nuw nsw i32 %tmp3, 16
1207   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
1208   %tmp5 = load i8, i8* %tmp4, align 1
1209   %tmp6 = zext i8 %tmp5 to i32
1210   %tmp7 = shl nuw nsw i32 %tmp6, 24
1211   %tmp8 = or i32 %tmp7, %tmp30
1212   ret i32 %tmp8
1213 }
1214
1215 ; i8* p;
1216 ; (i32) p[1] | ((i32) p[0] << 8)
1217 define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
1218 ; CHECK-LABEL: zext_load_i32_by_i8_bswap:
1219 ; CHECK:       # %bb.0:
1220 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
1221 ; CHECK-NEXT:    movzbl 1(%eax), %ecx
1222 ; CHECK-NEXT:    movzbl (%eax), %eax
1223 ; CHECK-NEXT:    shll $8, %eax
1224 ; CHECK-NEXT:    orl %ecx, %eax
1225 ; CHECK-NEXT:    retl
1226 ;
1227 ; CHECK64-LABEL: zext_load_i32_by_i8_bswap:
1228 ; CHECK64:       # %bb.0:
1229 ; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
1230 ; CHECK64-NEXT:    movzbl (%rdi), %eax
1231 ; CHECK64-NEXT:    shll $8, %eax
1232 ; CHECK64-NEXT:    orl %ecx, %eax
1233 ; CHECK64-NEXT:    retq
1234   %tmp = bitcast i32* %arg to i8*
1235   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
1236   %tmp2 = load i8, i8* %tmp1, align 1
1237   %tmp3 = zext i8 %tmp2 to i32
1238   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
1239   %tmp5 = load i8, i8* %tmp4, align 1
1240   %tmp6 = zext i8 %tmp5 to i32
1241   %tmp7 = shl nuw nsw i32 %tmp6, 8
1242   %tmp8 = or i32 %tmp7, %tmp3
1243   ret i32 %tmp8
1244 }
1245
1246 ; i8* p;
1247 ; ((i32) p[1] << 8) | ((i32) p[0] << 16)
1248 define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) {
1249 ; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
1250 ; CHECK:       # %bb.0:
1251 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
1252 ; CHECK-NEXT:    movzbl 1(%eax), %ecx
1253 ; CHECK-NEXT:    shll $8, %ecx
1254 ; CHECK-NEXT:    movzbl (%eax), %eax
1255 ; CHECK-NEXT:    shll $16, %eax
1256 ; CHECK-NEXT:    orl %ecx, %eax
1257 ; CHECK-NEXT:    retl
1258 ;
1259 ; CHECK64-LABEL: zext_load_i32_by_i8_bswap_shl_8:
1260 ; CHECK64:       # %bb.0:
1261 ; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
1262 ; CHECK64-NEXT:    shll $8, %ecx
1263 ; CHECK64-NEXT:    movzbl (%rdi), %eax
1264 ; CHECK64-NEXT:    shll $16, %eax
1265 ; CHECK64-NEXT:    orl %ecx, %eax
1266 ; CHECK64-NEXT:    retq
1267   %tmp = bitcast i32* %arg to i8*
1268   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
1269   %tmp2 = load i8, i8* %tmp1, align 1
1270   %tmp3 = zext i8 %tmp2 to i32
1271   %tmp30 = shl nuw nsw i32 %tmp3, 8
1272   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
1273   %tmp5 = load i8, i8* %tmp4, align 1
1274   %tmp6 = zext i8 %tmp5 to i32
1275   %tmp7 = shl nuw nsw i32 %tmp6, 16
1276   %tmp8 = or i32 %tmp7, %tmp30
1277   ret i32 %tmp8
1278 }
1279
1280 ; i8* p;
1281 ; ((i32) p[1] << 16) | ((i32) p[0] << 24)
1282 define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) {
1283 ; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
1284 ; CHECK:       # %bb.0:
1285 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
1286 ; CHECK-NEXT:    movzbl 1(%eax), %ecx
1287 ; CHECK-NEXT:    shll $16, %ecx
1288 ; CHECK-NEXT:    movzbl (%eax), %eax
1289 ; CHECK-NEXT:    shll $24, %eax
1290 ; CHECK-NEXT:    orl %ecx, %eax
1291 ; CHECK-NEXT:    retl
1292 ;
1293 ; CHECK64-LABEL: zext_load_i32_by_i8_bswap_shl_16:
1294 ; CHECK64:       # %bb.0:
1295 ; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
1296 ; CHECK64-NEXT:    shll $16, %ecx
1297 ; CHECK64-NEXT:    movzbl (%rdi), %eax
1298 ; CHECK64-NEXT:    shll $24, %eax
1299 ; CHECK64-NEXT:    orl %ecx, %eax
1300 ; CHECK64-NEXT:    retq
1301   %tmp = bitcast i32* %arg to i8*
1302   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
1303   %tmp2 = load i8, i8* %tmp1, align 1
1304   %tmp3 = zext i8 %tmp2 to i32
1305   %tmp30 = shl nuw nsw i32 %tmp3, 16
1306   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
1307   %tmp5 = load i8, i8* %tmp4, align 1
1308   %tmp6 = zext i8 %tmp5 to i32
1309   %tmp7 = shl nuw nsw i32 %tmp6, 24
1310   %tmp8 = or i32 %tmp7, %tmp30
1311   ret i32 %tmp8
1312 }