llvm/test/CodeGen/X86/load-combine.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=BSWAP
   3 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+movbe | FileCheck %s --check-prefix=CHECK --check-prefix=MOVBE
   4 ; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK64 --check-prefix=BSWAP64
   5 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+movbe | FileCheck %s --check-prefix=CHECK64 --check-prefix=MOVBE64
   6
   7 ; ptr p;
   8 ; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
   9 define i32 @load_i32_by_i8(ptr %arg) { ; OR of four adjacent byte loads, lowest address in the low byte
  10 ; CHECK-LABEL: load_i32_by_i8:
  11 ; CHECK:       # %bb.0:
  12 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
  13 ; CHECK-NEXT:    movl (%eax), %eax
  14 ; CHECK-NEXT:    retl
  15 ;
  16 ; CHECK64-LABEL: load_i32_by_i8:
  17 ; CHECK64:       # %bb.0:
  18 ; CHECK64-NEXT:    movl (%rdi), %eax
  19 ; CHECK64-NEXT:    retq
  20   %tmp1 = load i8, ptr %arg, align 1 ; p[0], stays in bits 0-7
  21   %tmp2 = zext i8 %tmp1 to i32
  22   %tmp3 = getelementptr inbounds i8, ptr %arg, i32 1
  23   %tmp4 = load i8, ptr %tmp3, align 1 ; p[1]
  24   %tmp5 = zext i8 %tmp4 to i32
  25   %tmp6 = shl nuw nsw i32 %tmp5, 8 ; p[1] -> bits 8-15
  26   %tmp7 = or i32 %tmp6, %tmp2
  27   %tmp8 = getelementptr inbounds i8, ptr %arg, i32 2
  28   %tmp9 = load i8, ptr %tmp8, align 1 ; p[2]
  29   %tmp10 = zext i8 %tmp9 to i32
  30   %tmp11 = shl nuw nsw i32 %tmp10, 16 ; p[2] -> bits 16-23
  31   %tmp12 = or i32 %tmp7, %tmp11
  32   %tmp13 = getelementptr inbounds i8, ptr %arg, i32 3
  33   %tmp14 = load i8, ptr %tmp13, align 1 ; p[3]
  34   %tmp15 = zext i8 %tmp14 to i32
  35   %tmp16 = shl nuw nsw i32 %tmp15, 24 ; p[3] -> bits 24-31
  36   %tmp17 = or i32 %tmp12, %tmp16
  37   ret i32 %tmp17 ; expected asm above: a single 32-bit movl, no shifts or ORs
  38 }
  39
  40 ; ptr p;
  41 ; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
  42 define i32 @load_i32_by_i8_bswap(ptr %arg) { ; four adjacent byte loads, lowest address in the HIGH byte
  43 ; BSWAP-LABEL: load_i32_by_i8_bswap:
  44 ; BSWAP:       # %bb.0:
  45 ; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
  46 ; BSWAP-NEXT:    movl (%eax), %eax
  47 ; BSWAP-NEXT:    bswapl %eax
  48 ; BSWAP-NEXT:    retl
  49 ;
  50 ; MOVBE-LABEL: load_i32_by_i8_bswap:
  51 ; MOVBE:       # %bb.0:
  52 ; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
  53 ; MOVBE-NEXT:    movbel (%eax), %eax
  54 ; MOVBE-NEXT:    retl
  55 ;
  56 ; BSWAP64-LABEL: load_i32_by_i8_bswap:
  57 ; BSWAP64:       # %bb.0:
  58 ; BSWAP64-NEXT:    movl (%rdi), %eax
  59 ; BSWAP64-NEXT:    bswapl %eax
  60 ; BSWAP64-NEXT:    retq
  61 ;
  62 ; MOVBE64-LABEL: load_i32_by_i8_bswap:
  63 ; MOVBE64:       # %bb.0:
  64 ; MOVBE64-NEXT:    movbel (%rdi), %eax
  65 ; MOVBE64-NEXT:    retq
  66   %tmp1 = load i8, ptr %arg, align 1 ; p[0]
  67   %tmp2 = zext i8 %tmp1 to i32
  68   %tmp3 = shl nuw nsw i32 %tmp2, 24 ; p[0] -> bits 24-31
  69   %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
  70   %tmp5 = load i8, ptr %tmp4, align 1 ; p[1]
  71   %tmp6 = zext i8 %tmp5 to i32
  72   %tmp7 = shl nuw nsw i32 %tmp6, 16 ; p[1] -> bits 16-23
  73   %tmp8 = or i32 %tmp7, %tmp3
  74   %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
  75   %tmp10 = load i8, ptr %tmp9, align 1 ; p[2]
  76   %tmp11 = zext i8 %tmp10 to i32
  77   %tmp12 = shl nuw nsw i32 %tmp11, 8 ; p[2] -> bits 8-15
  78   %tmp13 = or i32 %tmp8, %tmp12
  79   %tmp14 = getelementptr inbounds i8, ptr %arg, i32 3
  80   %tmp15 = load i8, ptr %tmp14, align 1 ; p[3], stays in bits 0-7
  81   %tmp16 = zext i8 %tmp15 to i32
  82   %tmp17 = or i32 %tmp13, %tmp16
  83   ret i32 %tmp17 ; expected asm above: one 32-bit load plus bswapl, or a single movbel with +movbe
  84 }
  85
  86 ; ptr p;
  87 ; (i32) p[0] | ((i32) p[1] << 16)
  88 define i32 @load_i32_by_i16(ptr %arg) { ; OR of two adjacent i16 loads, lower address in the low half
  89 ; CHECK-LABEL: load_i32_by_i16:
  90 ; CHECK:       # %bb.0:
  91 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
  92 ; CHECK-NEXT:    movl (%eax), %eax
  93 ; CHECK-NEXT:    retl
  94 ;
  95 ; CHECK64-LABEL: load_i32_by_i16:
  96 ; CHECK64:       # %bb.0:
  97 ; CHECK64-NEXT:    movl (%rdi), %eax
  98 ; CHECK64-NEXT:    retq
  99   %tmp1 = load i16, ptr %arg, align 1 ; p[0], stays in bits 0-15
 100   %tmp2 = zext i16 %tmp1 to i32
 101   %tmp3 = getelementptr inbounds i16, ptr %arg, i32 1 ; i16 element 1, i.e. byte offset 2
 102   %tmp4 = load i16, ptr %tmp3, align 1 ; p[1]
 103   %tmp5 = zext i16 %tmp4 to i32
 104   %tmp6 = shl nuw nsw i32 %tmp5, 16 ; p[1] -> bits 16-31
 105   %tmp7 = or i32 %tmp6, %tmp2
 106   ret i32 %tmp7 ; expected asm above: a single 32-bit movl
 107 }
 108
 109 ; ptr p_16;
 110 ; ptr p_8 = (ptr) p_16;
 111 ; (i32) p_16[0] | ((i32) p_8[2] << 16) | ((i32) p_8[3] << 24)
 112 define i32 @load_i32_by_i16_i8(ptr %arg) { ; mixed widths: one i16 load for the low half, two i8 loads for the high half
 113 ; CHECK-LABEL: load_i32_by_i16_i8:
 114 ; CHECK:       # %bb.0:
 115 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 116 ; CHECK-NEXT:    movl (%eax), %eax
 117 ; CHECK-NEXT:    retl
 118 ;
 119 ; CHECK64-LABEL: load_i32_by_i16_i8:
 120 ; CHECK64:       # %bb.0:
 121 ; CHECK64-NEXT:    movl (%rdi), %eax
 122 ; CHECK64-NEXT:    retq
 123   %tmp2 = load i16, ptr %arg, align 1 ; bytes 0-1 as one i16, stays in bits 0-15
 124   %tmp3 = zext i16 %tmp2 to i32
 125   %tmp4 = getelementptr inbounds i8, ptr %arg, i32 2
 126   %tmp5 = load i8, ptr %tmp4, align 1 ; byte 2
 127   %tmp6 = zext i8 %tmp5 to i32
 128   %tmp7 = shl nuw nsw i32 %tmp6, 16 ; byte 2 -> bits 16-23
 129   %tmp8 = getelementptr inbounds i8, ptr %arg, i32 3
 130   %tmp9 = load i8, ptr %tmp8, align 1 ; byte 3
 131   %tmp10 = zext i8 %tmp9 to i32
 132   %tmp11 = shl nuw nsw i32 %tmp10, 24 ; byte 3 -> bits 24-31
 133   %tmp12 = or i32 %tmp7, %tmp11 ; high half is combined first, low half is OR'ed in last
 134   %tmp13 = or i32 %tmp12, %tmp3
 135   ret i32 %tmp13 ; expected asm above: a single 32-bit movl
 136 }
 137
 138
 139 ; ptr p;
 140 ; (i32) ((i16) p[0] | ((i16) p[1] << 8)) | ((i32) ((i16) p[2] | ((i16) p[3] << 8)) << 16)
 141 define i32 @load_i32_by_i16_by_i8(ptr %arg) { ; two i16 halves, each built from two byte loads, then joined into an i32
 142 ; CHECK-LABEL: load_i32_by_i16_by_i8:
 143 ; CHECK:       # %bb.0:
 144 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 145 ; CHECK-NEXT:    movl (%eax), %eax
 146 ; CHECK-NEXT:    retl
 147 ;
 148 ; CHECK64-LABEL: load_i32_by_i16_by_i8:
 149 ; CHECK64:       # %bb.0:
 150 ; CHECK64-NEXT:    movl (%rdi), %eax
 151 ; CHECK64-NEXT:    retq
 152   %tmp1 = load i8, ptr %arg, align 1 ; byte 0
 153   %tmp2 = zext i8 %tmp1 to i16
 154   %tmp3 = getelementptr inbounds i8, ptr %arg, i32 1
 155   %tmp4 = load i8, ptr %tmp3, align 1 ; byte 1
 156   %tmp5 = zext i8 %tmp4 to i16
 157   %tmp6 = shl nuw nsw i16 %tmp5, 8 ; byte 1 is the high byte of the first i16
 158   %tmp7 = or i16 %tmp6, %tmp2 ; first i16 = byte0 | (byte1 << 8)
 159   %tmp8 = getelementptr inbounds i8, ptr %arg, i32 2
 160   %tmp9 = load i8, ptr %tmp8, align 1 ; byte 2
 161   %tmp10 = zext i8 %tmp9 to i16
 162   %tmp11 = getelementptr inbounds i8, ptr %arg, i32 3
 163   %tmp12 = load i8, ptr %tmp11, align 1 ; byte 3
 164   %tmp13 = zext i8 %tmp12 to i16
 165   %tmp14 = shl nuw nsw i16 %tmp13, 8 ; byte 3 is the high byte of the second i16
 166   %tmp15 = or i16 %tmp14, %tmp10 ; second i16 = byte2 | (byte3 << 8)
 167   %tmp16 = zext i16 %tmp7 to i32
 168   %tmp17 = zext i16 %tmp15 to i32
 169   %tmp18 = shl nuw nsw i32 %tmp17, 16 ; second i16 -> bits 16-31
 170   %tmp19 = or i32 %tmp18, %tmp16
 171   ret i32 %tmp19 ; expected asm above: a single 32-bit movl
 172 }
 173
 174 ; ptr p;
 175 ; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[2] << 8) | (i16) p[3])
 176 define i32 @load_i32_by_i16_by_i8_bswap(ptr %arg) { ; byte-reversed variant: two i16 halves built from bytes, lowest address ends up highest
 177 ; BSWAP-LABEL: load_i32_by_i16_by_i8_bswap:
 178 ; BSWAP:       # %bb.0:
 179 ; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
 180 ; BSWAP-NEXT:    movl (%eax), %eax
 181 ; BSWAP-NEXT:    bswapl %eax
 182 ; BSWAP-NEXT:    retl
 183 ;
 184 ; MOVBE-LABEL: load_i32_by_i16_by_i8_bswap:
 185 ; MOVBE:       # %bb.0:
 186 ; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 187 ; MOVBE-NEXT:    movbel (%eax), %eax
 188 ; MOVBE-NEXT:    retl
 189 ;
 190 ; BSWAP64-LABEL: load_i32_by_i16_by_i8_bswap:
 191 ; BSWAP64:       # %bb.0:
 192 ; BSWAP64-NEXT:    movl (%rdi), %eax
 193 ; BSWAP64-NEXT:    bswapl %eax
 194 ; BSWAP64-NEXT:    retq
 195 ;
 196 ; MOVBE64-LABEL: load_i32_by_i16_by_i8_bswap:
 197 ; MOVBE64:       # %bb.0:
 198 ; MOVBE64-NEXT:    movbel (%rdi), %eax
 199 ; MOVBE64-NEXT:    retq
 200   %tmp1 = load i8, ptr %arg, align 1 ; byte 0
 201   %tmp2 = zext i8 %tmp1 to i16
 202   %tmp3 = getelementptr inbounds i8, ptr %arg, i32 1
 203   %tmp4 = load i8, ptr %tmp3, align 1 ; byte 1
 204   %tmp5 = zext i8 %tmp4 to i16
 205   %tmp6 = shl nuw nsw i16 %tmp2, 8 ; byte 0 is the high byte of the first i16
 206   %tmp7 = or i16 %tmp6, %tmp5 ; first i16 = (byte0 << 8) | byte1
 207   %tmp8 = getelementptr inbounds i8, ptr %arg, i32 2
 208   %tmp9 = load i8, ptr %tmp8, align 1 ; byte 2
 209   %tmp10 = zext i8 %tmp9 to i16
 210   %tmp11 = getelementptr inbounds i8, ptr %arg, i32 3
 211   %tmp12 = load i8, ptr %tmp11, align 1 ; byte 3
 212   %tmp13 = zext i8 %tmp12 to i16
 213   %tmp14 = shl nuw nsw i16 %tmp10, 8 ; byte 2 is the high byte of the second i16
 214   %tmp15 = or i16 %tmp14, %tmp13 ; second i16 = (byte2 << 8) | byte3
 215   %tmp16 = zext i16 %tmp7 to i32
 216   %tmp17 = zext i16 %tmp15 to i32
 217   %tmp18 = shl nuw nsw i32 %tmp16, 16 ; first i16 -> bits 16-31
 218   %tmp19 = or i32 %tmp18, %tmp17
 219   ret i32 %tmp19 ; expected asm above: one 32-bit load plus bswapl, or a single movbel with +movbe
 220 }
 221
 222 ; ptr p;
 223 ; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
 224 define i64 @load_i64_by_i8(ptr %arg) { ; OR of eight adjacent byte loads, lowest address in the low byte
 225 ; CHECK-LABEL: load_i64_by_i8:
 226 ; CHECK:       # %bb.0:
 227 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 228 ; CHECK-NEXT:    movl (%ecx), %eax
 229 ; CHECK-NEXT:    movl 4(%ecx), %edx
 230 ; CHECK-NEXT:    retl
 231 ;
 232 ; CHECK64-LABEL: load_i64_by_i8:
 233 ; CHECK64:       # %bb.0:
 234 ; CHECK64-NEXT:    movq (%rdi), %rax
 235 ; CHECK64-NEXT:    retq
 236   %tmp1 = load i8, ptr %arg, align 1 ; p[0], stays in bits 0-7
 237   %tmp2 = zext i8 %tmp1 to i64
 238   %tmp3 = getelementptr inbounds i8, ptr %arg, i64 1
 239   %tmp4 = load i8, ptr %tmp3, align 1 ; p[1]
 240   %tmp5 = zext i8 %tmp4 to i64
 241   %tmp6 = shl nuw nsw i64 %tmp5, 8
 242   %tmp7 = or i64 %tmp6, %tmp2
 243   %tmp8 = getelementptr inbounds i8, ptr %arg, i64 2
 244   %tmp9 = load i8, ptr %tmp8, align 1 ; p[2]
 245   %tmp10 = zext i8 %tmp9 to i64
 246   %tmp11 = shl nuw nsw i64 %tmp10, 16
 247   %tmp12 = or i64 %tmp7, %tmp11
 248   %tmp13 = getelementptr inbounds i8, ptr %arg, i64 3
 249   %tmp14 = load i8, ptr %tmp13, align 1 ; p[3]
 250   %tmp15 = zext i8 %tmp14 to i64
 251   %tmp16 = shl nuw nsw i64 %tmp15, 24
 252   %tmp17 = or i64 %tmp12, %tmp16
 253   %tmp18 = getelementptr inbounds i8, ptr %arg, i64 4
 254   %tmp19 = load i8, ptr %tmp18, align 1 ; p[4]
 255   %tmp20 = zext i8 %tmp19 to i64
 256   %tmp21 = shl nuw nsw i64 %tmp20, 32
 257   %tmp22 = or i64 %tmp17, %tmp21
 258   %tmp23 = getelementptr inbounds i8, ptr %arg, i64 5
 259   %tmp24 = load i8, ptr %tmp23, align 1 ; p[5]
 260   %tmp25 = zext i8 %tmp24 to i64
 261   %tmp26 = shl nuw nsw i64 %tmp25, 40
 262   %tmp27 = or i64 %tmp22, %tmp26
 263   %tmp28 = getelementptr inbounds i8, ptr %arg, i64 6
 264   %tmp29 = load i8, ptr %tmp28, align 1 ; p[6]
 265   %tmp30 = zext i8 %tmp29 to i64
 266   %tmp31 = shl nuw nsw i64 %tmp30, 48
 267   %tmp32 = or i64 %tmp27, %tmp31
 268   %tmp33 = getelementptr inbounds i8, ptr %arg, i64 7
 269   %tmp34 = load i8, ptr %tmp33, align 1 ; p[7]
 270   %tmp35 = zext i8 %tmp34 to i64
 271   %tmp36 = shl nuw i64 %tmp35, 56 ; p[7] -> bits 56-63; this is the only shift here without nsw
 272   %tmp37 = or i64 %tmp32, %tmp36
 273   ret i64 %tmp37 ; expected asm above: one movq on x86_64, two 32-bit movl (offsets 0 and 4) on i686
 274 }
 275
 276 ; ptr p;
 277 ; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
 278 define i64 @load_i64_by_i8_bswap(ptr %arg) { ; eight adjacent byte loads, lowest address in the HIGH byte
 279 ; BSWAP-LABEL: load_i64_by_i8_bswap:
 280 ; BSWAP:       # %bb.0:
 281 ; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
 282 ; BSWAP-NEXT:    movl (%eax), %edx
 283 ; BSWAP-NEXT:    movl 4(%eax), %eax
 284 ; BSWAP-NEXT:    bswapl %eax
 285 ; BSWAP-NEXT:    bswapl %edx
 286 ; BSWAP-NEXT:    retl
 287 ;
 288 ; MOVBE-LABEL: load_i64_by_i8_bswap:
 289 ; MOVBE:       # %bb.0:
 290 ; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 291 ; MOVBE-NEXT:    movbel 4(%ecx), %eax
 292 ; MOVBE-NEXT:    movbel (%ecx), %edx
 293 ; MOVBE-NEXT:    retl
 294 ;
 295 ; BSWAP64-LABEL: load_i64_by_i8_bswap:
 296 ; BSWAP64:       # %bb.0:
 297 ; BSWAP64-NEXT:    movq (%rdi), %rax
 298 ; BSWAP64-NEXT:    bswapq %rax
 299 ; BSWAP64-NEXT:    retq
 300 ;
 301 ; MOVBE64-LABEL: load_i64_by_i8_bswap:
 302 ; MOVBE64:       # %bb.0:
 303 ; MOVBE64-NEXT:    movbeq (%rdi), %rax
 304 ; MOVBE64-NEXT:    retq
 305   %tmp1 = load i8, ptr %arg, align 1 ; p[0]
 306   %tmp2 = zext i8 %tmp1 to i64
 307   %tmp3 = shl nuw i64 %tmp2, 56 ; p[0] -> bits 56-63; this is the only shift here without nsw
 308   %tmp4 = getelementptr inbounds i8, ptr %arg, i64 1
 309   %tmp5 = load i8, ptr %tmp4, align 1 ; p[1]
 310   %tmp6 = zext i8 %tmp5 to i64
 311   %tmp7 = shl nuw nsw i64 %tmp6, 48
 312   %tmp8 = or i64 %tmp7, %tmp3
 313   %tmp9 = getelementptr inbounds i8, ptr %arg, i64 2
 314   %tmp10 = load i8, ptr %tmp9, align 1 ; p[2]
 315   %tmp11 = zext i8 %tmp10 to i64
 316   %tmp12 = shl nuw nsw i64 %tmp11, 40
 317   %tmp13 = or i64 %tmp8, %tmp12
 318   %tmp14 = getelementptr inbounds i8, ptr %arg, i64 3
 319   %tmp15 = load i8, ptr %tmp14, align 1 ; p[3]
 320   %tmp16 = zext i8 %tmp15 to i64
 321   %tmp17 = shl nuw nsw i64 %tmp16, 32
 322   %tmp18 = or i64 %tmp13, %tmp17
 323   %tmp19 = getelementptr inbounds i8, ptr %arg, i64 4
 324   %tmp20 = load i8, ptr %tmp19, align 1 ; p[4]
 325   %tmp21 = zext i8 %tmp20 to i64
 326   %tmp22 = shl nuw nsw i64 %tmp21, 24
 327   %tmp23 = or i64 %tmp18, %tmp22
 328   %tmp24 = getelementptr inbounds i8, ptr %arg, i64 5
 329   %tmp25 = load i8, ptr %tmp24, align 1 ; p[5]
 330   %tmp26 = zext i8 %tmp25 to i64
 331   %tmp27 = shl nuw nsw i64 %tmp26, 16
 332   %tmp28 = or i64 %tmp23, %tmp27
 333   %tmp29 = getelementptr inbounds i8, ptr %arg, i64 6
 334   %tmp30 = load i8, ptr %tmp29, align 1 ; p[6]
 335   %tmp31 = zext i8 %tmp30 to i64
 336   %tmp32 = shl nuw nsw i64 %tmp31, 8
 337   %tmp33 = or i64 %tmp28, %tmp32
 338   %tmp34 = getelementptr inbounds i8, ptr %arg, i64 7
 339   %tmp35 = load i8, ptr %tmp34, align 1 ; p[7], stays in bits 0-7
 340   %tmp36 = zext i8 %tmp35 to i64
 341   %tmp37 = or i64 %tmp33, %tmp36
 342   ret i64 %tmp37 ; expected asm above: movq+bswapq or movbeq on x86_64; two byte-swapped 32-bit halves on i686
 343 }
 344
 345 ; Part of the load by bytes pattern is used outside of the pattern
 346 ; ptr p;
 347 ; i32 x = (i32) p[1]
 348 ; res = ((i32) p[0] << 24) | (x << 16) | ((i32) p[2] << 8) | (i32) p[3]
 349 ; x | res
 350 define i32 @load_i32_by_i8_bswap_uses(ptr %arg) { ; negative test: one zero-extended byte is also used outside the OR chain
 351 ; CHECK-LABEL: load_i32_by_i8_bswap_uses:
 352 ; CHECK:       # %bb.0:
 353 ; CHECK-NEXT:    pushl %esi
 354 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 355 ; CHECK-NEXT:    .cfi_offset %esi, -8
 356 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 357 ; CHECK-NEXT:    movzbl (%eax), %ecx
 358 ; CHECK-NEXT:    shll $24, %ecx
 359 ; CHECK-NEXT:    movzbl 1(%eax), %edx
 360 ; CHECK-NEXT:    movl %edx, %esi
 361 ; CHECK-NEXT:    shll $16, %esi
 362 ; CHECK-NEXT:    orl %ecx, %esi
 363 ; CHECK-NEXT:    movzbl 2(%eax), %ecx
 364 ; CHECK-NEXT:    shll $8, %ecx
 365 ; CHECK-NEXT:    orl %esi, %ecx
 366 ; CHECK-NEXT:    movzbl 3(%eax), %eax
 367 ; CHECK-NEXT:    orl %ecx, %eax
 368 ; CHECK-NEXT:    orl %edx, %eax
 369 ; CHECK-NEXT:    popl %esi
 370 ; CHECK-NEXT:    .cfi_def_cfa_offset 4
 371 ; CHECK-NEXT:    retl
 372 ;
 373 ; CHECK64-LABEL: load_i32_by_i8_bswap_uses:
 374 ; CHECK64:       # %bb.0:
 375 ; CHECK64-NEXT:    movzbl (%rdi), %eax
 376 ; CHECK64-NEXT:    shll $24, %eax
 377 ; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
 378 ; CHECK64-NEXT:    movl %ecx, %edx
 379 ; CHECK64-NEXT:    shll $16, %edx
 380 ; CHECK64-NEXT:    orl %eax, %edx
 381 ; CHECK64-NEXT:    movzbl 2(%rdi), %esi
 382 ; CHECK64-NEXT:    shll $8, %esi
 383 ; CHECK64-NEXT:    orl %edx, %esi
 384 ; CHECK64-NEXT:    movzbl 3(%rdi), %eax
 385 ; CHECK64-NEXT:    orl %esi, %eax
 386 ; CHECK64-NEXT:    orl %ecx, %eax
 387 ; CHECK64-NEXT:    retq
 388   %tmp1 = load i8, ptr %arg, align 1 ; p[0]
 389   %tmp2 = zext i8 %tmp1 to i32
 390   %tmp3 = shl nuw nsw i32 %tmp2, 24 ; p[0] -> bits 24-31
 391   %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
 392   %tmp5 = load i8, ptr %tmp4, align 1 ; p[1]
 393   %tmp6 = zext i8 %tmp5 to i32 ; this is "x" from the comment above; it is reused by %tmp18 below
 394   %tmp7 = shl nuw nsw i32 %tmp6, 16 ; p[1] -> bits 16-23
 395   %tmp8 = or i32 %tmp7, %tmp3
 396   %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
 397   %tmp10 = load i8, ptr %tmp9, align 1 ; p[2]
 398   %tmp11 = zext i8 %tmp10 to i32
 399   %tmp12 = shl nuw nsw i32 %tmp11, 8 ; p[2] -> bits 8-15
 400   %tmp13 = or i32 %tmp8, %tmp12
 401   %tmp14 = getelementptr inbounds i8, ptr %arg, i32 3
 402   %tmp15 = load i8, ptr %tmp14, align 1 ; p[3], stays in bits 0-7
 403   %tmp16 = zext i8 %tmp15 to i32
 404   %tmp17 = or i32 %tmp13, %tmp16
 405   ; Use individual part of the pattern outside of the pattern
 406   %tmp18 = or i32 %tmp6, %tmp17
 407   ret i32 %tmp18 ; expected asm above keeps all four movzbl byte loads: no combined load
 408 }
 409
 410 ; One of the loads is volatile
 411 ; ptr p;
 412 ; p0 = volatile *p;
 413 ; ((i32) p0 << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
 414 define i32 @load_i32_by_i8_bswap_volatile(ptr %arg) { ; negative test: the first byte load is volatile
 415 ; CHECK-LABEL: load_i32_by_i8_bswap_volatile:
 416 ; CHECK:       # %bb.0:
 417 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 418 ; CHECK-NEXT:    movzbl (%eax), %ecx
 419 ; CHECK-NEXT:    shll $24, %ecx
 420 ; CHECK-NEXT:    movzbl 1(%eax), %edx
 421 ; CHECK-NEXT:    shll $16, %edx
 422 ; CHECK-NEXT:    orl %ecx, %edx
 423 ; CHECK-NEXT:    movzbl 2(%eax), %ecx
 424 ; CHECK-NEXT:    shll $8, %ecx
 425 ; CHECK-NEXT:    orl %edx, %ecx
 426 ; CHECK-NEXT:    movzbl 3(%eax), %eax
 427 ; CHECK-NEXT:    orl %ecx, %eax
 428 ; CHECK-NEXT:    retl
 429 ;
 430 ; CHECK64-LABEL: load_i32_by_i8_bswap_volatile:
 431 ; CHECK64:       # %bb.0:
 432 ; CHECK64-NEXT:    movzbl (%rdi), %eax
 433 ; CHECK64-NEXT:    shll $24, %eax
 434 ; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
 435 ; CHECK64-NEXT:    shll $16, %ecx
 436 ; CHECK64-NEXT:    orl %eax, %ecx
 437 ; CHECK64-NEXT:    movzbl 2(%rdi), %edx
 438 ; CHECK64-NEXT:    shll $8, %edx
 439 ; CHECK64-NEXT:    orl %ecx, %edx
 440 ; CHECK64-NEXT:    movzbl 3(%rdi), %eax
 441 ; CHECK64-NEXT:    orl %edx, %eax
 442 ; CHECK64-NEXT:    retq
 443   %tmp1 = load volatile i8, ptr %arg, align 1 ; p[0], the only volatile load in the chain
 444   %tmp2 = zext i8 %tmp1 to i32
 445   %tmp3 = shl nuw nsw i32 %tmp2, 24 ; p[0] -> bits 24-31
 446   %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
 447   %tmp5 = load i8, ptr %tmp4, align 1 ; p[1]
 448   %tmp6 = zext i8 %tmp5 to i32
 449   %tmp7 = shl nuw nsw i32 %tmp6, 16 ; p[1] -> bits 16-23
 450   %tmp8 = or i32 %tmp7, %tmp3
 451   %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
 452   %tmp10 = load i8, ptr %tmp9, align 1 ; p[2]
 453   %tmp11 = zext i8 %tmp10 to i32
 454   %tmp12 = shl nuw nsw i32 %tmp11, 8 ; p[2] -> bits 8-15
 455   %tmp13 = or i32 %tmp8, %tmp12
 456   %tmp14 = getelementptr inbounds i8, ptr %arg, i32 3
 457   %tmp15 = load i8, ptr %tmp14, align 1 ; p[3], stays in bits 0-7
 458   %tmp16 = zext i8 %tmp15 to i32
 459   %tmp17 = or i32 %tmp13, %tmp16
 460   ret i32 %tmp17 ; expected asm above keeps all four movzbl byte loads: no combined load
 461 }
 462
 463 ; There is a store in between individual loads
 464 ; ptr p, q;
 465 ; res1 = ((i32) p[0] << 24) | ((i32) p[1] << 16)
 466 ; *q = 0;
 467 ; res2 = ((i32) p[2] << 8) | (i32) p[3]
 468 ; res1 | res2
 469 define i32 @load_i32_by_i8_bswap_store_in_between(ptr %arg, ptr %arg1) { ; negative test: a store through %arg1 sits between the byte loads
 470 ; CHECK-LABEL: load_i32_by_i8_bswap_store_in_between:
 471 ; CHECK:       # %bb.0:
 472 ; CHECK-NEXT:    pushl %esi
 473 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 474 ; CHECK-NEXT:    .cfi_offset %esi, -8
 475 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 476 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 477 ; CHECK-NEXT:    movzbl (%eax), %edx
 478 ; CHECK-NEXT:    shll $24, %edx
 479 ; CHECK-NEXT:    movzbl 1(%eax), %esi
 480 ; CHECK-NEXT:    movl $0, (%ecx)
 481 ; CHECK-NEXT:    shll $16, %esi
 482 ; CHECK-NEXT:    orl %edx, %esi
 483 ; CHECK-NEXT:    movzbl 2(%eax), %ecx
 484 ; CHECK-NEXT:    shll $8, %ecx
 485 ; CHECK-NEXT:    orl %esi, %ecx
 486 ; CHECK-NEXT:    movzbl 3(%eax), %eax
 487 ; CHECK-NEXT:    orl %ecx, %eax
 488 ; CHECK-NEXT:    popl %esi
 489 ; CHECK-NEXT:    .cfi_def_cfa_offset 4
 490 ; CHECK-NEXT:    retl
 491 ;
 492 ; CHECK64-LABEL: load_i32_by_i8_bswap_store_in_between:
 493 ; CHECK64:       # %bb.0:
 494 ; CHECK64-NEXT:    movzbl (%rdi), %eax
 495 ; CHECK64-NEXT:    shll $24, %eax
 496 ; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
 497 ; CHECK64-NEXT:    movl $0, (%rsi)
 498 ; CHECK64-NEXT:    shll $16, %ecx
 499 ; CHECK64-NEXT:    orl %eax, %ecx
 500 ; CHECK64-NEXT:    movzbl 2(%rdi), %edx
 501 ; CHECK64-NEXT:    shll $8, %edx
 502 ; CHECK64-NEXT:    orl %ecx, %edx
 503 ; CHECK64-NEXT:    movzbl 3(%rdi), %eax
 504 ; CHECK64-NEXT:    orl %edx, %eax
 505 ; CHECK64-NEXT:    retq
 506   %tmp2 = load i8, ptr %arg, align 1 ; p[0], loaded before the store
 507   %tmp3 = zext i8 %tmp2 to i32
 508   %tmp4 = shl nuw nsw i32 %tmp3, 24 ; p[0] -> bits 24-31
 509   %tmp5 = getelementptr inbounds i8, ptr %arg, i32 1
 510   %tmp6 = load i8, ptr %tmp5, align 1 ; p[1], loaded before the store
 511   ; This store will prevent folding of the pattern
 512   store i32 0, ptr %arg1
 513   %tmp7 = zext i8 %tmp6 to i32
 514   %tmp8 = shl nuw nsw i32 %tmp7, 16 ; p[1] -> bits 16-23
 515   %tmp9 = or i32 %tmp8, %tmp4
 516   %tmp10 = getelementptr inbounds i8, ptr %arg, i32 2
 517   %tmp11 = load i8, ptr %tmp10, align 1 ; p[2], loaded after the store
 518   %tmp12 = zext i8 %tmp11 to i32
 519   %tmp13 = shl nuw nsw i32 %tmp12, 8 ; p[2] -> bits 8-15
 520   %tmp14 = or i32 %tmp9, %tmp13
 521   %tmp15 = getelementptr inbounds i8, ptr %arg, i32 3
 522   %tmp16 = load i8, ptr %tmp15, align 1 ; p[3], loaded after the store
 523   %tmp17 = zext i8 %tmp16 to i32
 524   %tmp18 = or i32 %tmp14, %tmp17
 525   ret i32 %tmp18 ; expected asm above keeps four movzbl byte loads with the store between them
 526 }
 527
 528 ; One of the loads is from an unrelated location
 529 ; ptr p, q;
 530 ; ((i32) p[0] << 24) | ((i32) q[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
 531 define i32 @load_i32_by_i8_bswap_unrelated_load(ptr %arg, ptr %arg1) { ; negative test: one of the four bytes comes from a different base pointer
 532 ; CHECK-LABEL: load_i32_by_i8_bswap_unrelated_load:
 533 ; CHECK:       # %bb.0:
 534 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 535 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 536 ; CHECK-NEXT:    movzbl (%ecx), %edx
 537 ; CHECK-NEXT:    shll $24, %edx
 538 ; CHECK-NEXT:    movzbl 1(%eax), %eax
 539 ; CHECK-NEXT:    shll $16, %eax
 540 ; CHECK-NEXT:    orl %edx, %eax
 541 ; CHECK-NEXT:    movzbl 2(%ecx), %edx
 542 ; CHECK-NEXT:    shll $8, %edx
 543 ; CHECK-NEXT:    orl %eax, %edx
 544 ; CHECK-NEXT:    movzbl 3(%ecx), %eax
 545 ; CHECK-NEXT:    orl %edx, %eax
 546 ; CHECK-NEXT:    retl
 547 ;
 548 ; CHECK64-LABEL: load_i32_by_i8_bswap_unrelated_load:
 549 ; CHECK64:       # %bb.0:
 550 ; CHECK64-NEXT:    movzbl (%rdi), %eax
 551 ; CHECK64-NEXT:    shll $24, %eax
 552 ; CHECK64-NEXT:    movzbl 1(%rsi), %ecx
 553 ; CHECK64-NEXT:    shll $16, %ecx
 554 ; CHECK64-NEXT:    orl %eax, %ecx
 555 ; CHECK64-NEXT:    movzbl 2(%rdi), %edx
 556 ; CHECK64-NEXT:    shll $8, %edx
 557 ; CHECK64-NEXT:    orl %ecx, %edx
 558 ; CHECK64-NEXT:    movzbl 3(%rdi), %eax
 559 ; CHECK64-NEXT:    orl %edx, %eax
 560 ; CHECK64-NEXT:    retq
 561   %tmp3 = load i8, ptr %arg, align 1 ; p[0]
 562   %tmp4 = zext i8 %tmp3 to i32
 563   %tmp5 = shl nuw nsw i32 %tmp4, 24 ; p[0] -> bits 24-31
 564   ; Load from an unrelated address
 565   %tmp6 = getelementptr inbounds i8, ptr %arg1, i32 1 ; based on %arg1 (q), not %arg (p)
 566   %tmp7 = load i8, ptr %tmp6, align 1 ; q[1]
 567   %tmp8 = zext i8 %tmp7 to i32
 568   %tmp9 = shl nuw nsw i32 %tmp8, 16 ; q[1] -> bits 16-23
 569   %tmp10 = or i32 %tmp9, %tmp5
 570   %tmp11 = getelementptr inbounds i8, ptr %arg, i32 2
 571   %tmp12 = load i8, ptr %tmp11, align 1 ; p[2]
 572   %tmp13 = zext i8 %tmp12 to i32
 573   %tmp14 = shl nuw nsw i32 %tmp13, 8 ; p[2] -> bits 8-15
 574   %tmp15 = or i32 %tmp10, %tmp14
 575   %tmp16 = getelementptr inbounds i8, ptr %arg, i32 3
 576   %tmp17 = load i8, ptr %tmp16, align 1 ; p[3], stays in bits 0-7
 577   %tmp18 = zext i8 %tmp17 to i32
 578   %tmp19 = or i32 %tmp15, %tmp18
 579   ret i32 %tmp19 ; expected asm above keeps all four movzbl byte loads: no combined load
 580 }
 581
 582 ; ptr p;
 583 ; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
 584 define i32 @load_i32_by_i8_nonzero_offset(ptr %arg) { ; same byte order as load_i32_by_i8, but the bytes start at offset 1
 585 ; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
 586 ; CHECK:       # %bb.0:
 587 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 588 ; CHECK-NEXT:    movl 1(%eax), %eax
 589 ; CHECK-NEXT:    retl
 590 ;
 591 ; CHECK64-LABEL: load_i32_by_i8_nonzero_offset:
 592 ; CHECK64:       # %bb.0:
 593 ; CHECK64-NEXT:    movl 1(%rdi), %eax
 594 ; CHECK64-NEXT:    retq
 595   %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
 596   %tmp2 = load i8, ptr %tmp1, align 1 ; p[1], stays in bits 0-7
 597   %tmp3 = zext i8 %tmp2 to i32
 598   %tmp4 = getelementptr inbounds i8, ptr %arg, i32 2
 599   %tmp5 = load i8, ptr %tmp4, align 1 ; p[2]
 600   %tmp6 = zext i8 %tmp5 to i32
 601   %tmp7 = shl nuw nsw i32 %tmp6, 8 ; p[2] -> bits 8-15
 602   %tmp8 = or i32 %tmp7, %tmp3
 603   %tmp9 = getelementptr inbounds i8, ptr %arg, i32 3
 604   %tmp10 = load i8, ptr %tmp9, align 1 ; p[3]
 605   %tmp11 = zext i8 %tmp10 to i32
 606   %tmp12 = shl nuw nsw i32 %tmp11, 16 ; p[3] -> bits 16-23
 607   %tmp13 = or i32 %tmp8, %tmp12
 608   %tmp14 = getelementptr inbounds i8, ptr %arg, i32 4
 609   %tmp15 = load i8, ptr %tmp14, align 1 ; p[4]
 610   %tmp16 = zext i8 %tmp15 to i32
 611   %tmp17 = shl nuw nsw i32 %tmp16, 24 ; p[4] -> bits 24-31
 612   %tmp18 = or i32 %tmp13, %tmp17
 613   ret i32 %tmp18 ; expected asm above: a single 32-bit movl with displacement 1
 614 }
 615
 616 ; ptr p;
 617 ; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
 618 define i32 @load_i32_by_i8_neg_offset(ptr %arg) { ; same byte order as load_i32_by_i8, but the bytes sit at offsets -4..-1
 619 ; CHECK-LABEL: load_i32_by_i8_neg_offset:
 620 ; CHECK:       # %bb.0:
 621 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 622 ; CHECK-NEXT:    movl -4(%eax), %eax
 623 ; CHECK-NEXT:    retl
 624 ;
 625 ; CHECK64-LABEL: load_i32_by_i8_neg_offset:
 626 ; CHECK64:       # %bb.0:
 627 ; CHECK64-NEXT:    movl -4(%rdi), %eax
 628 ; CHECK64-NEXT:    retq
 629   %tmp1 = getelementptr inbounds i8, ptr %arg, i32 -4
 630   %tmp2 = load i8, ptr %tmp1, align 1 ; p[-4], stays in bits 0-7
 631   %tmp3 = zext i8 %tmp2 to i32
 632   %tmp4 = getelementptr inbounds i8, ptr %arg, i32 -3
 633   %tmp5 = load i8, ptr %tmp4, align 1 ; p[-3]
 634   %tmp6 = zext i8 %tmp5 to i32
 635   %tmp7 = shl nuw nsw i32 %tmp6, 8 ; p[-3] -> bits 8-15
 636   %tmp8 = or i32 %tmp7, %tmp3
 637   %tmp9 = getelementptr inbounds i8, ptr %arg, i32 -2
 638   %tmp10 = load i8, ptr %tmp9, align 1 ; p[-2]
 639   %tmp11 = zext i8 %tmp10 to i32
 640   %tmp12 = shl nuw nsw i32 %tmp11, 16 ; p[-2] -> bits 16-23
 641   %tmp13 = or i32 %tmp8, %tmp12
 642   %tmp14 = getelementptr inbounds i8, ptr %arg, i32 -1
 643   %tmp15 = load i8, ptr %tmp14, align 1 ; p[-1]
 644   %tmp16 = zext i8 %tmp15 to i32
 645   %tmp17 = shl nuw nsw i32 %tmp16, 24 ; p[-1] -> bits 24-31
 646   %tmp18 = or i32 %tmp13, %tmp17
 647   ret i32 %tmp18 ; expected asm above: a single 32-bit movl with displacement -4
 648 }
 649
 650 ; ptr p;
 651 ; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
 652 define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) { ; byte-reversed pattern over offsets 1..4: highest address in the low byte
 653 ; BSWAP-LABEL: load_i32_by_i8_nonzero_offset_bswap:
 654 ; BSWAP:       # %bb.0:
 655 ; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
 656 ; BSWAP-NEXT:    movl 1(%eax), %eax
 657 ; BSWAP-NEXT:    bswapl %eax
 658 ; BSWAP-NEXT:    retl
 659 ;
 660 ; MOVBE-LABEL: load_i32_by_i8_nonzero_offset_bswap:
 661 ; MOVBE:       # %bb.0:
 662 ; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 663 ; MOVBE-NEXT:    movbel 1(%eax), %eax
 664 ; MOVBE-NEXT:    retl
 665 ;
 666 ; BSWAP64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
 667 ; BSWAP64:       # %bb.0:
 668 ; BSWAP64-NEXT:    movl 1(%rdi), %eax
 669 ; BSWAP64-NEXT:    bswapl %eax
 670 ; BSWAP64-NEXT:    retq
 671 ;
 672 ; MOVBE64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
 673 ; MOVBE64:       # %bb.0:
 674 ; MOVBE64-NEXT:    movbel 1(%rdi), %eax
 675 ; MOVBE64-NEXT:    retq
 676   %tmp1 = getelementptr inbounds i8, ptr %arg, i32 4
 677   %tmp2 = load i8, ptr %tmp1, align 1 ; p[4], stays in bits 0-7
 678   %tmp3 = zext i8 %tmp2 to i32
 679   %tmp4 = getelementptr inbounds i8, ptr %arg, i32 3
 680   %tmp5 = load i8, ptr %tmp4, align 1 ; p[3]
 681   %tmp6 = zext i8 %tmp5 to i32
 682   %tmp7 = shl nuw nsw i32 %tmp6, 8 ; p[3] -> bits 8-15
 683   %tmp8 = or i32 %tmp7, %tmp3
 684   %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
 685   %tmp10 = load i8, ptr %tmp9, align 1 ; p[2]
 686   %tmp11 = zext i8 %tmp10 to i32
 687   %tmp12 = shl nuw nsw i32 %tmp11, 16 ; p[2] -> bits 16-23
 688   %tmp13 = or i32 %tmp8, %tmp12
 689   %tmp14 = getelementptr inbounds i8, ptr %arg, i32 1
 690   %tmp15 = load i8, ptr %tmp14, align 1 ; p[1]
 691   %tmp16 = zext i8 %tmp15 to i32
 692   %tmp17 = shl nuw nsw i32 %tmp16, 24 ; p[1] -> bits 24-31
 693   %tmp18 = or i32 %tmp13, %tmp17
 694   ret i32 %tmp18 ; expected asm above: 32-bit load at displacement 1 plus bswapl, or one movbel with +movbe
 695 }
 696
 697 ; ptr p;
 698 ; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
 699 define i32 @load_i32_by_i8_neg_offset_bswap(ptr %arg) { ; byte-reversed pattern over offsets -4..-1: highest address in the low byte
 700 ; BSWAP-LABEL: load_i32_by_i8_neg_offset_bswap:
 701 ; BSWAP:       # %bb.0:
 702 ; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
 703 ; BSWAP-NEXT:    movl -4(%eax), %eax
 704 ; BSWAP-NEXT:    bswapl %eax
 705 ; BSWAP-NEXT:    retl
 706 ;
 707 ; MOVBE-LABEL: load_i32_by_i8_neg_offset_bswap:
 708 ; MOVBE:       # %bb.0:
 709 ; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 710 ; MOVBE-NEXT:    movbel -4(%eax), %eax
 711 ; MOVBE-NEXT:    retl
 712 ;
 713 ; BSWAP64-LABEL: load_i32_by_i8_neg_offset_bswap:
 714 ; BSWAP64:       # %bb.0:
 715 ; BSWAP64-NEXT:    movl -4(%rdi), %eax
 716 ; BSWAP64-NEXT:    bswapl %eax
 717 ; BSWAP64-NEXT:    retq
 718 ;
 719 ; MOVBE64-LABEL: load_i32_by_i8_neg_offset_bswap:
 720 ; MOVBE64:       # %bb.0:
 721 ; MOVBE64-NEXT:    movbel -4(%rdi), %eax
 722 ; MOVBE64-NEXT:    retq
 723   %tmp1 = getelementptr inbounds i8, ptr %arg, i32 -1
 724   %tmp2 = load i8, ptr %tmp1, align 1 ; p[-1], stays in bits 0-7
 725   %tmp3 = zext i8 %tmp2 to i32
 726   %tmp4 = getelementptr inbounds i8, ptr %arg, i32 -2
 727   %tmp5 = load i8, ptr %tmp4, align 1 ; p[-2]
 728   %tmp6 = zext i8 %tmp5 to i32
 729   %tmp7 = shl nuw nsw i32 %tmp6, 8 ; p[-2] -> bits 8-15
 730   %tmp8 = or i32 %tmp7, %tmp3
 731   %tmp9 = getelementptr inbounds i8, ptr %arg, i32 -3
 732   %tmp10 = load i8, ptr %tmp9, align 1 ; p[-3]
 733   %tmp11 = zext i8 %tmp10 to i32
 734   %tmp12 = shl nuw nsw i32 %tmp11, 16 ; p[-3] -> bits 16-23
 735   %tmp13 = or i32 %tmp8, %tmp12
 736   %tmp14 = getelementptr inbounds i8, ptr %arg, i32 -4
 737   %tmp15 = load i8, ptr %tmp14, align 1 ; p[-4]
 738   %tmp16 = zext i8 %tmp15 to i32
 739   %tmp17 = shl nuw nsw i32 %tmp16, 24 ; p[-4] -> bits 24-31
 740   %tmp18 = or i32 %tmp13, %tmp17
 741   ret i32 %tmp18 ; expected asm above: 32-bit load at displacement -4 plus bswapl, or one movbel with +movbe
 742 }
 743
 744 ; ptr p; i32 i;
 745 ; ((i32) p[i] << 24) | ((i32) p[i + 1] << 16) | ((i32) p[i + 2] << 8) | (i32) p[i + 3]
 746 define i32 @load_i32_by_i8_bswap_base_index_offset(ptr %arg, i32 %arg1) { ; byte-reversed pattern addressed as base %arg plus variable index %arg1
 747 ; BSWAP-LABEL: load_i32_by_i8_bswap_base_index_offset:
 748 ; BSWAP:       # %bb.0:
 749 ; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
 750 ; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 751 ; BSWAP-NEXT:    movl (%ecx,%eax), %eax
 752 ; BSWAP-NEXT:    bswapl %eax
 753 ; BSWAP-NEXT:    retl
 754 ;
 755 ; MOVBE-LABEL: load_i32_by_i8_bswap_base_index_offset:
 756 ; MOVBE:       # %bb.0:
 757 ; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 758 ; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 759 ; MOVBE-NEXT:    movbel (%ecx,%eax), %eax
 760 ; MOVBE-NEXT:    retl
 761 ;
 762 ; BSWAP64-LABEL: load_i32_by_i8_bswap_base_index_offset:
 763 ; BSWAP64:       # %bb.0:
 764 ; BSWAP64-NEXT:    movslq %esi, %rax
 765 ; BSWAP64-NEXT:    movl (%rdi,%rax), %eax
 766 ; BSWAP64-NEXT:    bswapl %eax
 767 ; BSWAP64-NEXT:    retq
 768 ;
 769 ; MOVBE64-LABEL: load_i32_by_i8_bswap_base_index_offset:
 770 ; MOVBE64:       # %bb.0:
 771 ; MOVBE64-NEXT:    movslq %esi, %rax
 772 ; MOVBE64-NEXT:    movbel (%rdi,%rax), %eax
 773 ; MOVBE64-NEXT:    retq
 774   %tmp2 = getelementptr inbounds i8, ptr %arg, i32 %arg1
 775   %tmp3 = load i8, ptr %tmp2, align 1 ; p[i]
 776   %tmp4 = zext i8 %tmp3 to i32
 777   %tmp5 = shl nuw nsw i32 %tmp4, 24 ; p[i] -> bits 24-31
 778   %tmp6 = add nuw nsw i32 %arg1, 1 ; index i + 1
 779   %tmp7 = getelementptr inbounds i8, ptr %arg, i32 %tmp6
 780   %tmp8 = load i8, ptr %tmp7, align 1 ; p[i + 1]
 781   %tmp9 = zext i8 %tmp8 to i32
 782   %tmp10 = shl nuw nsw i32 %tmp9, 16 ; p[i + 1] -> bits 16-23
 783   %tmp11 = or i32 %tmp10, %tmp5
 784   %tmp12 = add nuw nsw i32 %arg1, 2 ; index i + 2
 785   %tmp13 = getelementptr inbounds i8, ptr %arg, i32 %tmp12
 786   %tmp14 = load i8, ptr %tmp13, align 1 ; p[i + 2]
 787   %tmp15 = zext i8 %tmp14 to i32
 788   %tmp16 = shl nuw nsw i32 %tmp15, 8 ; p[i + 2] -> bits 8-15
 789   %tmp17 = or i32 %tmp11, %tmp16
 790   %tmp18 = add nuw nsw i32 %arg1, 3 ; index i + 3
 791   %tmp19 = getelementptr inbounds i8, ptr %arg, i32 %tmp18
 792   %tmp20 = load i8, ptr %tmp19, align 1 ; p[i + 3], stays in bits 0-7
 793   %tmp21 = zext i8 %tmp20 to i32
 794   %tmp22 = or i32 %tmp17, %tmp21
 795   ret i32 %tmp22 ; expected asm above: one base+index 32-bit load plus bswapl, or one movbel with +movbe
 796 }
 797
 798 ; Verify that we don't crash handling shl i32 %conv57, 32
 799 define void @shift_i32_by_32(ptr %src1, ptr %src2, ptr %dst) { ; crash regression test: i32 shifts whose amount is not smaller than the bit width
 800 ; CHECK-LABEL: shift_i32_by_32:
 801 ; CHECK:       # %bb.0: # %entry
 802 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 803 ; CHECK-NEXT:    movl $-1, 4(%eax)
 804 ; CHECK-NEXT:    movl $-1, (%eax)
 805 ; CHECK-NEXT:    retl
 806 ;
 807 ; CHECK64-LABEL: shift_i32_by_32:
 808 ; CHECK64:       # %bb.0: # %entry
 809 ; CHECK64-NEXT:    movq $-1, (%rdx)
 810 ; CHECK64-NEXT:    retq
 811 entry:
 812   %load1 = load i8, ptr %src1, align 1
 813   %conv46 = zext i8 %load1 to i32
 814   %shl47 = shl i32 %conv46, 56 ; shift amount 56 is larger than the 32-bit width
 815   %or55 = or i32 %shl47, 0
 816   %load2 = load i8, ptr %src2, align 1
 817   %conv57 = zext i8 %load2 to i32
 818   %shl58 = shl i32 %conv57, 32 ; shift amount 32 equals the bit width; this is the shift named in the comment above
 819   %or59 = or i32 %or55, %shl58
 820   %or74 = or i32 %or59, 0
 821   %conv75 = sext i32 %or74 to i64
 822   store i64 %conv75, ptr %dst, align 8 ; expected asm above stores the constant -1 and contains no loads from %src1 or %src2
 823   ret void
 824 }
 825
 826 declare i16 @llvm.bswap.i16(i16)
 827
 828 ; ptr p;
 829 ; (i32) bswap(p[1]) | ((i32) bswap(p[0]) << 16)
     ; p is indexed as an array of i16; each half is byte-swapped before being
     ; zero-extended. Expected: one 32-bit load followed by bswapl, or a single
     ; movbel when +movbe is enabled.
 830 define i32 @load_i32_by_bswap_i16(ptr %arg) {
 831 ; BSWAP-LABEL: load_i32_by_bswap_i16:
 832 ; BSWAP:       # %bb.0:
 833 ; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
 834 ; BSWAP-NEXT:    movl (%eax), %eax
 835 ; BSWAP-NEXT:    bswapl %eax
 836 ; BSWAP-NEXT:    retl
 837 ;
 838 ; MOVBE-LABEL: load_i32_by_bswap_i16:
 839 ; MOVBE:       # %bb.0:
 840 ; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 841 ; MOVBE-NEXT:    movbel (%eax), %eax
 842 ; MOVBE-NEXT:    retl
 843 ;
 844 ; BSWAP64-LABEL: load_i32_by_bswap_i16:
 845 ; BSWAP64:       # %bb.0:
 846 ; BSWAP64-NEXT:    movl (%rdi), %eax
 847 ; BSWAP64-NEXT:    bswapl %eax
 848 ; BSWAP64-NEXT:    retq
 849 ;
 850 ; MOVBE64-LABEL: load_i32_by_bswap_i16:
 851 ; MOVBE64:       # %bb.0:
 852 ; MOVBE64-NEXT:    movbel (%rdi), %eax
 853 ; MOVBE64-NEXT:    retq
 854   %tmp1 = load i16, ptr %arg, align 4
 855   %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
 856   %tmp2 = zext i16 %tmp11 to i32
 857   %tmp3 = getelementptr inbounds i16, ptr %arg, i32 1
 858   %tmp4 = load i16, ptr %tmp3, align 1
 859   %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
 860   %tmp5 = zext i16 %tmp41 to i32
 861   %tmp6 = shl nuw nsw i32 %tmp2, 16
 862   %tmp7 = or i32 %tmp6, %tmp5
 863   ret i32 %tmp7
 864 }
 865
 866 ; ptr p;
 867 ; (i32) p[0] | ((sext p[1] to i32) << 16)
     ; p is indexed as an array of i16. The low half is zero-extended and the
     ; high half is sign-extended before the shift. Expected: a single 32-bit load.
 868 define i32 @load_i32_by_sext_i16(ptr %arg) {
 869 ; CHECK-LABEL: load_i32_by_sext_i16:
 870 ; CHECK:       # %bb.0:
 871 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 872 ; CHECK-NEXT:    movl (%eax), %eax
 873 ; CHECK-NEXT:    retl
 874 ;
 875 ; CHECK64-LABEL: load_i32_by_sext_i16:
 876 ; CHECK64:       # %bb.0:
 877 ; CHECK64-NEXT:    movl (%rdi), %eax
 878 ; CHECK64-NEXT:    retq
 879   %tmp1 = load i16, ptr %arg, align 1
 880   %tmp2 = zext i16 %tmp1 to i32
 881   %tmp3 = getelementptr inbounds i16, ptr %arg, i32 1
 882   %tmp4 = load i16, ptr %tmp3, align 1
 883   %tmp5 = sext i16 %tmp4 to i32
 884   %tmp6 = shl nuw nsw i32 %tmp5, 16
 885   %tmp7 = or i32 %tmp6, %tmp2
 886   ret i32 %tmp7
 887 }
 888
 889 ; ptr arg; i32 i;
 890 ; p = arg + 12;
 891 ; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
     ; Each index is computed in i32 and zero-extended to i64 before the GEP.
     ; Expected: a single 32-bit load at displacement 12 from base + index.
 892 define i32 @load_i32_by_i8_base_offset_index(ptr %arg, i32 %i) {
 893 ; CHECK-LABEL: load_i32_by_i8_base_offset_index:
 894 ; CHECK:       # %bb.0:
 895 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 896 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 897 ; CHECK-NEXT:    movl 12(%eax,%ecx), %eax
 898 ; CHECK-NEXT:    retl
 899 ;
 900 ; CHECK64-LABEL: load_i32_by_i8_base_offset_index:
 901 ; CHECK64:       # %bb.0:
 902 ; CHECK64-NEXT:    movl %esi, %eax
 903 ; CHECK64-NEXT:    movl 12(%rdi,%rax), %eax
 904 ; CHECK64-NEXT:    retq
 905   %tmp = add nuw nsw i32 %i, 3
 906   %tmp2 = add nuw nsw i32 %i, 2
 907   %tmp3 = add nuw nsw i32 %i, 1
 908   %tmp4 = getelementptr inbounds i8, ptr %arg, i64 12
 909   %tmp5 = zext i32 %i to i64
 910   %tmp6 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp5
 911   %tmp7 = load i8, ptr %tmp6, align 1
 912   %tmp8 = zext i8 %tmp7 to i32
 913   %tmp9 = zext i32 %tmp3 to i64
 914   %tmp10 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp9
 915   %tmp11 = load i8, ptr %tmp10, align 1
 916   %tmp12 = zext i8 %tmp11 to i32
 917   %tmp13 = shl nuw nsw i32 %tmp12, 8
 918   %tmp14 = or i32 %tmp13, %tmp8
 919   %tmp15 = zext i32 %tmp2 to i64
 920   %tmp16 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp15
 921   %tmp17 = load i8, ptr %tmp16, align 1
 922   %tmp18 = zext i8 %tmp17 to i32
 923   %tmp19 = shl nuw nsw i32 %tmp18, 16
 924   %tmp20 = or i32 %tmp14, %tmp19
 925   %tmp21 = zext i32 %tmp to i64
 926   %tmp22 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp21
 927   %tmp23 = load i8, ptr %tmp22, align 1
 928   %tmp24 = zext i8 %tmp23 to i32
 929   %tmp25 = shl nuw i32 %tmp24, 24
 930   %tmp26 = or i32 %tmp20, %tmp25
 931   ret i32 %tmp26
 932 }
 933
 934 ; ptr arg; i32 i;
 935 ; p = arg + 12;
 936 ; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
     ; Same shape as load_i32_by_i8_base_offset_index, but the lowest byte is at
     ; p[i + 1]. Expected: a single 32-bit load at displacement 13 from
     ; base + index.
 937 define i32 @load_i32_by_i8_base_offset_index_2(ptr %arg, i32 %i) {
 938 ; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
 939 ; CHECK:       # %bb.0:
 940 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 941 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 942 ; CHECK-NEXT:    movl 13(%eax,%ecx), %eax
 943 ; CHECK-NEXT:    retl
 944 ;
 945 ; CHECK64-LABEL: load_i32_by_i8_base_offset_index_2:
 946 ; CHECK64:       # %bb.0:
 947 ; CHECK64-NEXT:    movl %esi, %eax
 948 ; CHECK64-NEXT:    movl 13(%rax,%rdi), %eax
 949 ; CHECK64-NEXT:    retq
 950   %tmp = add nuw nsw i32 %i, 4
 951   %tmp2 = add nuw nsw i32 %i, 3
 952   %tmp3 = add nuw nsw i32 %i, 2
 953   %tmp4 = getelementptr inbounds i8, ptr %arg, i64 12
 954   %tmp5 = add nuw nsw i32 %i, 1
 955   %tmp27 = zext i32 %tmp5 to i64
 956   %tmp28 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp27
 957   %tmp29 = load i8, ptr %tmp28, align 1
 958   %tmp30 = zext i8 %tmp29 to i32
 959   %tmp31 = zext i32 %tmp3 to i64
 960   %tmp32 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp31
 961   %tmp33 = load i8, ptr %tmp32, align 1
 962   %tmp34 = zext i8 %tmp33 to i32
 963   %tmp35 = shl nuw nsw i32 %tmp34, 8
 964   %tmp36 = or i32 %tmp35, %tmp30
 965   %tmp37 = zext i32 %tmp2 to i64
 966   %tmp38 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp37
 967   %tmp39 = load i8, ptr %tmp38, align 1
 968   %tmp40 = zext i8 %tmp39 to i32
 969   %tmp41 = shl nuw nsw i32 %tmp40, 16
 970   %tmp42 = or i32 %tmp36, %tmp41
 971   %tmp43 = zext i32 %tmp to i64
 972   %tmp44 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp43
 973   %tmp45 = load i8, ptr %tmp44, align 1
 974   %tmp46 = zext i8 %tmp45 to i32
 975   %tmp47 = shl nuw i32 %tmp46, 24
 976   %tmp48 = or i32 %tmp42, %tmp47
 977   ret i32 %tmp48
 978 }
 979
 980 ; ptr arg; i32 i;
 981 ;
 982 ; p0 = arg + i;
 983 ; p1 = arg + i + 1;
 984 ; p2 = arg + i + 2;
 985 ; p3 = arg + i + 3;
 986 ;
 987 ; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24)
 988 ;
 989 ; This test exercises zero and any extend loads as a part of load combine pattern.
 990 ; In order to fold the pattern above we need to reassociate the address computation
 991 ; first. By the time the address computation is reassociated, the loads have been
 992 ; combined into zext and aext loads.
     ; Note: the IR forms the first address as (arg + 12) + i, while the other
     ; three are formed as (arg + i + k) + 12.
     ; Expected: a single 32-bit load at displacement 12 from base + index.
 993 define i32 @load_i32_by_i8_zaext_loads(ptr %arg, i32 %arg1) {
 994 ; CHECK-LABEL: load_i32_by_i8_zaext_loads:
 995 ; CHECK:       # %bb.0:
 996 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 997 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 998 ; CHECK-NEXT:    movl 12(%eax,%ecx), %eax
 999 ; CHECK-NEXT:    retl
1000 ;
1001 ; CHECK64-LABEL: load_i32_by_i8_zaext_loads:
1002 ; CHECK64:       # %bb.0:
1003 ; CHECK64-NEXT:    movl %esi, %eax
1004 ; CHECK64-NEXT:    movl 12(%rdi,%rax), %eax
1005 ; CHECK64-NEXT:    retq
1006   %tmp = add nuw nsw i32 %arg1, 3
1007   %tmp2 = add nuw nsw i32 %arg1, 2
1008   %tmp3 = add nuw nsw i32 %arg1, 1
1009   %tmp4 = zext i32 %tmp to i64
1010   %tmp5 = zext i32 %tmp2 to i64
1011   %tmp6 = zext i32 %tmp3 to i64
1012   %tmp24 = getelementptr inbounds i8, ptr %arg, i64 %tmp4
1013   %tmp30 = getelementptr inbounds i8, ptr %arg, i64 %tmp5
1014   %tmp31 = getelementptr inbounds i8, ptr %arg, i64 %tmp6
1015   %tmp32 = getelementptr inbounds i8, ptr %arg, i64 12
1016   %tmp33 = zext i32 %arg1 to i64
1017   %tmp34 = getelementptr inbounds i8, ptr %tmp32, i64 %tmp33
1018   %tmp35 = load i8, ptr %tmp34, align 1
1019   %tmp36 = zext i8 %tmp35 to i32
1020   %tmp37 = getelementptr inbounds i8, ptr %tmp31, i64 12
1021   %tmp38 = load i8, ptr %tmp37, align 1
1022   %tmp39 = zext i8 %tmp38 to i32
1023   %tmp40 = shl nuw nsw i32 %tmp39, 8
1024   %tmp41 = or i32 %tmp40, %tmp36
1025   %tmp42 = getelementptr inbounds i8, ptr %tmp30, i64 12
1026   %tmp43 = load i8, ptr %tmp42, align 1
1027   %tmp44 = zext i8 %tmp43 to i32
1028   %tmp45 = shl nuw nsw i32 %tmp44, 16
1029   %tmp46 = or i32 %tmp41, %tmp45
1030   %tmp47 = getelementptr inbounds i8, ptr %tmp24, i64 12
1031   %tmp48 = load i8, ptr %tmp47, align 1
1032   %tmp49 = zext i8 %tmp48 to i32
1033   %tmp50 = shl nuw i32 %tmp49, 24
1034   %tmp51 = or i32 %tmp46, %tmp50
1035   ret i32 %tmp51
1036 }
1037
1038 ; The same as load_i32_by_i8_zaext_loads but the last load is combined to
1039 ; a sext load.
1040 ;
1041 ; ptr arg; i32 i;
1042 ;
1043 ; p0 = arg + i;
1044 ; p1 = arg + i + 1;
1045 ; p2 = arg + i + 2;
1046 ; p3 = arg + i + 3;
1047 ;
1048 ; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24)
     ; Note: the IR forms the first address as (arg + 12) + i. The top byte goes
     ; through sext i8 -> i16 and then zext i16 -> i32 before the shift by 24.
     ; Expected: a single 32-bit load at displacement 12 from base + index.
1049 define i32 @load_i32_by_i8_zsext_loads(ptr %arg, i32 %arg1) {
1050 ; CHECK-LABEL: load_i32_by_i8_zsext_loads:
1051 ; CHECK:       # %bb.0:
1052 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
1053 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1054 ; CHECK-NEXT:    movl 12(%eax,%ecx), %eax
1055 ; CHECK-NEXT:    retl
1056 ;
1057 ; CHECK64-LABEL: load_i32_by_i8_zsext_loads:
1058 ; CHECK64:       # %bb.0:
1059 ; CHECK64-NEXT:    movl %esi, %eax
1060 ; CHECK64-NEXT:    movl 12(%rdi,%rax), %eax
1061 ; CHECK64-NEXT:    retq
1062   %tmp = add nuw nsw i32 %arg1, 3
1063   %tmp2 = add nuw nsw i32 %arg1, 2
1064   %tmp3 = add nuw nsw i32 %arg1, 1
1065   %tmp4 = zext i32 %tmp to i64
1066   %tmp5 = zext i32 %tmp2 to i64
1067   %tmp6 = zext i32 %tmp3 to i64
1068   %tmp24 = getelementptr inbounds i8, ptr %arg, i64 %tmp4
1069   %tmp30 = getelementptr inbounds i8, ptr %arg, i64 %tmp5
1070   %tmp31 = getelementptr inbounds i8, ptr %arg, i64 %tmp6
1071   %tmp32 = getelementptr inbounds i8, ptr %arg, i64 12
1072   %tmp33 = zext i32 %arg1 to i64
1073   %tmp34 = getelementptr inbounds i8, ptr %tmp32, i64 %tmp33
1074   %tmp35 = load i8, ptr %tmp34, align 1
1075   %tmp36 = zext i8 %tmp35 to i32
1076   %tmp37 = getelementptr inbounds i8, ptr %tmp31, i64 12
1077   %tmp38 = load i8, ptr %tmp37, align 1
1078   %tmp39 = zext i8 %tmp38 to i32
1079   %tmp40 = shl nuw nsw i32 %tmp39, 8
1080   %tmp41 = or i32 %tmp40, %tmp36
1081   %tmp42 = getelementptr inbounds i8, ptr %tmp30, i64 12
1082   %tmp43 = load i8, ptr %tmp42, align 1
1083   %tmp44 = zext i8 %tmp43 to i32
1084   %tmp45 = shl nuw nsw i32 %tmp44, 16
1085   %tmp46 = or i32 %tmp41, %tmp45
1086   %tmp47 = getelementptr inbounds i8, ptr %tmp24, i64 12
1087   %tmp48 = load i8, ptr %tmp47, align 1
1088   %tmp49 = sext i8 %tmp48 to i16
1089   %tmp50 = zext i16 %tmp49 to i32
1090   %tmp51 = shl nuw i32 %tmp50, 24
1091   %tmp52 = or i32 %tmp46, %tmp51
1092   ret i32 %tmp52
1093 }
1094
1095 ; ptr p;
1096 ; (i32) p[0] | ((i32) p[1] << 8)
     ; Only the low two bytes of the i32 result are populated.
     ; Expected: a single zero-extending 16-bit load (movzwl).
1097 define i32 @zext_load_i32_by_i8(ptr %arg) {
1098 ; CHECK-LABEL: zext_load_i32_by_i8:
1099 ; CHECK:       # %bb.0:
1100 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
1101 ; CHECK-NEXT:    movzwl (%eax), %eax
1102 ; CHECK-NEXT:    retl
1103 ;
1104 ; CHECK64-LABEL: zext_load_i32_by_i8:
1105 ; CHECK64:       # %bb.0:
1106 ; CHECK64-NEXT:    movzwl (%rdi), %eax
1107 ; CHECK64-NEXT:    retq
1108   %tmp2 = load i8, ptr %arg, align 1
1109   %tmp3 = zext i8 %tmp2 to i32
1110   %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
1111   %tmp5 = load i8, ptr %tmp4, align 1
1112   %tmp6 = zext i8 %tmp5 to i32
1113   %tmp7 = shl nuw nsw i32 %tmp6, 8
1114   %tmp8 = or i32 %tmp7, %tmp3
1115   ret i32 %tmp8
1116 }
1117
1118 ; ptr p;
1119 ; ((i32) p[0] << 8) | ((i32) p[1] << 16)
     ; The two bytes land in bits 8..23 of the result.
     ; Expected: no load combining; the CHECK lines keep two byte loads, two
     ; shifts and an or.
1120 define i32 @zext_load_i32_by_i8_shl_8(ptr %arg) {
1121 ; CHECK-LABEL: zext_load_i32_by_i8_shl_8:
1122 ; CHECK:       # %bb.0:
1123 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
1124 ; CHECK-NEXT:    movzbl (%eax), %ecx
1125 ; CHECK-NEXT:    shll $8, %ecx
1126 ; CHECK-NEXT:    movzbl 1(%eax), %eax
1127 ; CHECK-NEXT:    shll $16, %eax
1128 ; CHECK-NEXT:    orl %ecx, %eax
1129 ; CHECK-NEXT:    retl
1130 ;
1131 ; CHECK64-LABEL: zext_load_i32_by_i8_shl_8:
1132 ; CHECK64:       # %bb.0:
1133 ; CHECK64-NEXT:    movzbl (%rdi), %ecx
1134 ; CHECK64-NEXT:    shll $8, %ecx
1135 ; CHECK64-NEXT:    movzbl 1(%rdi), %eax
1136 ; CHECK64-NEXT:    shll $16, %eax
1137 ; CHECK64-NEXT:    orl %ecx, %eax
1138 ; CHECK64-NEXT:    retq
1139   %tmp2 = load i8, ptr %arg, align 1
1140   %tmp3 = zext i8 %tmp2 to i32
1141   %tmp30 = shl nuw nsw i32 %tmp3, 8
1142   %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
1143   %tmp5 = load i8, ptr %tmp4, align 1
1144   %tmp6 = zext i8 %tmp5 to i32
1145   %tmp7 = shl nuw nsw i32 %tmp6, 16
1146   %tmp8 = or i32 %tmp7, %tmp30
1147   ret i32 %tmp8
1148 }
1149
1150 ; ptr p;
1151 ; ((i32) p[0] << 16) | ((i32) p[1] << 24)
     ; The two bytes land in the upper half (bits 16..31) of the result.
     ; Expected: no load combining; the CHECK lines keep two byte loads, two
     ; shifts and an or.
1152 define i32 @zext_load_i32_by_i8_shl_16(ptr %arg) {
1153 ; CHECK-LABEL: zext_load_i32_by_i8_shl_16:
1154 ; CHECK:       # %bb.0:
1155 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
1156 ; CHECK-NEXT:    movzbl (%eax), %ecx
1157 ; CHECK-NEXT:    shll $16, %ecx
1158 ; CHECK-NEXT:    movzbl 1(%eax), %eax
1159 ; CHECK-NEXT:    shll $24, %eax
1160 ; CHECK-NEXT:    orl %ecx, %eax
1161 ; CHECK-NEXT:    retl
1162 ;
1163 ; CHECK64-LABEL: zext_load_i32_by_i8_shl_16:
1164 ; CHECK64:       # %bb.0:
1165 ; CHECK64-NEXT:    movzbl (%rdi), %ecx
1166 ; CHECK64-NEXT:    shll $16, %ecx
1167 ; CHECK64-NEXT:    movzbl 1(%rdi), %eax
1168 ; CHECK64-NEXT:    shll $24, %eax
1169 ; CHECK64-NEXT:    orl %ecx, %eax
1170 ; CHECK64-NEXT:    retq
1171   %tmp2 = load i8, ptr %arg, align 1
1172   %tmp3 = zext i8 %tmp2 to i32
1173   %tmp30 = shl nuw nsw i32 %tmp3, 16
1174   %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
1175   %tmp5 = load i8, ptr %tmp4, align 1
1176   %tmp6 = zext i8 %tmp5 to i32
1177   %tmp7 = shl nuw nsw i32 %tmp6, 24
1178   %tmp8 = or i32 %tmp7, %tmp30
1179   ret i32 %tmp8
1180 }
1181
1182 ; ptr p;
1183 ; (i32) p[1] | ((i32) p[0] << 8)
     ; Byte-reversed counterpart of zext_load_i32_by_i8.
     ; Expected: a 16-bit load, a byte swap (rolw $8, or movbew with +movbe),
     ; then zero extension to 32 bits.
1184 define i32 @zext_load_i32_by_i8_bswap(ptr %arg) {
1185 ; BSWAP-LABEL: zext_load_i32_by_i8_bswap:
1186 ; BSWAP:       # %bb.0:
1187 ; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
1188 ; BSWAP-NEXT:    movzwl (%eax), %eax
1189 ; BSWAP-NEXT:    rolw $8, %ax
1190 ; BSWAP-NEXT:    movzwl %ax, %eax
1191 ; BSWAP-NEXT:    retl
1192 ;
1193 ; MOVBE-LABEL: zext_load_i32_by_i8_bswap:
1194 ; MOVBE:       # %bb.0:
1195 ; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
1196 ; MOVBE-NEXT:    movbew (%eax), %ax
1197 ; MOVBE-NEXT:    movzwl %ax, %eax
1198 ; MOVBE-NEXT:    retl
1199 ;
1200 ; BSWAP64-LABEL: zext_load_i32_by_i8_bswap:
1201 ; BSWAP64:       # %bb.0:
1202 ; BSWAP64-NEXT:    movzwl (%rdi), %eax
1203 ; BSWAP64-NEXT:    rolw $8, %ax
1204 ; BSWAP64-NEXT:    movzwl %ax, %eax
1205 ; BSWAP64-NEXT:    retq
1206 ;
1207 ; MOVBE64-LABEL: zext_load_i32_by_i8_bswap:
1208 ; MOVBE64:       # %bb.0:
1209 ; MOVBE64-NEXT:    movbew (%rdi), %ax
1210 ; MOVBE64-NEXT:    movzwl %ax, %eax
1211 ; MOVBE64-NEXT:    retq
1212   %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
1213   %tmp2 = load i8, ptr %tmp1, align 1
1214   %tmp3 = zext i8 %tmp2 to i32
1215   %tmp5 = load i8, ptr %arg, align 1
1216   %tmp6 = zext i8 %tmp5 to i32
1217   %tmp7 = shl nuw nsw i32 %tmp6, 8
1218   %tmp8 = or i32 %tmp7, %tmp3
1219   ret i32 %tmp8
1220 }
1221
1222 ; ptr p;
1223 ; ((i32) p[1] << 8) | ((i32) p[0] << 16)
     ; Byte-reversed pair placed in bits 8..23 of the result.
     ; Expected: no load combining; the CHECK lines keep two byte loads, two
     ; shifts and an or.
1224 define i32 @zext_load_i32_by_i8_bswap_shl_8(ptr %arg) {
1225 ; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
1226 ; CHECK:       # %bb.0:
1227 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
1228 ; CHECK-NEXT:    movzbl 1(%eax), %ecx
1229 ; CHECK-NEXT:    shll $8, %ecx
1230 ; CHECK-NEXT:    movzbl (%eax), %eax
1231 ; CHECK-NEXT:    shll $16, %eax
1232 ; CHECK-NEXT:    orl %ecx, %eax
1233 ; CHECK-NEXT:    retl
1234 ;
1235 ; CHECK64-LABEL: zext_load_i32_by_i8_bswap_shl_8:
1236 ; CHECK64:       # %bb.0:
1237 ; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
1238 ; CHECK64-NEXT:    shll $8, %ecx
1239 ; CHECK64-NEXT:    movzbl (%rdi), %eax
1240 ; CHECK64-NEXT:    shll $16, %eax
1241 ; CHECK64-NEXT:    orl %ecx, %eax
1242 ; CHECK64-NEXT:    retq
1243   %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
1244   %tmp2 = load i8, ptr %tmp1, align 1
1245   %tmp3 = zext i8 %tmp2 to i32
1246   %tmp30 = shl nuw nsw i32 %tmp3, 8
1247   %tmp5 = load i8, ptr %arg, align 1
1248   %tmp6 = zext i8 %tmp5 to i32
1249   %tmp7 = shl nuw nsw i32 %tmp6, 16
1250   %tmp8 = or i32 %tmp7, %tmp30
1251   ret i32 %tmp8
1252 }
1253
1254 ; ptr p;
1255 ; ((i32) p[1] << 16) | ((i32) p[0] << 24)
     ; Byte-reversed pair placed in the upper half (bits 16..31) of the result.
     ; Expected: no load combining; the CHECK lines keep two byte loads, two
     ; shifts and an or.
1256 define i32 @zext_load_i32_by_i8_bswap_shl_16(ptr %arg) {
1257 ; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
1258 ; CHECK:       # %bb.0:
1259 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
1260 ; CHECK-NEXT:    movzbl 1(%eax), %ecx
1261 ; CHECK-NEXT:    shll $16, %ecx
1262 ; CHECK-NEXT:    movzbl (%eax), %eax
1263 ; CHECK-NEXT:    shll $24, %eax
1264 ; CHECK-NEXT:    orl %ecx, %eax
1265 ; CHECK-NEXT:    retl
1266 ;
1267 ; CHECK64-LABEL: zext_load_i32_by_i8_bswap_shl_16:
1268 ; CHECK64:       # %bb.0:
1269 ; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
1270 ; CHECK64-NEXT:    shll $16, %ecx
1271 ; CHECK64-NEXT:    movzbl (%rdi), %eax
1272 ; CHECK64-NEXT:    shll $24, %eax
1273 ; CHECK64-NEXT:    orl %ecx, %eax
1274 ; CHECK64-NEXT:    retq
1275   %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
1276   %tmp2 = load i8, ptr %tmp1, align 1
1277   %tmp3 = zext i8 %tmp2 to i32
1278   %tmp30 = shl nuw nsw i32 %tmp3, 16
1279   %tmp5 = load i8, ptr %arg, align 1
1280   %tmp6 = zext i8 %tmp5 to i32
1281   %tmp7 = shl nuw nsw i32 %tmp6, 24
1282   %tmp8 = or i32 %tmp7, %tmp30
1283   ret i32 %tmp8
1284 }
1285
     ; A <4 x i8> vector load with several uses: it is stored back to %ptr after
     ; an intervening store through %clobber, and its elements 0 and 1 are
     ; extracted and recombined as e0 | (e1 << 8).
     ; Expected: both the 32-bit load and the 16-bit zero-extending load are
     ; emitted before the store to %clobber.
     ; NOTE(review): the name suggests this is the regression test for PR80911;
     ; presumably it guards against moving the narrowed load past the possibly
     ; aliasing store - confirm against the bug report.
1286 define i32 @pr80911_vector_load_multiuse(ptr %ptr, ptr %clobber) nounwind {
1287 ; CHECK-LABEL: pr80911_vector_load_multiuse:
1288 ; CHECK:       # %bb.0:
1289 ; CHECK-NEXT:    pushl %esi
1290 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1291 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
1292 ; CHECK-NEXT:    movl (%edx), %esi
1293 ; CHECK-NEXT:    movzwl (%edx), %eax
1294 ; CHECK-NEXT:    movl $0, (%ecx)
1295 ; CHECK-NEXT:    movl %esi, (%edx)
1296 ; CHECK-NEXT:    popl %esi
1297 ; CHECK-NEXT:    retl
1298 ;
1299 ; CHECK64-LABEL: pr80911_vector_load_multiuse:
1300 ; CHECK64:       # %bb.0:
1301 ; CHECK64-NEXT:    movl (%rdi), %ecx
1302 ; CHECK64-NEXT:    movzwl (%rdi), %eax
1303 ; CHECK64-NEXT:    movl $0, (%rsi)
1304 ; CHECK64-NEXT:    movl %ecx, (%rdi)
1305 ; CHECK64-NEXT:    retq
1306   %load = load <4 x i8>, ptr %ptr, align 16
1307   store i32 0, ptr %clobber
1308   store <4 x i8> %load, ptr %ptr, align 16
1309   %e1 = extractelement <4 x i8> %load, i64 1
1310   %e1.ext = zext i8 %e1 to i32
1311   %e1.ext.shift = shl nuw nsw i32 %e1.ext, 8
1312   %e0 = extractelement <4 x i8> %load, i64 0
1313   %e0.ext = zext i8 %e0 to i32
1314   %res = or i32 %e1.ext.shift, %e0.ext
1315   ret i32 %res
1316 }