llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,SSE2
   3 ; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=x86-64-v2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,SSE4
   4 ; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,AVX
   5 ; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,AVX
   6 ; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=skx -passes=slp-vectorizer -S -slp-threshold=-100 | FileCheck %s --check-prefixes=CHECK,THRESH
   7
   8 @arr = local_unnamed_addr global [32 x i32] zeroinitializer, align 16 ; i32 input array read by the @maxi* tests
   9 @arr1 = local_unnamed_addr global [32 x float] zeroinitializer, align 16 ; float input array read by the @maxf* tests
  10 @arrp = local_unnamed_addr global [32 x ptr] zeroinitializer, align 16 ; ptr array; not referenced by the functions shown in this part of the file - presumably for tests further down (verify)
  11 @var = global i32 zeroinitializer, align 8 ; scalar written by the store inside @maxi8_store_in
  12
     ; Integer min/max intrinsic declarations. None of them is called by the
     ; functions visible in this part of the file - presumably they are used by
     ; tests further down (verify).
  13 declare i32 @llvm.smax.i32(i32, i32)
  14 declare i16 @llvm.smin.i16(i16, i16)
  15 declare i64 @llvm.umax.i64(i64, i64)
  16 declare i8 @llvm.umin.i8(i8, i8)
  17 declare i32 @llvm.smin.i32(i32, i32)
  18 declare i32 @llvm.umin.i32(i32, i32)
  19
  20 define i32 @maxi8(i32) {
  21 ; CHECK-LABEL: @maxi8(
  22 ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @arr, align 16
  23 ; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP2]])
  24 ; CHECK-NEXT:    ret i32 [[TMP3]]
  25 ;
     ; Input pattern: elements 0..7 of @arr are loaded one at a time and folded
     ; left to right with icmp sgt + select, i.e. a scalar signed-max chain.
     ; The assertions above (shared by every RUN line) expect this to become a
     ; single <8 x i32> load feeding llvm.vector.reduce.smax.v8i32.
  26   %2 = load i32, ptr @arr, align 16
  27   %3 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
  28   %4 = icmp sgt i32 %2, %3
  29   %5 = select i1 %4, i32 %2, i32 %3
  30   %6 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
  31   %7 = icmp sgt i32 %5, %6
  32   %8 = select i1 %7, i32 %5, i32 %6
  33   %9 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
  34   %10 = icmp sgt i32 %8, %9
  35   %11 = select i1 %10, i32 %8, i32 %9
  36   %12 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
  37   %13 = icmp sgt i32 %11, %12
  38   %14 = select i1 %13, i32 %11, i32 %12
  39   %15 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
  40   %16 = icmp sgt i32 %14, %15
  41   %17 = select i1 %16, i32 %14, i32 %15
  42   %18 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
  43   %19 = icmp sgt i32 %17, %18
  44   %20 = select i1 %19, i32 %17, i32 %18
  45   %21 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
  46   %22 = icmp sgt i32 %20, %21
  47   %23 = select i1 %22, i32 %20, i32 %21
  48   ret i32 %23
  49 }
  50
  51 define i32 @maxi8_store_in(i32) {
  52 ; CHECK-LABEL: @maxi8_store_in(
  53 ; CHECK-NEXT:    store i32 0, ptr @var, align 8
  54 ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @arr, align 16
  55 ; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP2]])
  56 ; CHECK-NEXT:    ret i32 [[TMP3]]
  57 ;
     ; Same scalar signed-max chain over elements 0..7 of @arr as @maxi8, but
     ; with a store to a different global (@var) interleaved in the middle of
     ; the chain. The assertions above expect the store to come first in the
     ; output, followed by the <8 x i32> load and the smax reduction call.
  58   %2 = load i32, ptr @arr, align 16
  59   %3 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
  60   %4 = icmp sgt i32 %2, %3
  61   %5 = select i1 %4, i32 %2, i32 %3
  62   %6 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
  63   %7 = icmp sgt i32 %5, %6
  64   %8 = select i1 %7, i32 %5, i32 %6
  65   %9 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
  66   %10 = icmp sgt i32 %8, %9
  67   %11 = select i1 %10, i32 %8, i32 %9
  68   %12 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
  69   %13 = icmp sgt i32 %11, %12
  70   %14 = select i1 %13, i32 %11, i32 %12
     ; The store sits between the loads of elements 4 and 5; it writes @var,
     ; not @arr, so it does not touch the data being reduced.
  71   store i32 0, ptr @var, align 8
  72   %15 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
  73   %16 = icmp sgt i32 %14, %15
  74   %17 = select i1 %16, i32 %14, i32 %15
  75   %18 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
  76   %19 = icmp sgt i32 %17, %18
  77   %20 = select i1 %19, i32 %17, i32 %18
  78   %21 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
  79   %22 = icmp sgt i32 %20, %21
  80   %23 = select i1 %22, i32 %20, i32 %21
  81   ret i32 %23
  82 }
  83
  84 define i32 @maxi16(i32) {
  85 ; CHECK-LABEL: @maxi16(
  86 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @arr, align 16
  87 ; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> [[TMP2]])
  88 ; CHECK-NEXT:    ret i32 [[TMP3]]
  89 ;
     ; 16-element version of the scalar signed-max chain: elements 0..15 of
     ; @arr are folded left to right with icmp sgt + select. The assertions
     ; above expect a single <16 x i32> load feeding
     ; llvm.vector.reduce.smax.v16i32 for every RUN line.
  90   %2 = load i32, ptr @arr, align 16
  91   %3 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
  92   %4 = icmp sgt i32 %2, %3
  93   %5 = select i1 %4, i32 %2, i32 %3
  94   %6 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
  95   %7 = icmp sgt i32 %5, %6
  96   %8 = select i1 %7, i32 %5, i32 %6
  97   %9 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
  98   %10 = icmp sgt i32 %8, %9
  99   %11 = select i1 %10, i32 %8, i32 %9
 100   %12 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
 101   %13 = icmp sgt i32 %11, %12
 102   %14 = select i1 %13, i32 %11, i32 %12
 103   %15 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
 104   %16 = icmp sgt i32 %14, %15
 105   %17 = select i1 %16, i32 %14, i32 %15
 106   %18 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
 107   %19 = icmp sgt i32 %17, %18
 108   %20 = select i1 %19, i32 %17, i32 %18
 109   %21 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
 110   %22 = icmp sgt i32 %20, %21
 111   %23 = select i1 %22, i32 %20, i32 %21
 112   %24 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 8), align 16
 113   %25 = icmp sgt i32 %23, %24
 114   %26 = select i1 %25, i32 %23, i32 %24
 115   %27 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 9), align 4
 116   %28 = icmp sgt i32 %26, %27
 117   %29 = select i1 %28, i32 %26, i32 %27
 118   %30 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 10), align 8
 119   %31 = icmp sgt i32 %29, %30
 120   %32 = select i1 %31, i32 %29, i32 %30
 121   %33 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 11), align 4
 122   %34 = icmp sgt i32 %32, %33
 123   %35 = select i1 %34, i32 %32, i32 %33
 124   %36 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 12), align 16
 125   %37 = icmp sgt i32 %35, %36
 126   %38 = select i1 %37, i32 %35, i32 %36
 127   %39 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 13), align 4
 128   %40 = icmp sgt i32 %38, %39
 129   %41 = select i1 %40, i32 %38, i32 %39
 130   %42 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 14), align 8
 131   %43 = icmp sgt i32 %41, %42
 132   %44 = select i1 %43, i32 %41, i32 %42
 133   %45 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 15), align 4
 134   %46 = icmp sgt i32 %44, %45
 135   %47 = select i1 %46, i32 %44, i32 %45
 136   ret i32 %47
 137 }
 138
 139 define i32 @maxi32(i32) {
 140 ; CHECK-LABEL: @maxi32(
 141 ; CHECK-NEXT:    [[TMP2:%.*]] = load <32 x i32>, ptr @arr, align 16
 142 ; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> [[TMP2]])
 143 ; CHECK-NEXT:    ret i32 [[TMP3]]
 144 ;
     ; 32-element version of the scalar signed-max chain: every element of
     ; @arr (indices 0..31) is folded left to right with icmp sgt + select.
     ; The assertions above expect a single <32 x i32> load feeding
     ; llvm.vector.reduce.smax.v32i32 for every RUN line.
 145   %2 = load i32, ptr @arr, align 16
 146   %3 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
 147   %4 = icmp sgt i32 %2, %3
 148   %5 = select i1 %4, i32 %2, i32 %3
 149   %6 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
 150   %7 = icmp sgt i32 %5, %6
 151   %8 = select i1 %7, i32 %5, i32 %6
 152   %9 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
 153   %10 = icmp sgt i32 %8, %9
 154   %11 = select i1 %10, i32 %8, i32 %9
 155   %12 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
 156   %13 = icmp sgt i32 %11, %12
 157   %14 = select i1 %13, i32 %11, i32 %12
 158   %15 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
 159   %16 = icmp sgt i32 %14, %15
 160   %17 = select i1 %16, i32 %14, i32 %15
 161   %18 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
 162   %19 = icmp sgt i32 %17, %18
 163   %20 = select i1 %19, i32 %17, i32 %18
 164   %21 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
 165   %22 = icmp sgt i32 %20, %21
 166   %23 = select i1 %22, i32 %20, i32 %21
 167   %24 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 8), align 16
 168   %25 = icmp sgt i32 %23, %24
 169   %26 = select i1 %25, i32 %23, i32 %24
 170   %27 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 9), align 4
 171   %28 = icmp sgt i32 %26, %27
 172   %29 = select i1 %28, i32 %26, i32 %27
 173   %30 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 10), align 8
 174   %31 = icmp sgt i32 %29, %30
 175   %32 = select i1 %31, i32 %29, i32 %30
 176   %33 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 11), align 4
 177   %34 = icmp sgt i32 %32, %33
 178   %35 = select i1 %34, i32 %32, i32 %33
 179   %36 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 12), align 16
 180   %37 = icmp sgt i32 %35, %36
 181   %38 = select i1 %37, i32 %35, i32 %36
 182   %39 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 13), align 4
 183   %40 = icmp sgt i32 %38, %39
 184   %41 = select i1 %40, i32 %38, i32 %39
 185   %42 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 14), align 8
 186   %43 = icmp sgt i32 %41, %42
 187   %44 = select i1 %43, i32 %41, i32 %42
 188   %45 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 15), align 4
 189   %46 = icmp sgt i32 %44, %45
 190   %47 = select i1 %46, i32 %44, i32 %45
 191   %48 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 16), align 16
 192   %49 = icmp sgt i32 %47, %48
 193   %50 = select i1 %49, i32 %47, i32 %48
 194   %51 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 17), align 4
 195   %52 = icmp sgt i32 %50, %51
 196   %53 = select i1 %52, i32 %50, i32 %51
 197   %54 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 18), align 8
 198   %55 = icmp sgt i32 %53, %54
 199   %56 = select i1 %55, i32 %53, i32 %54
 200   %57 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 19), align 4
 201   %58 = icmp sgt i32 %56, %57
 202   %59 = select i1 %58, i32 %56, i32 %57
 203   %60 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 20), align 16
 204   %61 = icmp sgt i32 %59, %60
 205   %62 = select i1 %61, i32 %59, i32 %60
 206   %63 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 21), align 4
 207   %64 = icmp sgt i32 %62, %63
 208   %65 = select i1 %64, i32 %62, i32 %63
 209   %66 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 22), align 8
 210   %67 = icmp sgt i32 %65, %66
 211   %68 = select i1 %67, i32 %65, i32 %66
 212   %69 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 23), align 4
 213   %70 = icmp sgt i32 %68, %69
 214   %71 = select i1 %70, i32 %68, i32 %69
 215   %72 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 24), align 16
 216   %73 = icmp sgt i32 %71, %72
 217   %74 = select i1 %73, i32 %71, i32 %72
 218   %75 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 25), align 4
 219   %76 = icmp sgt i32 %74, %75
 220   %77 = select i1 %76, i32 %74, i32 %75
 221   %78 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 26), align 8
 222   %79 = icmp sgt i32 %77, %78
 223   %80 = select i1 %79, i32 %77, i32 %78
 224   %81 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 27), align 4
 225   %82 = icmp sgt i32 %80, %81
 226   %83 = select i1 %82, i32 %80, i32 %81
 227   %84 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 28), align 16
 228   %85 = icmp sgt i32 %83, %84
 229   %86 = select i1 %85, i32 %83, i32 %84
 230   %87 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 29), align 4
 231   %88 = icmp sgt i32 %86, %87
 232   %89 = select i1 %88, i32 %86, i32 %87
 233   %90 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 30), align 8
 234   %91 = icmp sgt i32 %89, %90
 235   %92 = select i1 %91, i32 %89, i32 %90
 236   %93 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 31), align 4
 237   %94 = icmp sgt i32 %92, %93
 238   %95 = select i1 %94, i32 %92, i32 %93
 239   ret i32 %95
 240 }
 241
 242 ; Note: legacy test - InstCombine creates maxnum intrinsics for fcmp+select with fastmath on the select.
 243
 244 define float @maxf8(float) {
 245 ; DEFAULT-LABEL: @maxf8(
 246 ; DEFAULT-NEXT:    [[TMP2:%.*]] = load float, ptr @arr1, align 16
 247 ; DEFAULT-NEXT:    [[TMP3:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 1), align 4
 248 ; DEFAULT-NEXT:    [[TMP4:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]]
 249 ; DEFAULT-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], float [[TMP2]], float [[TMP3]]
 250 ; DEFAULT-NEXT:    [[TMP6:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 2), align 8
 251 ; DEFAULT-NEXT:    [[TMP7:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]]
 252 ; DEFAULT-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]]
 253 ; DEFAULT-NEXT:    [[TMP9:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 3), align 4
 254 ; DEFAULT-NEXT:    [[TMP10:%.*]] = fcmp fast ogt float [[TMP8]], [[TMP9]]
 255 ; DEFAULT-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], float [[TMP8]], float [[TMP9]]
 256 ; DEFAULT-NEXT:    [[TMP12:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 4), align 16
 257 ; DEFAULT-NEXT:    [[TMP13:%.*]] = fcmp fast ogt float [[TMP11]], [[TMP12]]
 258 ; DEFAULT-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP11]], float [[TMP12]]
 259 ; DEFAULT-NEXT:    [[TMP15:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 5), align 4
 260 ; DEFAULT-NEXT:    [[TMP16:%.*]] = fcmp fast ogt float [[TMP14]], [[TMP15]]
 261 ; DEFAULT-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP14]], float [[TMP15]]
 262 ; DEFAULT-NEXT:    [[TMP18:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 6), align 8
 263 ; DEFAULT-NEXT:    [[TMP19:%.*]] = fcmp fast ogt float [[TMP17]], [[TMP18]]
 264 ; DEFAULT-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP17]], float [[TMP18]]
 265 ; DEFAULT-NEXT:    [[TMP21:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 7), align 4
 266 ; DEFAULT-NEXT:    [[TMP22:%.*]] = fcmp fast ogt float [[TMP20]], [[TMP21]]
 267 ; DEFAULT-NEXT:    [[TMP23:%.*]] = select i1 [[TMP22]], float [[TMP20]], float [[TMP21]]
 268 ; DEFAULT-NEXT:    ret float [[TMP23]]
 269 ;
 270 ; THRESH-LABEL: @maxf8(
 271 ; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr @arr1, align 16
 272 ; THRESH-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
 273 ; THRESH-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
 274 ; THRESH-NEXT:    [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
 275 ; THRESH-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP4]]
 276 ; THRESH-NEXT:    [[TMP7:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 2), align 8
 277 ; THRESH-NEXT:    [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]]
 278 ; THRESH-NEXT:    [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float [[TMP7]]
 279 ; THRESH-NEXT:    [[TMP10:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 3), align 4
 280 ; THRESH-NEXT:    [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]]
 281 ; THRESH-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float [[TMP10]]
 282 ; THRESH-NEXT:    [[TMP13:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 4), align 16
 283 ; THRESH-NEXT:    [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]]
 284 ; THRESH-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float [[TMP13]]
 285 ; THRESH-NEXT:    [[TMP16:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 5), align 4
 286 ; THRESH-NEXT:    [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]]
 287 ; THRESH-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float [[TMP16]]
 288 ; THRESH-NEXT:    [[TMP19:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 6), align 8
 289 ; THRESH-NEXT:    [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]]
 290 ; THRESH-NEXT:    [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float [[TMP19]]
 291 ; THRESH-NEXT:    [[TMP22:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 7), align 4
 292 ; THRESH-NEXT:    [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]]
 293 ; THRESH-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float [[TMP22]]
 294 ; THRESH-NEXT:    ret float [[TMP24]]
 295 ;
     ; Float counterpart of the integer max chains: elements 0..7 of @arr1 are
     ; folded left to right with `fcmp fast ogt` + select. Only the fcmp
     ; carries fast-math flags; the select has none.
     ; The DEFAULT assertions expect the scalar chain to be left untouched.
     ; The THRESH assertions (the run with -slp-threshold=-100) expect only the
     ; first two loads to be merged into one <2 x float> load whose lanes are
     ; extracted again, with the rest of the chain still scalar.
 296   %2 = load float, ptr @arr1, align 16
 297   %3 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 1), align 4
 298   %4 = fcmp fast ogt float %2, %3
 299   %5 = select i1 %4, float %2, float %3
 300   %6 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 2), align 8
 301   %7 = fcmp fast ogt float %5, %6
 302   %8 = select i1 %7, float %5, float %6
 303   %9 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 3), align 4
 304   %10 = fcmp fast ogt float %8, %9
 305   %11 = select i1 %10, float %8, float %9
 306   %12 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 4), align 16
 307   %13 = fcmp fast ogt float %11, %12
 308   %14 = select i1 %13, float %11, float %12
 309   %15 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 5), align 4
 310   %16 = fcmp fast ogt float %14, %15
 311   %17 = select i1 %16, float %14, float %15
 312   %18 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 6), align 8
 313   %19 = fcmp fast ogt float %17, %18
 314   %20 = select i1 %19, float %17, float %18
 315   %21 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 7), align 4
 316   %22 = fcmp fast ogt float %20, %21
 317   %23 = select i1 %22, float %20, float %21
 318   ret float %23
 319 }
 320
 321 ; Note: legacy test - InstCombine creates maxnum intrinsics for fcmp+select with fastmath on the select.
 322
 323 define float @maxf16(float) {
 324 ; DEFAULT-LABEL: @maxf16(
 325 ; DEFAULT-NEXT:    [[TMP2:%.*]] = load float, ptr @arr1, align 16
 326 ; DEFAULT-NEXT:    [[TMP3:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 1), align 4
 327 ; DEFAULT-NEXT:    [[TMP4:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]]
 328 ; DEFAULT-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], float [[TMP2]], float [[TMP3]]
 329 ; DEFAULT-NEXT:    [[TMP6:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 2), align 8
 330 ; DEFAULT-NEXT:    [[TMP7:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]]
 331 ; DEFAULT-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]]
 332 ; DEFAULT-NEXT:    [[TMP9:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 3), align 4
 333 ; DEFAULT-NEXT:    [[TMP10:%.*]] = fcmp fast ogt float [[TMP8]], [[TMP9]]
 334 ; DEFAULT-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], float [[TMP8]], float [[TMP9]]
 335 ; DEFAULT-NEXT:    [[TMP12:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 4), align 16
 336 ; DEFAULT-NEXT:    [[TMP13:%.*]] = fcmp fast ogt float [[TMP11]], [[TMP12]]
 337 ; DEFAULT-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP11]], float [[TMP12]]
 338 ; DEFAULT-NEXT:    [[TMP15:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 5), align 4
 339 ; DEFAULT-NEXT:    [[TMP16:%.*]] = fcmp fast ogt float [[TMP14]], [[TMP15]]
 340 ; DEFAULT-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP14]], float [[TMP15]]
 341 ; DEFAULT-NEXT:    [[TMP18:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 6), align 8
 342 ; DEFAULT-NEXT:    [[TMP19:%.*]] = fcmp fast ogt float [[TMP17]], [[TMP18]]
 343 ; DEFAULT-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP17]], float [[TMP18]]
 344 ; DEFAULT-NEXT:    [[TMP21:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 7), align 4
 345 ; DEFAULT-NEXT:    [[TMP22:%.*]] = fcmp fast ogt float [[TMP20]], [[TMP21]]
 346 ; DEFAULT-NEXT:    [[TMP23:%.*]] = select i1 [[TMP22]], float [[TMP20]], float [[TMP21]]
 347 ; DEFAULT-NEXT:    [[TMP24:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 8), align 16
 348 ; DEFAULT-NEXT:    [[TMP25:%.*]] = fcmp fast ogt float [[TMP23]], [[TMP24]]
 349 ; DEFAULT-NEXT:    [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP23]], float [[TMP24]]
 350 ; DEFAULT-NEXT:    [[TMP27:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 9), align 4
 351 ; DEFAULT-NEXT:    [[TMP28:%.*]] = fcmp fast ogt float [[TMP26]], [[TMP27]]
 352 ; DEFAULT-NEXT:    [[TMP29:%.*]] = select i1 [[TMP28]], float [[TMP26]], float [[TMP27]]
 353 ; DEFAULT-NEXT:    [[TMP30:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 10), align 8
 354 ; DEFAULT-NEXT:    [[TMP31:%.*]] = fcmp fast ogt float [[TMP29]], [[TMP30]]
 355 ; DEFAULT-NEXT:    [[TMP32:%.*]] = select i1 [[TMP31]], float [[TMP29]], float [[TMP30]]
 356 ; DEFAULT-NEXT:    [[TMP33:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 11), align 4
 357 ; DEFAULT-NEXT:    [[TMP34:%.*]] = fcmp fast ogt float [[TMP32]], [[TMP33]]
 358 ; DEFAULT-NEXT:    [[TMP35:%.*]] = select i1 [[TMP34]], float [[TMP32]], float [[TMP33]]
 359 ; DEFAULT-NEXT:    [[TMP36:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 12), align 16
 360 ; DEFAULT-NEXT:    [[TMP37:%.*]] = fcmp fast ogt float [[TMP35]], [[TMP36]]
 361 ; DEFAULT-NEXT:    [[TMP38:%.*]] = select i1 [[TMP37]], float [[TMP35]], float [[TMP36]]
 362 ; DEFAULT-NEXT:    [[TMP39:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 13), align 4
 363 ; DEFAULT-NEXT:    [[TMP40:%.*]] = fcmp fast ogt float [[TMP38]], [[TMP39]]
 364 ; DEFAULT-NEXT:    [[TMP41:%.*]] = select i1 [[TMP40]], float [[TMP38]], float [[TMP39]]
 365 ; DEFAULT-NEXT:    [[TMP42:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 14), align 8
 366 ; DEFAULT-NEXT:    [[TMP43:%.*]] = fcmp fast ogt float [[TMP41]], [[TMP42]]
 367 ; DEFAULT-NEXT:    [[TMP44:%.*]] = select i1 [[TMP43]], float [[TMP41]], float [[TMP42]]
 368 ; DEFAULT-NEXT:    [[TMP45:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 15), align 4
 369 ; DEFAULT-NEXT:    [[TMP46:%.*]] = fcmp fast ogt float [[TMP44]], [[TMP45]]
 370 ; DEFAULT-NEXT:    [[TMP47:%.*]] = select i1 [[TMP46]], float [[TMP44]], float [[TMP45]]
 371 ; DEFAULT-NEXT:    ret float [[TMP47]]
 372 ;
 373 ; THRESH-LABEL: @maxf16(
 374 ; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr @arr1, align 16
 375 ; THRESH-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
 376 ; THRESH-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
 377 ; THRESH-NEXT:    [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
 378 ; THRESH-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP4]]
 379 ; THRESH-NEXT:    [[TMP7:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 2), align 8
 380 ; THRESH-NEXT:    [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]]
 381 ; THRESH-NEXT:    [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float [[TMP7]]
 382 ; THRESH-NEXT:    [[TMP10:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 3), align 4
 383 ; THRESH-NEXT:    [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]]
 384 ; THRESH-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float [[TMP10]]
 385 ; THRESH-NEXT:    [[TMP13:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 4), align 16
 386 ; THRESH-NEXT:    [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]]
 387 ; THRESH-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float [[TMP13]]
 388 ; THRESH-NEXT:    [[TMP16:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 5), align 4
 389 ; THRESH-NEXT:    [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]]
 390 ; THRESH-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float [[TMP16]]
 391 ; THRESH-NEXT:    [[TMP19:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 6), align 8
 392 ; THRESH-NEXT:    [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]]
 393 ; THRESH-NEXT:    [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float [[TMP19]]
 394 ; THRESH-NEXT:    [[TMP22:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 7), align 4
 395 ; THRESH-NEXT:    [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]]
 396 ; THRESH-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float [[TMP22]]
 397 ; THRESH-NEXT:    [[TMP25:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 8), align 16
 398 ; THRESH-NEXT:    [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]]
 399 ; THRESH-NEXT:    [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float [[TMP25]]
 400 ; THRESH-NEXT:    [[TMP28:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 9), align 4
 401 ; THRESH-NEXT:    [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]]
 402 ; THRESH-NEXT:    [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float [[TMP28]]
 403 ; THRESH-NEXT:    [[TMP31:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 10), align 8
 404 ; THRESH-NEXT:    [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]]
 405 ; THRESH-NEXT:    [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float [[TMP31]]
 406 ; THRESH-NEXT:    [[TMP34:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 11), align 4
 407 ; THRESH-NEXT:    [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]]
 408 ; THRESH-NEXT:    [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float [[TMP34]]
 409 ; THRESH-NEXT:    [[TMP37:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 12), align 16
 410 ; THRESH-NEXT:    [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]]
 411 ; THRESH-NEXT:    [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float [[TMP37]]
 412 ; THRESH-NEXT:    [[TMP40:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 13), align 4
 413 ; THRESH-NEXT:    [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]]
 414 ; THRESH-NEXT:    [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float [[TMP40]]
 415 ; THRESH-NEXT:    [[TMP43:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 14), align 8
 416 ; THRESH-NEXT:    [[TMP44:%.*]] = fcmp fast ogt float [[TMP42]], [[TMP43]]
 417 ; THRESH-NEXT:    [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float [[TMP43]]
 418 ; THRESH-NEXT:    [[TMP46:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 15), align 4
 419 ; THRESH-NEXT:    [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]]
 420 ; THRESH-NEXT:    [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP45]], float [[TMP46]]
 421 ; THRESH-NEXT:    ret float [[TMP48]]
 422 ;
     ; 16-element float max chain: elements 0..15 of @arr1 are folded left to
     ; right with `fcmp fast ogt` + select. Only the fcmp carries fast-math
     ; flags; the select has none.
     ; The DEFAULT assertions expect the scalar chain to be left untouched.
     ; The THRESH assertions (the run with -slp-threshold=-100) expect only the
     ; first two loads to be merged into one <2 x float> load whose lanes are
     ; extracted again, with the rest of the chain still scalar.
 423   %2 = load float, ptr @arr1, align 16
 424   %3 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 1), align 4
 425   %4 = fcmp fast ogt float %2, %3
 426   %5 = select i1 %4, float %2, float %3
 427   %6 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 2), align 8
 428   %7 = fcmp fast ogt float %5, %6
 429   %8 = select i1 %7, float %5, float %6
 430   %9 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 3), align 4
 431   %10 = fcmp fast ogt float %8, %9
 432   %11 = select i1 %10, float %8, float %9
 433   %12 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 4), align 16
 434   %13 = fcmp fast ogt float %11, %12
 435   %14 = select i1 %13, float %11, float %12
 436   %15 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 5), align 4
 437   %16 = fcmp fast ogt float %14, %15
 438   %17 = select i1 %16, float %14, float %15
 439   %18 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 6), align 8
 440   %19 = fcmp fast ogt float %17, %18
 441   %20 = select i1 %19, float %17, float %18
 442   %21 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 7), align 4
 443   %22 = fcmp fast ogt float %20, %21
 444   %23 = select i1 %22, float %20, float %21
 445   %24 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 8), align 16
 446   %25 = fcmp fast ogt float %23, %24
 447   %26 = select i1 %25, float %23, float %24
 448   %27 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 9), align 4
 449   %28 = fcmp fast ogt float %26, %27
 450   %29 = select i1 %28, float %26, float %27
 451   %30 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 10), align 8
 452   %31 = fcmp fast ogt float %29, %30
 453   %32 = select i1 %31, float %29, float %30
 454   %33 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 11), align 4
 455   %34 = fcmp fast ogt float %32, %33
 456   %35 = select i1 %34, float %32, float %33
 457   %36 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 12), align 16
 458   %37 = fcmp fast ogt float %35, %36
 459   %38 = select i1 %37, float %35, float %36
 460   %39 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 13), align 4
 461   %40 = fcmp fast ogt float %38, %39
 462   %41 = select i1 %40, float %38, float %39
 463   %42 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 14), align 8
 464   %43 = fcmp fast ogt float %41, %42
 465   %44 = select i1 %43, float %41, float %42
 466   %45 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 15), align 4
 467   %46 = fcmp fast ogt float %44, %45
 468   %47 = select i1 %46, float %44, float %45
 469   ret float %47
 470 }
 471
 472 ; Note: legacy test - InstCombine creates maxnum intrinsics for fcmp+select with fastmath on the select.
 473
 474 define float @maxf32(float) {
     ; Scalar max reduction over all 32 floats of @arr1: every step is an
     ; `fcmp fast ogt` whose result drives a `select` that keeps the larger
     ; value. The unnamed float argument is not used.
     ; Expected output per the assertions below: the runs that use the DEFAULT
     ; prefix keep the scalar chain as written, while the THRESH run
     ; (-slp-threshold=-100) only merges the first two loads into one
     ; <2 x float> load and leaves the rest of the chain scalar.
 475 ; DEFAULT-LABEL: @maxf32(
 476 ; DEFAULT-NEXT:    [[TMP2:%.*]] = load float, ptr @arr1, align 16
 477 ; DEFAULT-NEXT:    [[TMP3:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 1), align 4
 478 ; DEFAULT-NEXT:    [[TMP4:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]]
 479 ; DEFAULT-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], float [[TMP2]], float [[TMP3]]
 480 ; DEFAULT-NEXT:    [[TMP6:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 2), align 8
 481 ; DEFAULT-NEXT:    [[TMP7:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]]
 482 ; DEFAULT-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]]
 483 ; DEFAULT-NEXT:    [[TMP9:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 3), align 4
 484 ; DEFAULT-NEXT:    [[TMP10:%.*]] = fcmp fast ogt float [[TMP8]], [[TMP9]]
 485 ; DEFAULT-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], float [[TMP8]], float [[TMP9]]
 486 ; DEFAULT-NEXT:    [[TMP12:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 4), align 16
 487 ; DEFAULT-NEXT:    [[TMP13:%.*]] = fcmp fast ogt float [[TMP11]], [[TMP12]]
 488 ; DEFAULT-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP11]], float [[TMP12]]
 489 ; DEFAULT-NEXT:    [[TMP15:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 5), align 4
 490 ; DEFAULT-NEXT:    [[TMP16:%.*]] = fcmp fast ogt float [[TMP14]], [[TMP15]]
 491 ; DEFAULT-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP14]], float [[TMP15]]
 492 ; DEFAULT-NEXT:    [[TMP18:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 6), align 8
 493 ; DEFAULT-NEXT:    [[TMP19:%.*]] = fcmp fast ogt float [[TMP17]], [[TMP18]]
 494 ; DEFAULT-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP17]], float [[TMP18]]
 495 ; DEFAULT-NEXT:    [[TMP21:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 7), align 4
 496 ; DEFAULT-NEXT:    [[TMP22:%.*]] = fcmp fast ogt float [[TMP20]], [[TMP21]]
 497 ; DEFAULT-NEXT:    [[TMP23:%.*]] = select i1 [[TMP22]], float [[TMP20]], float [[TMP21]]
 498 ; DEFAULT-NEXT:    [[TMP24:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 8), align 16
 499 ; DEFAULT-NEXT:    [[TMP25:%.*]] = fcmp fast ogt float [[TMP23]], [[TMP24]]
 500 ; DEFAULT-NEXT:    [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP23]], float [[TMP24]]
 501 ; DEFAULT-NEXT:    [[TMP27:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 9), align 4
 502 ; DEFAULT-NEXT:    [[TMP28:%.*]] = fcmp fast ogt float [[TMP26]], [[TMP27]]
 503 ; DEFAULT-NEXT:    [[TMP29:%.*]] = select i1 [[TMP28]], float [[TMP26]], float [[TMP27]]
 504 ; DEFAULT-NEXT:    [[TMP30:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 10), align 8
 505 ; DEFAULT-NEXT:    [[TMP31:%.*]] = fcmp fast ogt float [[TMP29]], [[TMP30]]
 506 ; DEFAULT-NEXT:    [[TMP32:%.*]] = select i1 [[TMP31]], float [[TMP29]], float [[TMP30]]
 507 ; DEFAULT-NEXT:    [[TMP33:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 11), align 4
 508 ; DEFAULT-NEXT:    [[TMP34:%.*]] = fcmp fast ogt float [[TMP32]], [[TMP33]]
 509 ; DEFAULT-NEXT:    [[TMP35:%.*]] = select i1 [[TMP34]], float [[TMP32]], float [[TMP33]]
 510 ; DEFAULT-NEXT:    [[TMP36:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 12), align 16
 511 ; DEFAULT-NEXT:    [[TMP37:%.*]] = fcmp fast ogt float [[TMP35]], [[TMP36]]
 512 ; DEFAULT-NEXT:    [[TMP38:%.*]] = select i1 [[TMP37]], float [[TMP35]], float [[TMP36]]
 513 ; DEFAULT-NEXT:    [[TMP39:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 13), align 4
 514 ; DEFAULT-NEXT:    [[TMP40:%.*]] = fcmp fast ogt float [[TMP38]], [[TMP39]]
 515 ; DEFAULT-NEXT:    [[TMP41:%.*]] = select i1 [[TMP40]], float [[TMP38]], float [[TMP39]]
 516 ; DEFAULT-NEXT:    [[TMP42:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 14), align 8
 517 ; DEFAULT-NEXT:    [[TMP43:%.*]] = fcmp fast ogt float [[TMP41]], [[TMP42]]
 518 ; DEFAULT-NEXT:    [[TMP44:%.*]] = select i1 [[TMP43]], float [[TMP41]], float [[TMP42]]
 519 ; DEFAULT-NEXT:    [[TMP45:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 15), align 4
 520 ; DEFAULT-NEXT:    [[TMP46:%.*]] = fcmp fast ogt float [[TMP44]], [[TMP45]]
 521 ; DEFAULT-NEXT:    [[TMP47:%.*]] = select i1 [[TMP46]], float [[TMP44]], float [[TMP45]]
 522 ; DEFAULT-NEXT:    [[TMP48:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 16), align 16
 523 ; DEFAULT-NEXT:    [[TMP49:%.*]] = fcmp fast ogt float [[TMP47]], [[TMP48]]
 524 ; DEFAULT-NEXT:    [[TMP50:%.*]] = select i1 [[TMP49]], float [[TMP47]], float [[TMP48]]
 525 ; DEFAULT-NEXT:    [[TMP51:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 17), align 4
 526 ; DEFAULT-NEXT:    [[TMP52:%.*]] = fcmp fast ogt float [[TMP50]], [[TMP51]]
 527 ; DEFAULT-NEXT:    [[TMP53:%.*]] = select i1 [[TMP52]], float [[TMP50]], float [[TMP51]]
 528 ; DEFAULT-NEXT:    [[TMP54:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 18), align 8
 529 ; DEFAULT-NEXT:    [[TMP55:%.*]] = fcmp fast ogt float [[TMP53]], [[TMP54]]
 530 ; DEFAULT-NEXT:    [[TMP56:%.*]] = select i1 [[TMP55]], float [[TMP53]], float [[TMP54]]
 531 ; DEFAULT-NEXT:    [[TMP57:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 19), align 4
 532 ; DEFAULT-NEXT:    [[TMP58:%.*]] = fcmp fast ogt float [[TMP56]], [[TMP57]]
 533 ; DEFAULT-NEXT:    [[TMP59:%.*]] = select i1 [[TMP58]], float [[TMP56]], float [[TMP57]]
 534 ; DEFAULT-NEXT:    [[TMP60:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 20), align 16
 535 ; DEFAULT-NEXT:    [[TMP61:%.*]] = fcmp fast ogt float [[TMP59]], [[TMP60]]
 536 ; DEFAULT-NEXT:    [[TMP62:%.*]] = select i1 [[TMP61]], float [[TMP59]], float [[TMP60]]
 537 ; DEFAULT-NEXT:    [[TMP63:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 21), align 4
 538 ; DEFAULT-NEXT:    [[TMP64:%.*]] = fcmp fast ogt float [[TMP62]], [[TMP63]]
 539 ; DEFAULT-NEXT:    [[TMP65:%.*]] = select i1 [[TMP64]], float [[TMP62]], float [[TMP63]]
 540 ; DEFAULT-NEXT:    [[TMP66:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 22), align 8
 541 ; DEFAULT-NEXT:    [[TMP67:%.*]] = fcmp fast ogt float [[TMP65]], [[TMP66]]
 542 ; DEFAULT-NEXT:    [[TMP68:%.*]] = select i1 [[TMP67]], float [[TMP65]], float [[TMP66]]
 543 ; DEFAULT-NEXT:    [[TMP69:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 23), align 4
 544 ; DEFAULT-NEXT:    [[TMP70:%.*]] = fcmp fast ogt float [[TMP68]], [[TMP69]]
 545 ; DEFAULT-NEXT:    [[TMP71:%.*]] = select i1 [[TMP70]], float [[TMP68]], float [[TMP69]]
 546 ; DEFAULT-NEXT:    [[TMP72:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 24), align 16
 547 ; DEFAULT-NEXT:    [[TMP73:%.*]] = fcmp fast ogt float [[TMP71]], [[TMP72]]
 548 ; DEFAULT-NEXT:    [[TMP74:%.*]] = select i1 [[TMP73]], float [[TMP71]], float [[TMP72]]
 549 ; DEFAULT-NEXT:    [[TMP75:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 25), align 4
 550 ; DEFAULT-NEXT:    [[TMP76:%.*]] = fcmp fast ogt float [[TMP74]], [[TMP75]]
 551 ; DEFAULT-NEXT:    [[TMP77:%.*]] = select i1 [[TMP76]], float [[TMP74]], float [[TMP75]]
 552 ; DEFAULT-NEXT:    [[TMP78:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 26), align 8
 553 ; DEFAULT-NEXT:    [[TMP79:%.*]] = fcmp fast ogt float [[TMP77]], [[TMP78]]
 554 ; DEFAULT-NEXT:    [[TMP80:%.*]] = select i1 [[TMP79]], float [[TMP77]], float [[TMP78]]
 555 ; DEFAULT-NEXT:    [[TMP81:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 27), align 4
 556 ; DEFAULT-NEXT:    [[TMP82:%.*]] = fcmp fast ogt float [[TMP80]], [[TMP81]]
 557 ; DEFAULT-NEXT:    [[TMP83:%.*]] = select i1 [[TMP82]], float [[TMP80]], float [[TMP81]]
 558 ; DEFAULT-NEXT:    [[TMP84:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 28), align 16
 559 ; DEFAULT-NEXT:    [[TMP85:%.*]] = fcmp fast ogt float [[TMP83]], [[TMP84]]
 560 ; DEFAULT-NEXT:    [[TMP86:%.*]] = select i1 [[TMP85]], float [[TMP83]], float [[TMP84]]
 561 ; DEFAULT-NEXT:    [[TMP87:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 29), align 4
 562 ; DEFAULT-NEXT:    [[TMP88:%.*]] = fcmp fast ogt float [[TMP86]], [[TMP87]]
 563 ; DEFAULT-NEXT:    [[TMP89:%.*]] = select i1 [[TMP88]], float [[TMP86]], float [[TMP87]]
 564 ; DEFAULT-NEXT:    [[TMP90:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 30), align 8
 565 ; DEFAULT-NEXT:    [[TMP91:%.*]] = fcmp fast ogt float [[TMP89]], [[TMP90]]
 566 ; DEFAULT-NEXT:    [[TMP92:%.*]] = select i1 [[TMP91]], float [[TMP89]], float [[TMP90]]
 567 ; DEFAULT-NEXT:    [[TMP93:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 31), align 4
 568 ; DEFAULT-NEXT:    [[TMP94:%.*]] = fcmp fast ogt float [[TMP92]], [[TMP93]]
 569 ; DEFAULT-NEXT:    [[TMP95:%.*]] = select i1 [[TMP94]], float [[TMP92]], float [[TMP93]]
 570 ; DEFAULT-NEXT:    ret float [[TMP95]]
 571 ;
 572 ; THRESH-LABEL: @maxf32(
 573 ; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr @arr1, align 16
 574 ; THRESH-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
 575 ; THRESH-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
 576 ; THRESH-NEXT:    [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
 577 ; THRESH-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP4]]
 578 ; THRESH-NEXT:    [[TMP7:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 2), align 8
 579 ; THRESH-NEXT:    [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]]
 580 ; THRESH-NEXT:    [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float [[TMP7]]
 581 ; THRESH-NEXT:    [[TMP10:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 3), align 4
 582 ; THRESH-NEXT:    [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]]
 583 ; THRESH-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float [[TMP10]]
 584 ; THRESH-NEXT:    [[TMP13:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 4), align 16
 585 ; THRESH-NEXT:    [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]]
 586 ; THRESH-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float [[TMP13]]
 587 ; THRESH-NEXT:    [[TMP16:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 5), align 4
 588 ; THRESH-NEXT:    [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]]
 589 ; THRESH-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float [[TMP16]]
 590 ; THRESH-NEXT:    [[TMP19:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 6), align 8
 591 ; THRESH-NEXT:    [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]]
 592 ; THRESH-NEXT:    [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float [[TMP19]]
 593 ; THRESH-NEXT:    [[TMP22:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 7), align 4
 594 ; THRESH-NEXT:    [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]]
 595 ; THRESH-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float [[TMP22]]
 596 ; THRESH-NEXT:    [[TMP25:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 8), align 16
 597 ; THRESH-NEXT:    [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]]
 598 ; THRESH-NEXT:    [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float [[TMP25]]
 599 ; THRESH-NEXT:    [[TMP28:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 9), align 4
 600 ; THRESH-NEXT:    [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]]
 601 ; THRESH-NEXT:    [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float [[TMP28]]
 602 ; THRESH-NEXT:    [[TMP31:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 10), align 8
 603 ; THRESH-NEXT:    [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]]
 604 ; THRESH-NEXT:    [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float [[TMP31]]
 605 ; THRESH-NEXT:    [[TMP34:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 11), align 4
 606 ; THRESH-NEXT:    [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]]
 607 ; THRESH-NEXT:    [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float [[TMP34]]
 608 ; THRESH-NEXT:    [[TMP37:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 12), align 16
 609 ; THRESH-NEXT:    [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]]
 610 ; THRESH-NEXT:    [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float [[TMP37]]
 611 ; THRESH-NEXT:    [[TMP40:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 13), align 4
 612 ; THRESH-NEXT:    [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]]
 613 ; THRESH-NEXT:    [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float [[TMP40]]
 614 ; THRESH-NEXT:    [[TMP43:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 14), align 8
 615 ; THRESH-NEXT:    [[TMP44:%.*]] = fcmp fast ogt float [[TMP42]], [[TMP43]]
 616 ; THRESH-NEXT:    [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float [[TMP43]]
 617 ; THRESH-NEXT:    [[TMP46:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 15), align 4
 618 ; THRESH-NEXT:    [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]]
 619 ; THRESH-NEXT:    [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP45]], float [[TMP46]]
 620 ; THRESH-NEXT:    [[TMP49:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 16), align 16
 621 ; THRESH-NEXT:    [[TMP50:%.*]] = fcmp fast ogt float [[TMP48]], [[TMP49]]
 622 ; THRESH-NEXT:    [[TMP51:%.*]] = select i1 [[TMP50]], float [[TMP48]], float [[TMP49]]
 623 ; THRESH-NEXT:    [[TMP52:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 17), align 4
 624 ; THRESH-NEXT:    [[TMP53:%.*]] = fcmp fast ogt float [[TMP51]], [[TMP52]]
 625 ; THRESH-NEXT:    [[TMP54:%.*]] = select i1 [[TMP53]], float [[TMP51]], float [[TMP52]]
 626 ; THRESH-NEXT:    [[TMP55:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 18), align 8
 627 ; THRESH-NEXT:    [[TMP56:%.*]] = fcmp fast ogt float [[TMP54]], [[TMP55]]
 628 ; THRESH-NEXT:    [[TMP57:%.*]] = select i1 [[TMP56]], float [[TMP54]], float [[TMP55]]
 629 ; THRESH-NEXT:    [[TMP58:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 19), align 4
 630 ; THRESH-NEXT:    [[TMP59:%.*]] = fcmp fast ogt float [[TMP57]], [[TMP58]]
 631 ; THRESH-NEXT:    [[TMP60:%.*]] = select i1 [[TMP59]], float [[TMP57]], float [[TMP58]]
 632 ; THRESH-NEXT:    [[TMP61:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 20), align 16
 633 ; THRESH-NEXT:    [[TMP62:%.*]] = fcmp fast ogt float [[TMP60]], [[TMP61]]
 634 ; THRESH-NEXT:    [[TMP63:%.*]] = select i1 [[TMP62]], float [[TMP60]], float [[TMP61]]
 635 ; THRESH-NEXT:    [[TMP64:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 21), align 4
 636 ; THRESH-NEXT:    [[TMP65:%.*]] = fcmp fast ogt float [[TMP63]], [[TMP64]]
 637 ; THRESH-NEXT:    [[TMP66:%.*]] = select i1 [[TMP65]], float [[TMP63]], float [[TMP64]]
 638 ; THRESH-NEXT:    [[TMP67:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 22), align 8
 639 ; THRESH-NEXT:    [[TMP68:%.*]] = fcmp fast ogt float [[TMP66]], [[TMP67]]
 640 ; THRESH-NEXT:    [[TMP69:%.*]] = select i1 [[TMP68]], float [[TMP66]], float [[TMP67]]
 641 ; THRESH-NEXT:    [[TMP70:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 23), align 4
 642 ; THRESH-NEXT:    [[TMP71:%.*]] = fcmp fast ogt float [[TMP69]], [[TMP70]]
 643 ; THRESH-NEXT:    [[TMP72:%.*]] = select i1 [[TMP71]], float [[TMP69]], float [[TMP70]]
 644 ; THRESH-NEXT:    [[TMP73:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 24), align 16
 645 ; THRESH-NEXT:    [[TMP74:%.*]] = fcmp fast ogt float [[TMP72]], [[TMP73]]
 646 ; THRESH-NEXT:    [[TMP75:%.*]] = select i1 [[TMP74]], float [[TMP72]], float [[TMP73]]
 647 ; THRESH-NEXT:    [[TMP76:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 25), align 4
 648 ; THRESH-NEXT:    [[TMP77:%.*]] = fcmp fast ogt float [[TMP75]], [[TMP76]]
 649 ; THRESH-NEXT:    [[TMP78:%.*]] = select i1 [[TMP77]], float [[TMP75]], float [[TMP76]]
 650 ; THRESH-NEXT:    [[TMP79:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 26), align 8
 651 ; THRESH-NEXT:    [[TMP80:%.*]] = fcmp fast ogt float [[TMP78]], [[TMP79]]
 652 ; THRESH-NEXT:    [[TMP81:%.*]] = select i1 [[TMP80]], float [[TMP78]], float [[TMP79]]
 653 ; THRESH-NEXT:    [[TMP82:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 27), align 4
 654 ; THRESH-NEXT:    [[TMP83:%.*]] = fcmp fast ogt float [[TMP81]], [[TMP82]]
 655 ; THRESH-NEXT:    [[TMP84:%.*]] = select i1 [[TMP83]], float [[TMP81]], float [[TMP82]]
 656 ; THRESH-NEXT:    [[TMP85:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 28), align 16
 657 ; THRESH-NEXT:    [[TMP86:%.*]] = fcmp fast ogt float [[TMP84]], [[TMP85]]
 658 ; THRESH-NEXT:    [[TMP87:%.*]] = select i1 [[TMP86]], float [[TMP84]], float [[TMP85]]
 659 ; THRESH-NEXT:    [[TMP88:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 29), align 4
 660 ; THRESH-NEXT:    [[TMP89:%.*]] = fcmp fast ogt float [[TMP87]], [[TMP88]]
 661 ; THRESH-NEXT:    [[TMP90:%.*]] = select i1 [[TMP89]], float [[TMP87]], float [[TMP88]]
 662 ; THRESH-NEXT:    [[TMP91:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 30), align 8
 663 ; THRESH-NEXT:    [[TMP92:%.*]] = fcmp fast ogt float [[TMP90]], [[TMP91]]
 664 ; THRESH-NEXT:    [[TMP93:%.*]] = select i1 [[TMP92]], float [[TMP90]], float [[TMP91]]
 665 ; THRESH-NEXT:    [[TMP94:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 31), align 4
 666 ; THRESH-NEXT:    [[TMP95:%.*]] = fcmp fast ogt float [[TMP93]], [[TMP94]]
 667 ; THRESH-NEXT:    [[TMP96:%.*]] = select i1 [[TMP95]], float [[TMP93]], float [[TMP94]]
 668 ; THRESH-NEXT:    ret float [[TMP96]]
 669 ;
     ; Input IR. The `fast` flag is attached to each fcmp only; the selects
     ; carry no fast-math flags.
 670   %2 = load float, ptr @arr1, align 16
 671   %3 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 1), align 4
 672   %4 = fcmp fast ogt float %2, %3
 673   %5 = select i1 %4, float %2, float %3
 674   %6 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 2), align 8
 675   %7 = fcmp fast ogt float %5, %6
 676   %8 = select i1 %7, float %5, float %6
 677   %9 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 3), align 4
 678   %10 = fcmp fast ogt float %8, %9
 679   %11 = select i1 %10, float %8, float %9
 680   %12 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 4), align 16
 681   %13 = fcmp fast ogt float %11, %12
 682   %14 = select i1 %13, float %11, float %12
 683   %15 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 5), align 4
 684   %16 = fcmp fast ogt float %14, %15
 685   %17 = select i1 %16, float %14, float %15
 686   %18 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 6), align 8
 687   %19 = fcmp fast ogt float %17, %18
 688   %20 = select i1 %19, float %17, float %18
 689   %21 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 7), align 4
 690   %22 = fcmp fast ogt float %20, %21
 691   %23 = select i1 %22, float %20, float %21
 692   %24 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 8), align 16
 693   %25 = fcmp fast ogt float %23, %24
 694   %26 = select i1 %25, float %23, float %24
 695   %27 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 9), align 4
 696   %28 = fcmp fast ogt float %26, %27
 697   %29 = select i1 %28, float %26, float %27
 698   %30 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 10), align 8
 699   %31 = fcmp fast ogt float %29, %30
 700   %32 = select i1 %31, float %29, float %30
 701   %33 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 11), align 4
 702   %34 = fcmp fast ogt float %32, %33
 703   %35 = select i1 %34, float %32, float %33
 704   %36 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 12), align 16
 705   %37 = fcmp fast ogt float %35, %36
 706   %38 = select i1 %37, float %35, float %36
 707   %39 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 13), align 4
 708   %40 = fcmp fast ogt float %38, %39
 709   %41 = select i1 %40, float %38, float %39
 710   %42 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 14), align 8
 711   %43 = fcmp fast ogt float %41, %42
 712   %44 = select i1 %43, float %41, float %42
 713   %45 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 15), align 4
 714   %46 = fcmp fast ogt float %44, %45
 715   %47 = select i1 %46, float %44, float %45
 716   %48 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 16), align 16
 717   %49 = fcmp fast ogt float %47, %48
 718   %50 = select i1 %49, float %47, float %48
 719   %51 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 17), align 4
 720   %52 = fcmp fast ogt float %50, %51
 721   %53 = select i1 %52, float %50, float %51
 722   %54 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 18), align 8
 723   %55 = fcmp fast ogt float %53, %54
 724   %56 = select i1 %55, float %53, float %54
 725   %57 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 19), align 4
 726   %58 = fcmp fast ogt float %56, %57
 727   %59 = select i1 %58, float %56, float %57
 728   %60 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 20), align 16
 729   %61 = fcmp fast ogt float %59, %60
 730   %62 = select i1 %61, float %59, float %60
 731   %63 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 21), align 4
 732   %64 = fcmp fast ogt float %62, %63
 733   %65 = select i1 %64, float %62, float %63
 734   %66 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 22), align 8
 735   %67 = fcmp fast ogt float %65, %66
 736   %68 = select i1 %67, float %65, float %66
 737   %69 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 23), align 4
 738   %70 = fcmp fast ogt float %68, %69
 739   %71 = select i1 %70, float %68, float %69
 740   %72 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 24), align 16
 741   %73 = fcmp fast ogt float %71, %72
 742   %74 = select i1 %73, float %71, float %72
 743   %75 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 25), align 4
 744   %76 = fcmp fast ogt float %74, %75
 745   %77 = select i1 %76, float %74, float %75
 746   %78 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 26), align 8
 747   %79 = fcmp fast ogt float %77, %78
 748   %80 = select i1 %79, float %77, float %78
 749   %81 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 27), align 4
 750   %82 = fcmp fast ogt float %80, %81
 751   %83 = select i1 %82, float %80, float %81
 752   %84 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 28), align 16
 753   %85 = fcmp fast ogt float %83, %84
 754   %86 = select i1 %85, float %83, float %84
 755   %87 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 29), align 4
 756   %88 = fcmp fast ogt float %86, %87
 757   %89 = select i1 %88, float %86, float %87
 758   %90 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 30), align 8
 759   %91 = fcmp fast ogt float %89, %90
 760   %92 = select i1 %91, float %89, float %90
 761   %93 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 31), align 4
 762   %94 = fcmp fast ogt float %92, %93
 763   %95 = select i1 %94, float %92, float %93
 764   ret float %95
 765 }
 766
 767 define i32 @maxi8_mutiple_uses(i32) {
     ; Signed max reduction (icmp sgt + select) over elements 0..7 of @arr in
     ; which the first compare has a second user: it also chooses between 3 and
     ; 4 for a store to @var. The unnamed i32 argument is not used.
     ; Expected output per the assertions below: the SSE2 run keeps everything
     ; scalar; the SSE4 and AVX runs reduce elements 2..5 with
     ; llvm.vector.reduce.smax.v4i32 and fold in elements 6, 7 and the first
     ; pair using scalar icmp/select; the THRESH run also loads the first pair
     ; as a <2 x i32> vector and performs part of the final combine with a
     ; <2 x i32> icmp/select. In every variant the first compare stays scalar
     ; and still feeds the 3-or-4 select.
 768 ; SSE2-LABEL: @maxi8_mutiple_uses(
 769 ; SSE2-NEXT:    [[TMP2:%.*]] = load i32, ptr @arr, align 16
 770 ; SSE2-NEXT:    [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
 771 ; SSE2-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
 772 ; SSE2-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
 773 ; SSE2-NEXT:    [[TMP6:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
 774 ; SSE2-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]]
 775 ; SSE2-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]]
 776 ; SSE2-NEXT:    [[TMP9:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
 777 ; SSE2-NEXT:    [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
 778 ; SSE2-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
 779 ; SSE2-NEXT:    [[TMP12:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
 780 ; SSE2-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
 781 ; SSE2-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]]
 782 ; SSE2-NEXT:    [[TMP15:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
 783 ; SSE2-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
 784 ; SSE2-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
 785 ; SSE2-NEXT:    [[TMP18:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
 786 ; SSE2-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
 787 ; SSE2-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]]
 788 ; SSE2-NEXT:    [[TMP21:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
 789 ; SSE2-NEXT:    [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]]
 790 ; SSE2-NEXT:    [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]]
 791 ; SSE2-NEXT:    [[TMP24:%.*]] = select i1 [[TMP4]], i32 3, i32 4
 792 ; SSE2-NEXT:    store i32 [[TMP24]], ptr @var, align 8
 793 ; SSE2-NEXT:    ret i32 [[TMP23]]
 794 ;
 795 ; SSE4-LABEL: @maxi8_mutiple_uses(
 796 ; SSE4-NEXT:    [[TMP2:%.*]] = load i32, ptr @arr, align 16
 797 ; SSE4-NEXT:    [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
 798 ; SSE4-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
 799 ; SSE4-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
 800 ; SSE4-NEXT:    [[TMP6:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
 801 ; SSE4-NEXT:    [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
 802 ; SSE4-NEXT:    [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
 803 ; SSE4-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
 804 ; SSE4-NEXT:    [[OP_RDX:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
 805 ; SSE4-NEXT:    [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP9]], i32 [[TMP7]]
 806 ; SSE4-NEXT:    [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP8]], [[TMP5]]
 807 ; SSE4-NEXT:    [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP8]], i32 [[TMP5]]
 808 ; SSE4-NEXT:    [[OP_RDX4:%.*]] = icmp sgt i32 [[OP_RDX1]], [[OP_RDX3]]
 809 ; SSE4-NEXT:    [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[OP_RDX1]], i32 [[OP_RDX3]]
 810 ; SSE4-NEXT:    [[TMP10:%.*]] = select i1 [[TMP4]], i32 3, i32 4
 811 ; SSE4-NEXT:    store i32 [[TMP10]], ptr @var, align 8
 812 ; SSE4-NEXT:    ret i32 [[OP_RDX5]]
 813 ;
 814 ; AVX-LABEL: @maxi8_mutiple_uses(
 815 ; AVX-NEXT:    [[TMP2:%.*]] = load i32, ptr @arr, align 16
 816 ; AVX-NEXT:    [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
 817 ; AVX-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
 818 ; AVX-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
 819 ; AVX-NEXT:    [[TMP6:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
 820 ; AVX-NEXT:    [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
 821 ; AVX-NEXT:    [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
 822 ; AVX-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
 823 ; AVX-NEXT:    [[OP_RDX:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
 824 ; AVX-NEXT:    [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP9]], i32 [[TMP7]]
 825 ; AVX-NEXT:    [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP8]], [[TMP5]]
 826 ; AVX-NEXT:    [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP8]], i32 [[TMP5]]
 827 ; AVX-NEXT:    [[OP_RDX4:%.*]] = icmp sgt i32 [[OP_RDX1]], [[OP_RDX3]]
 828 ; AVX-NEXT:    [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[OP_RDX1]], i32 [[OP_RDX3]]
 829 ; AVX-NEXT:    [[TMP10:%.*]] = select i1 [[TMP4]], i32 3, i32 4
 830 ; AVX-NEXT:    store i32 [[TMP10]], ptr @var, align 8
 831 ; AVX-NEXT:    ret i32 [[OP_RDX5]]
 832 ;
 833 ; THRESH-LABEL: @maxi8_mutiple_uses(
 834 ; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr @arr, align 16
 835 ; THRESH-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
 836 ; THRESH-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
 837 ; THRESH-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]]
 838 ; THRESH-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP3]], i32 [[TMP4]]
 839 ; THRESH-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
 840 ; THRESH-NEXT:    [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
 841 ; THRESH-NEXT:    [[TMP9:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
 842 ; THRESH-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP7]])
 843 ; THRESH-NEXT:    [[TMP11:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0
 844 ; THRESH-NEXT:    [[TMP12:%.*]] = insertelement <2 x i32> [[TMP11]], i32 [[TMP9]], i32 1
 845 ; THRESH-NEXT:    [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP8]], i32 0
 846 ; THRESH-NEXT:    [[TMP14:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP6]], i32 1
 847 ; THRESH-NEXT:    [[TMP15:%.*]] = icmp sgt <2 x i32> [[TMP12]], [[TMP14]]
 848 ; THRESH-NEXT:    [[TMP16:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP12]], <2 x i32> [[TMP14]]
 849 ; THRESH-NEXT:    [[TMP17:%.*]] = extractelement <2 x i32> [[TMP16]], i32 0
 850 ; THRESH-NEXT:    [[TMP18:%.*]] = extractelement <2 x i32> [[TMP16]], i32 1
 851 ; THRESH-NEXT:    [[OP_RDX4:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
 852 ; THRESH-NEXT:    [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[TMP17]], i32 [[TMP18]]
 853 ; THRESH-NEXT:    [[TMP19:%.*]] = select i1 [[TMP5]], i32 3, i32 4
 854 ; THRESH-NEXT:    store i32 [[TMP19]], ptr @var, align 8
 855 ; THRESH-NEXT:    ret i32 [[OP_RDX5]]
 856 ;
     ; Input IR: an 8-element icmp sgt/select chain over @arr.
 857   %2 = load i32, ptr @arr, align 16
 858   %3 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
 859   %4 = icmp sgt i32 %2, %3
 860   %5 = select i1 %4, i32 %2, i32 %3
 861   %6 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
 862   %7 = icmp sgt i32 %5, %6
 863   %8 = select i1 %7, i32 %5, i32 %6
 864   %9 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
 865   %10 = icmp sgt i32 %8, %9
 866   %11 = select i1 %10, i32 %8, i32 %9
 867   %12 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
 868   %13 = icmp sgt i32 %11, %12
 869   %14 = select i1 %13, i32 %11, i32 %12
 870   %15 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
 871   %16 = icmp sgt i32 %14, %15
 872   %17 = select i1 %16, i32 %14, i32 %15
 873   %18 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
 874   %19 = icmp sgt i32 %17, %18
 875   %20 = select i1 %19, i32 %17, i32 %18
 876   %21 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
 877   %22 = icmp sgt i32 %20, %21
 878   %23 = select i1 %22, i32 %20, i32 %21
     ; Second use of the first compare (%4), outside the reduction chain; its
     ; result is stored to @var.
 879   %24 = select i1 %4, i32 3, i32 4
 880   store i32 %24, ptr @var, align 8
 881   ret i32 %23
 882 }
 883
 884 define i32 @maxi8_mutiple_uses2(i32) {
     ; Signed-max (icmp sgt + select) chain over @arr[0..5] in which one
     ; interior compare (%10) has a second user: the select whose result is
     ; stored to @var.
     ; Expected output: runs using the DEFAULT prefix keep the whole chain
     ; scalar; the THRESH run only merges the first two loads into a single
     ; <2 x i32> load and keeps every compare/select scalar.
 885 ; DEFAULT-LABEL: @maxi8_mutiple_uses2(
 886 ; DEFAULT-NEXT:    [[TMP2:%.*]] = load i32, ptr @arr, align 16
 887 ; DEFAULT-NEXT:    [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
 888 ; DEFAULT-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
 889 ; DEFAULT-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
 890 ; DEFAULT-NEXT:    [[TMP6:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
 891 ; DEFAULT-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]]
 892 ; DEFAULT-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]]
 893 ; DEFAULT-NEXT:    [[TMP9:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
 894 ; DEFAULT-NEXT:    [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
 895 ; DEFAULT-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
 896 ; DEFAULT-NEXT:    [[TMP12:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
 897 ; DEFAULT-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
 898 ; DEFAULT-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]]
 899 ; DEFAULT-NEXT:    [[TMP15:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
 900 ; DEFAULT-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
 901 ; DEFAULT-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
 902 ; DEFAULT-NEXT:    [[TMP18:%.*]] = select i1 [[TMP10]], i32 3, i32 4
 903 ; DEFAULT-NEXT:    store i32 [[TMP18]], ptr @var, align 8
 904 ; DEFAULT-NEXT:    ret i32 [[TMP17]]
 905 ;
 906 ; THRESH-LABEL: @maxi8_mutiple_uses2(
 907 ; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr @arr, align 16
 908 ; THRESH-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
 909 ; THRESH-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
 910 ; THRESH-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]]
 911 ; THRESH-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP3]], i32 [[TMP4]]
 912 ; THRESH-NEXT:    [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
 913 ; THRESH-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]]
 914 ; THRESH-NEXT:    [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 [[TMP7]]
 915 ; THRESH-NEXT:    [[TMP10:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
 916 ; THRESH-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
 917 ; THRESH-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 [[TMP10]]
 918 ; THRESH-NEXT:    [[TMP13:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
 919 ; THRESH-NEXT:    [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]]
 920 ; THRESH-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 [[TMP13]]
 921 ; THRESH-NEXT:    [[TMP16:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
 922 ; THRESH-NEXT:    [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]]
 923 ; THRESH-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 [[TMP16]]
 924 ; THRESH-NEXT:    [[TMP19:%.*]] = select i1 [[TMP11]], i32 3, i32 4
 925 ; THRESH-NEXT:    store i32 [[TMP19]], ptr @var, align 8
 926 ; THRESH-NEXT:    ret i32 [[TMP18]]
 927 ;
 928   %2 = load i32, ptr @arr, align 16
 929   %3 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
 930   %4 = icmp sgt i32 %2, %3
 931   %5 = select i1 %4, i32 %2, i32 %3
 932   %6 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
 933   %7 = icmp sgt i32 %5, %6
 934   %8 = select i1 %7, i32 %5, i32 %6
 935   %9 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
 936   %10 = icmp sgt i32 %8, %9
 937   %11 = select i1 %10, i32 %8, i32 %9
 938   %12 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
 939   %13 = icmp sgt i32 %11, %12
 940   %14 = select i1 %13, i32 %11, i32 %12
 941   %15 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
 942   %16 = icmp sgt i32 %14, %15
 943   %17 = select i1 %16, i32 %14, i32 %15
       ; Second use of the interior compare %10, outside the max chain; the
       ; selected constant is what gets stored to @var.
 944   %18 = select i1 %10, i32 3, i32 4
 945   store i32 %18, ptr @var, align 8
 946   ret i32 %17
 947 }
 948
 949 define i32 @maxi8_wrong_parent(i32) {
     ; Signed-max (icmp sgt + select) chain over @arr[0..7] that is split
     ; across two basic blocks: the first compare (%4) is computed in the
     ; entry block, while the select that consumes it and the rest of the
     ; chain live in block %pp.
     ; Expected output per prefix:
     ;  * SSE2 keeps every load, compare and select scalar.
     ;  * SSE4 and AVX load @arr[2..5] as <4 x i32>, reduce that vector with
     ;    llvm.vector.reduce.smax.v4i32, and fold the remaining scalars in
     ;    with scalar icmp/select pairs.
     ;  * THRESH additionally loads @arr[0..1] as <2 x i32> and performs one
     ;    combining step as a <2 x i32> icmp/select.
 950 ; SSE2-LABEL: @maxi8_wrong_parent(
 951 ; SSE2-NEXT:    [[TMP2:%.*]] = load i32, ptr @arr, align 16
 952 ; SSE2-NEXT:    [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
 953 ; SSE2-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
 954 ; SSE2-NEXT:    br label [[PP:%.*]]
 955 ; SSE2:       pp:
 956 ; SSE2-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
 957 ; SSE2-NEXT:    [[TMP6:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
 958 ; SSE2-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]]
 959 ; SSE2-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]]
 960 ; SSE2-NEXT:    [[TMP9:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
 961 ; SSE2-NEXT:    [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
 962 ; SSE2-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
 963 ; SSE2-NEXT:    [[TMP12:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
 964 ; SSE2-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
 965 ; SSE2-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]]
 966 ; SSE2-NEXT:    [[TMP15:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
 967 ; SSE2-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
 968 ; SSE2-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
 969 ; SSE2-NEXT:    [[TMP18:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
 970 ; SSE2-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
 971 ; SSE2-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]]
 972 ; SSE2-NEXT:    [[TMP21:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
 973 ; SSE2-NEXT:    [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]]
 974 ; SSE2-NEXT:    [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]]
 975 ; SSE2-NEXT:    ret i32 [[TMP23]]
 976 ;
 977 ; SSE4-LABEL: @maxi8_wrong_parent(
 978 ; SSE4-NEXT:    [[TMP2:%.*]] = load i32, ptr @arr, align 16
 979 ; SSE4-NEXT:    [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
 980 ; SSE4-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
 981 ; SSE4-NEXT:    br label [[PP:%.*]]
 982 ; SSE4:       pp:
 983 ; SSE4-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
 984 ; SSE4-NEXT:    [[TMP6:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
 985 ; SSE4-NEXT:    [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
 986 ; SSE4-NEXT:    [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
 987 ; SSE4-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
 988 ; SSE4-NEXT:    [[OP_RDX:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
 989 ; SSE4-NEXT:    [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP9]], i32 [[TMP7]]
 990 ; SSE4-NEXT:    [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP8]], [[TMP5]]
 991 ; SSE4-NEXT:    [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP8]], i32 [[TMP5]]
 992 ; SSE4-NEXT:    [[OP_RDX4:%.*]] = icmp sgt i32 [[OP_RDX1]], [[OP_RDX3]]
 993 ; SSE4-NEXT:    [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[OP_RDX1]], i32 [[OP_RDX3]]
 994 ; SSE4-NEXT:    ret i32 [[OP_RDX5]]
 995 ;
 996 ; AVX-LABEL: @maxi8_wrong_parent(
 997 ; AVX-NEXT:    [[TMP2:%.*]] = load i32, ptr @arr, align 16
 998 ; AVX-NEXT:    [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
 999 ; AVX-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
1000 ; AVX-NEXT:    br label [[PP:%.*]]
1001 ; AVX:       pp:
1002 ; AVX-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
1003 ; AVX-NEXT:    [[TMP6:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
1004 ; AVX-NEXT:    [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
1005 ; AVX-NEXT:    [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
1006 ; AVX-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
1007 ; AVX-NEXT:    [[OP_RDX:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
1008 ; AVX-NEXT:    [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP9]], i32 [[TMP7]]
1009 ; AVX-NEXT:    [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP8]], [[TMP5]]
1010 ; AVX-NEXT:    [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP8]], i32 [[TMP5]]
1011 ; AVX-NEXT:    [[OP_RDX4:%.*]] = icmp sgt i32 [[OP_RDX1]], [[OP_RDX3]]
1012 ; AVX-NEXT:    [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[OP_RDX1]], i32 [[OP_RDX3]]
1013 ; AVX-NEXT:    ret i32 [[OP_RDX5]]
1014 ;
1015 ; THRESH-LABEL: @maxi8_wrong_parent(
1016 ; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr @arr, align 16
1017 ; THRESH-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
1018 ; THRESH-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
1019 ; THRESH-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]]
1020 ; THRESH-NEXT:    br label [[PP:%.*]]
1021 ; THRESH:       pp:
1022 ; THRESH-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP3]], i32 [[TMP4]]
1023 ; THRESH-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
1024 ; THRESH-NEXT:    [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
1025 ; THRESH-NEXT:    [[TMP9:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
1026 ; THRESH-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP7]])
1027 ; THRESH-NEXT:    [[TMP11:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0
1028 ; THRESH-NEXT:    [[TMP12:%.*]] = insertelement <2 x i32> [[TMP11]], i32 [[TMP9]], i32 1
1029 ; THRESH-NEXT:    [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP8]], i32 0
1030 ; THRESH-NEXT:    [[TMP14:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP6]], i32 1
1031 ; THRESH-NEXT:    [[TMP15:%.*]] = icmp sgt <2 x i32> [[TMP12]], [[TMP14]]
1032 ; THRESH-NEXT:    [[TMP16:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP12]], <2 x i32> [[TMP14]]
1033 ; THRESH-NEXT:    [[TMP17:%.*]] = extractelement <2 x i32> [[TMP16]], i32 0
1034 ; THRESH-NEXT:    [[TMP18:%.*]] = extractelement <2 x i32> [[TMP16]], i32 1
1035 ; THRESH-NEXT:    [[OP_RDX4:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
1036 ; THRESH-NEXT:    [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[TMP17]], i32 [[TMP18]]
1037 ; THRESH-NEXT:    ret i32 [[OP_RDX5]]
1038 ;
1039   %2 = load i32, ptr @arr, align 16
1040   %3 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
       ; This compare is defined in the entry block, but its select user (%5)
       ; is in %pp below.
1041   %4 = icmp sgt i32 %2, %3
1042   br label %pp
1043
1044 pp:
1045   %5 = select i1 %4, i32 %2, i32 %3
1046   %6 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
1047   %7 = icmp sgt i32 %5, %6
1048   %8 = select i1 %7, i32 %5, i32 %6
1049   %9 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
1050   %10 = icmp sgt i32 %8, %9
1051   %11 = select i1 %10, i32 %8, i32 %9
1052   %12 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
1053   %13 = icmp sgt i32 %11, %12
1054   %14 = select i1 %13, i32 %11, i32 %12
1055   %15 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
1056   %16 = icmp sgt i32 %14, %15
1057   %17 = select i1 %16, i32 %14, i32 %15
1058   %18 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
1059   %19 = icmp sgt i32 %17, %18
1060   %20 = select i1 %19, i32 %17, i32 %18
1061   %21 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
1062   %22 = icmp sgt i32 %20, %21
1063   %23 = select i1 %22, i32 %20, i32 %21
1064   ret i32 %23
1065 }
1066
1067 ; PR38191 - We don't handle array-of-pointer reductions.
1068 define ptr @maxp8(i32) {
     ; Unsigned-max (icmp ugt + select) chain over the eight pointers in
     ; @arrp[0..7].
     ; Expected output: no reduction intrinsic is formed under any prefix.
     ; Runs using the DEFAULT prefix keep the scalar loads, compares and
     ; selects; the THRESH run only merges the first two loads into a single
     ; <2 x ptr> load and leaves the compare/select chain scalar.
1069 ; DEFAULT-LABEL: @maxp8(
1070 ; DEFAULT-NEXT:    [[TMP2:%.*]] = load ptr, ptr @arrp, align 16
1071 ; DEFAULT-NEXT:    [[TMP3:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 1), align 4
1072 ; DEFAULT-NEXT:    [[TMP4:%.*]] = icmp ugt ptr [[TMP2]], [[TMP3]]
1073 ; DEFAULT-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], ptr [[TMP2]], ptr [[TMP3]]
1074 ; DEFAULT-NEXT:    [[TMP6:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 2), align 8
1075 ; DEFAULT-NEXT:    [[TMP7:%.*]] = icmp ugt ptr [[TMP5]], [[TMP6]]
1076 ; DEFAULT-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], ptr [[TMP5]], ptr [[TMP6]]
1077 ; DEFAULT-NEXT:    [[TMP9:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 3), align 4
1078 ; DEFAULT-NEXT:    [[TMP10:%.*]] = icmp ugt ptr [[TMP8]], [[TMP9]]
1079 ; DEFAULT-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], ptr [[TMP8]], ptr [[TMP9]]
1080 ; DEFAULT-NEXT:    [[TMP12:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 4), align 16
1081 ; DEFAULT-NEXT:    [[TMP13:%.*]] = icmp ugt ptr [[TMP11]], [[TMP12]]
1082 ; DEFAULT-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], ptr [[TMP11]], ptr [[TMP12]]
1083 ; DEFAULT-NEXT:    [[TMP15:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 5), align 4
1084 ; DEFAULT-NEXT:    [[TMP16:%.*]] = icmp ugt ptr [[TMP14]], [[TMP15]]
1085 ; DEFAULT-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], ptr [[TMP14]], ptr [[TMP15]]
1086 ; DEFAULT-NEXT:    [[TMP18:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 6), align 8
1087 ; DEFAULT-NEXT:    [[TMP19:%.*]] = icmp ugt ptr [[TMP17]], [[TMP18]]
1088 ; DEFAULT-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], ptr [[TMP17]], ptr [[TMP18]]
1089 ; DEFAULT-NEXT:    [[TMP21:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 7), align 4
1090 ; DEFAULT-NEXT:    [[TMP22:%.*]] = icmp ugt ptr [[TMP20]], [[TMP21]]
1091 ; DEFAULT-NEXT:    [[TMP23:%.*]] = select i1 [[TMP22]], ptr [[TMP20]], ptr [[TMP21]]
1092 ; DEFAULT-NEXT:    ret ptr [[TMP23]]
1093 ;
1094 ; THRESH-LABEL: @maxp8(
1095 ; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x ptr>, ptr @arrp, align 16
1096 ; THRESH-NEXT:    [[TMP3:%.*]] = extractelement <2 x ptr> [[TMP2]], i32 0
1097 ; THRESH-NEXT:    [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP2]], i32 1
1098 ; THRESH-NEXT:    [[TMP5:%.*]] = icmp ugt ptr [[TMP3]], [[TMP4]]
1099 ; THRESH-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], ptr [[TMP3]], ptr [[TMP4]]
1100 ; THRESH-NEXT:    [[TMP7:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 2), align 8
1101 ; THRESH-NEXT:    [[TMP8:%.*]] = icmp ugt ptr [[TMP6]], [[TMP7]]
1102 ; THRESH-NEXT:    [[TMP9:%.*]] = select i1 [[TMP8]], ptr [[TMP6]], ptr [[TMP7]]
1103 ; THRESH-NEXT:    [[TMP10:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 3), align 4
1104 ; THRESH-NEXT:    [[TMP11:%.*]] = icmp ugt ptr [[TMP9]], [[TMP10]]
1105 ; THRESH-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], ptr [[TMP9]], ptr [[TMP10]]
1106 ; THRESH-NEXT:    [[TMP13:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 4), align 16
1107 ; THRESH-NEXT:    [[TMP14:%.*]] = icmp ugt ptr [[TMP12]], [[TMP13]]
1108 ; THRESH-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], ptr [[TMP12]], ptr [[TMP13]]
1109 ; THRESH-NEXT:    [[TMP16:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 5), align 4
1110 ; THRESH-NEXT:    [[TMP17:%.*]] = icmp ugt ptr [[TMP15]], [[TMP16]]
1111 ; THRESH-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], ptr [[TMP15]], ptr [[TMP16]]
1112 ; THRESH-NEXT:    [[TMP19:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 6), align 8
1113 ; THRESH-NEXT:    [[TMP20:%.*]] = icmp ugt ptr [[TMP18]], [[TMP19]]
1114 ; THRESH-NEXT:    [[TMP21:%.*]] = select i1 [[TMP20]], ptr [[TMP18]], ptr [[TMP19]]
1115 ; THRESH-NEXT:    [[TMP22:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 7), align 4
1116 ; THRESH-NEXT:    [[TMP23:%.*]] = icmp ugt ptr [[TMP21]], [[TMP22]]
1117 ; THRESH-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], ptr [[TMP21]], ptr [[TMP22]]
1118 ; THRESH-NEXT:    ret ptr [[TMP24]]
1119 ;
1120   %2 = load ptr, ptr @arrp, align 16
1121   %3 = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 1), align 4
1122   %4 = icmp ugt ptr %2, %3
1123   %5 = select i1 %4, ptr %2, ptr %3
1124   %6 = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 2), align 8
1125   %7 = icmp ugt ptr %5, %6
1126   %8 = select i1 %7, ptr %5, ptr %6
1127   %9 = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 3), align 4
1128   %10 = icmp ugt ptr %8, %9
1129   %11 = select i1 %10, ptr %8, ptr %9
1130   %12 = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 4), align 16
1131   %13 = icmp ugt ptr %11, %12
1132   %14 = select i1 %13, ptr %11, ptr %12
1133   %15 = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 5), align 4
1134   %16 = icmp ugt ptr %14, %15
1135   %17 = select i1 %16, ptr %14, ptr %15
1136   %18 = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 6), align 8
1137   %19 = icmp ugt ptr %17, %18
1138   %20 = select i1 %19, ptr %17, ptr %18
1139   %21 = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 7), align 4
1140   %22 = icmp ugt ptr %20, %21
1141   %23 = select i1 %22, ptr %20, ptr %21
1142   ret ptr %23
1143 }
1144
1145 define i32 @smax_intrinsic_rdx_v8i32(ptr %p0) {
     ; Signed-max reduction written with the scalar llvm.smax.i32 intrinsic:
     ; eight i32 values are loaded from %p0 at element offsets 0..7 and
     ; combined by a three-level tree of smax calls.
     ; Expected output under the common CHECK prefix (every run): one
     ; <8 x i32> load feeding llvm.vector.reduce.smax.v8i32.
1146 ; CHECK-LABEL: @smax_intrinsic_rdx_v8i32(
1147 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr [[P0:%.*]], align 4
1148 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP1]])
1149 ; CHECK-NEXT:    ret i32 [[TMP2]]
1150 ;
1151   %p1 = getelementptr inbounds i32, ptr %p0, i64 1
1152   %p2 = getelementptr inbounds i32, ptr %p0, i64 2
1153   %p3 = getelementptr inbounds i32, ptr %p0, i64 3
1154   %p4 = getelementptr inbounds i32, ptr %p0, i64 4
1155   %p5 = getelementptr inbounds i32, ptr %p0, i64 5
1156   %p6 = getelementptr inbounds i32, ptr %p0, i64 6
1157   %p7 = getelementptr inbounds i32, ptr %p0, i64 7
1158   %t0 = load i32, ptr %p0, align 4
1159   %t1 = load i32, ptr %p1, align 4
1160   %t2 = load i32, ptr %p2, align 4
1161   %t3 = load i32, ptr %p3, align 4
1162   %t4 = load i32, ptr %p4, align 4
1163   %t5 = load i32, ptr %p5, align 4
1164   %t6 = load i32, ptr %p6, align 4
1165   %t7 = load i32, ptr %p7, align 4
       ; Level 1: pairwise max of adjacent elements.
1166   %m10 = tail call i32 @llvm.smax.i32(i32 %t1, i32 %t0)
1167   %m32 = tail call i32 @llvm.smax.i32(i32 %t3, i32 %t2)
1168   %m54 = tail call i32 @llvm.smax.i32(i32 %t5, i32 %t4)
1169   %m76 = tail call i32 @llvm.smax.i32(i32 %t7, i32 %t6)
       ; Levels 2 and 3: combine the partial results into the final maximum.
1170   %m3210 = tail call i32 @llvm.smax.i32(i32 %m32, i32 %m10)
1171   %m7654 = tail call i32 @llvm.smax.i32(i32 %m76, i32 %m54)
1172   %m = tail call i32 @llvm.smax.i32(i32 %m7654, i32 %m3210)
1173   ret i32 %m
1174 }
1175
1176 define i16 @smin_intrinsic_rdx_v8i16(ptr %p0) {
     ; Signed-min reduction written with the scalar llvm.smin.i16 intrinsic:
     ; eight i16 values are loaded from %p0 at element offsets 0..7 and
     ; combined by a three-level tree of smin calls.
     ; Expected output under the common CHECK prefix (every run): one
     ; <8 x i16> load feeding llvm.vector.reduce.smin.v8i16.
1177 ; CHECK-LABEL: @smin_intrinsic_rdx_v8i16(
1178 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[P0:%.*]], align 4
1179 ; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> [[TMP1]])
1180 ; CHECK-NEXT:    ret i16 [[TMP2]]
1181 ;
1182   %p1 = getelementptr inbounds i16, ptr %p0, i64 1
1183   %p2 = getelementptr inbounds i16, ptr %p0, i64 2
1184   %p3 = getelementptr inbounds i16, ptr %p0, i64 3
1185   %p4 = getelementptr inbounds i16, ptr %p0, i64 4
1186   %p5 = getelementptr inbounds i16, ptr %p0, i64 5
1187   %p6 = getelementptr inbounds i16, ptr %p0, i64 6
1188   %p7 = getelementptr inbounds i16, ptr %p0, i64 7
1189   %t0 = load i16, ptr %p0, align 4
1190   %t1 = load i16, ptr %p1, align 4
1191   %t2 = load i16, ptr %p2, align 4
1192   %t3 = load i16, ptr %p3, align 4
1193   %t4 = load i16, ptr %p4, align 4
1194   %t5 = load i16, ptr %p5, align 4
1195   %t6 = load i16, ptr %p6, align 4
1196   %t7 = load i16, ptr %p7, align 4
       ; Level 1: pairwise min of adjacent elements.
1197   %m10 = tail call i16 @llvm.smin.i16(i16 %t1, i16 %t0)
1198   %m32 = tail call i16 @llvm.smin.i16(i16 %t3, i16 %t2)
1199   %m54 = tail call i16 @llvm.smin.i16(i16 %t5, i16 %t4)
1200   %m76 = tail call i16 @llvm.smin.i16(i16 %t7, i16 %t6)
       ; Levels 2 and 3: combine the partial results into the final minimum.
1201   %m3210 = tail call i16 @llvm.smin.i16(i16 %m32, i16 %m10)
1202   %m7654 = tail call i16 @llvm.smin.i16(i16 %m76, i16 %m54)
1203   %m = tail call i16 @llvm.smin.i16(i16 %m7654, i16 %m3210)
1204   ret i16 %m
1205 }
1206
1207 define i64 @umax_intrinsic_rdx_v4i64(ptr %p0) {
     ; Unsigned-max reduction written with the scalar llvm.umax.i64
     ; intrinsic: four i64 values are loaded from %p0 at element offsets 0..3
     ; and combined by a two-level tree of umax calls.
     ; Expected output: runs using the DEFAULT prefix keep the scalar
     ; getelementptrs, loads and umax calls; the THRESH run produces one
     ; <4 x i64> load feeding llvm.vector.reduce.umax.v4i64.
1208 ; DEFAULT-LABEL: @umax_intrinsic_rdx_v4i64(
1209 ; DEFAULT-NEXT:    [[P1:%.*]] = getelementptr inbounds i64, ptr [[P0:%.*]], i64 1
1210 ; DEFAULT-NEXT:    [[P2:%.*]] = getelementptr inbounds i64, ptr [[P0]], i64 2
1211 ; DEFAULT-NEXT:    [[P3:%.*]] = getelementptr inbounds i64, ptr [[P0]], i64 3
1212 ; DEFAULT-NEXT:    [[T0:%.*]] = load i64, ptr [[P0]], align 4
1213 ; DEFAULT-NEXT:    [[T1:%.*]] = load i64, ptr [[P1]], align 4
1214 ; DEFAULT-NEXT:    [[T2:%.*]] = load i64, ptr [[P2]], align 4
1215 ; DEFAULT-NEXT:    [[T3:%.*]] = load i64, ptr [[P3]], align 4
1216 ; DEFAULT-NEXT:    [[M10:%.*]] = tail call i64 @llvm.umax.i64(i64 [[T1]], i64 [[T0]])
1217 ; DEFAULT-NEXT:    [[M32:%.*]] = tail call i64 @llvm.umax.i64(i64 [[T3]], i64 [[T2]])
1218 ; DEFAULT-NEXT:    [[M:%.*]] = tail call i64 @llvm.umax.i64(i64 [[M32]], i64 [[M10]])
1219 ; DEFAULT-NEXT:    ret i64 [[M]]
1220 ;
1221 ; THRESH-LABEL: @umax_intrinsic_rdx_v4i64(
1222 ; THRESH-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr [[P0:%.*]], align 4
1223 ; THRESH-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[TMP1]])
1224 ; THRESH-NEXT:    ret i64 [[TMP2]]
1225 ;
1226   %p1 = getelementptr inbounds i64, ptr %p0, i64 1
1227   %p2 = getelementptr inbounds i64, ptr %p0, i64 2
1228   %p3 = getelementptr inbounds i64, ptr %p0, i64 3
1229   %t0 = load i64, ptr %p0, align 4
1230   %t1 = load i64, ptr %p1, align 4
1231   %t2 = load i64, ptr %p2, align 4
1232   %t3 = load i64, ptr %p3, align 4
       ; Pairwise max of adjacent elements, then the final combine.
1233   %m10 = tail call i64 @llvm.umax.i64(i64 %t1, i64 %t0)
1234   %m32 = tail call i64 @llvm.umax.i64(i64 %t3, i64 %t2)
1235   %m = tail call i64 @llvm.umax.i64(i64 %m32, i64 %m10)
1236   ret i64 %m
1237 }
1238
1239 define i8 @umin_intrinsic_rdx_v16i8(ptr %p0) {
     ; Unsigned-min reduction written with the scalar llvm.umin.i8 intrinsic:
     ; sixteen i8 values are loaded from %p0 at byte offsets 0..15 and
     ; combined by a four-level tree of umin calls.
     ; Expected output under the common CHECK prefix (every run): one
     ; <16 x i8> load feeding llvm.vector.reduce.umin.v16i8.
1240 ; CHECK-LABEL: @umin_intrinsic_rdx_v16i8(
1241 ; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[P0:%.*]], align 4
1242 ; CHECK-NEXT:    [[TMP2:%.*]] = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> [[TMP1]])
1243 ; CHECK-NEXT:    ret i8 [[TMP2]]
1244 ;
1245   %p1 = getelementptr inbounds i8, ptr %p0, i64 1
1246   %p2 = getelementptr inbounds i8, ptr %p0, i64 2
1247   %p3 = getelementptr inbounds i8, ptr %p0, i64 3
1248   %p4 = getelementptr inbounds i8, ptr %p0, i64 4
1249   %p5 = getelementptr inbounds i8, ptr %p0, i64 5
1250   %p6 = getelementptr inbounds i8, ptr %p0, i64 6
1251   %p7 = getelementptr inbounds i8, ptr %p0, i64 7
1252   %p8 = getelementptr inbounds i8, ptr %p0, i64 8
1253   %p9 = getelementptr inbounds i8, ptr %p0, i64 9
1254   %pa = getelementptr inbounds i8, ptr %p0, i64 10
1255   %pb = getelementptr inbounds i8, ptr %p0, i64 11
1256   %pc = getelementptr inbounds i8, ptr %p0, i64 12
1257   %pd = getelementptr inbounds i8, ptr %p0, i64 13
1258   %pe = getelementptr inbounds i8, ptr %p0, i64 14
1259   %pf = getelementptr inbounds i8, ptr %p0, i64 15
1260   %t0 = load i8, ptr %p0, align 4
1261   %t1 = load i8, ptr %p1, align 4
1262   %t2 = load i8, ptr %p2, align 4
1263   %t3 = load i8, ptr %p3, align 4
1264   %t4 = load i8, ptr %p4, align 4
1265   %t5 = load i8, ptr %p5, align 4
1266   %t6 = load i8, ptr %p6, align 4
1267   %t7 = load i8, ptr %p7, align 4
1268   %t8 = load i8, ptr %p8, align 4
1269   %t9 = load i8, ptr %p9, align 4
1270   %ta = load i8, ptr %pa, align 4
1271   %tb = load i8, ptr %pb, align 4
1272   %tc = load i8, ptr %pc, align 4
1273   %td = load i8, ptr %pd, align 4
1274   %te = load i8, ptr %pe, align 4
1275   %tf = load i8, ptr %pf, align 4
       ; Level 1: pairwise min of adjacent elements.
1276   %m10 = tail call i8 @llvm.umin.i8(i8 %t1, i8 %t0)
1277   %m32 = tail call i8 @llvm.umin.i8(i8 %t3, i8 %t2)
1278   %m54 = tail call i8 @llvm.umin.i8(i8 %t5, i8 %t4)
1279   %m76 = tail call i8 @llvm.umin.i8(i8 %t7, i8 %t6)
1280   %m98 = tail call i8 @llvm.umin.i8(i8 %t9, i8 %t8)
1281   %mba = tail call i8 @llvm.umin.i8(i8 %tb, i8 %ta)
1282   %mdc = tail call i8 @llvm.umin.i8(i8 %td, i8 %tc)
1283   %mfe = tail call i8 @llvm.umin.i8(i8 %tf, i8 %te)
       ; Level 2: note the upper half pairs (dc, 98) and (fe, ba), i.e. it does
       ; not combine neighbouring pairs the way the lower half does.
1284   %m3210 = tail call i8 @llvm.umin.i8(i8 %m32, i8 %m10)
1285   %m7654 = tail call i8 @llvm.umin.i8(i8 %m76, i8 %m54)
1286   %mdc98 = tail call i8 @llvm.umin.i8(i8 %mdc, i8 %m98)
1287   %mfeba = tail call i8 @llvm.umin.i8(i8 %mfe, i8 %mba)
       ; Levels 3 and 4: low half, high half, then the final minimum.
1288   %ml = tail call i8 @llvm.umin.i8(i8 %m3210, i8 %m7654)
1289   %mh = tail call i8 @llvm.umin.i8(i8 %mfeba, i8 %mdc98)
1290   %m = tail call i8 @llvm.umin.i8(i8 %mh, i8 %ml)
1291   ret i8 %m
1292 }
1293
1294 ; This should not crash.
1295
1296 define void @PR49730() {
     ; Input built entirely from undef operands: four smin(undef, C) values
     ; with C = 2, 2, 1, 1 are each subtracted from undef and folded through a
     ; chain of llvm.umin.i32 calls, followed by a umin with (undef - undef)
     ; and a umin with the constant 93. The final value %t14 is unused and the
     ; function returns void.
     ; Expected output under the common CHECK prefix (every run): one
     ; <4 x i32> llvm.smin call with the constant vector <2, 2, 1, 1>, one
     ; <4 x i32> sub nsw, llvm.vector.reduce.umin.v4i32, and then three scalar
     ; llvm.umin.i32 calls.
1297 ; CHECK-LABEL: @PR49730(
1298 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1>)
1299 ; CHECK-NEXT:    [[TMP2:%.*]] = sub nsw <4 x i32> undef, [[TMP1]]
1300 ; CHECK-NEXT:    [[T12:%.*]] = sub nsw i32 undef, undef
1301 ; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[TMP2]])
1302 ; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[T12]])
1303 ; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP4]], i32 undef)
1304 ; CHECK-NEXT:    [[T14:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP5]], i32 93)
1305 ; CHECK-NEXT:    ret void
1306 ;
1307   %t = call i32 @llvm.smin.i32(i32 undef, i32 2)
1308   %t1 = sub nsw i32 undef, %t
1309   %t2 = call i32 @llvm.umin.i32(i32 undef, i32 %t1)
1310   %t3 = call i32 @llvm.smin.i32(i32 undef, i32 2)
1311   %t4 = sub nsw i32 undef, %t3
1312   %t5 = call i32 @llvm.umin.i32(i32 %t2, i32 %t4)
1313   %t6 = call i32 @llvm.smin.i32(i32 undef, i32 1)
       ; Only this sub carries nuw in addition to nsw; the expected vector sub
       ; in the checks above has nsw alone.
1314   %t7 = sub nuw nsw i32 undef, %t6
1315   %t8 = call i32 @llvm.umin.i32(i32 %t5, i32 %t7)
1316   %t9 = call i32 @llvm.smin.i32(i32 undef, i32 1)
1317   %t10 = sub nsw i32 undef, %t9
1318   %t11 = call i32 @llvm.umin.i32(i32 %t8, i32 %t10)
1319   %t12 = sub nsw i32 undef, undef
1320   %t13 = call i32 @llvm.umin.i32(i32 %t11, i32 %t12)
1321   %t14 = call i32 @llvm.umin.i32(i32 %t13, i32 93)
1322   ret void
1323 }