llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt -O2 -expand-reductions -mattr=avx -S < %s | FileCheck %s
   3
   4 ; Test that SLP vector reduction patterns are recognized at -O2,
   5 ; optionally converted to reduction intrinsics, and then expanded
   6 ; back to raw IR by -expand-reductions.
   7
   8 target triple = "x86_64--"  ; x86-64 with the vendor and OS fields left empty
   9 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"  ; little-endian, ELF mangling, native integer widths 8/16/32/64, 128-bit stack alignment
  10
  11 define i32 @add_v4i32(i32* %p) #0 {  ; Integer add reduction: returns p[0] + p[1] + p[2] + p[3].
  12 ; CHECK-LABEL: @add_v4i32(
  13 ; CHECK-NEXT:  entry:
  14 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
  15 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, !tbaa [[TBAA0:![0-9]+]]
  16 ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  17 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP1]], [[RDX_SHUF]]
  18 ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  19 ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF3]]
  20 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[BIN_RDX4]], i32 0
  21 ; CHECK-NEXT:    ret i32 [[TMP2]]
  22 ;
  23 entry:  ; input is a plain scalar loop; the assertions above expect one vector load reduced by two shuffle+add steps
  24   br label %for.cond
  25
  26 for.cond:
  27   %r.0 = phi i32 [ 0, %entry ], [ %add, %for.inc ]  ; running sum, starts at 0
  28   %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]  ; loop index i
  29   %cmp = icmp slt i32 %i.0, 4  ; loop body runs for i = 0..3
  30   br i1 %cmp, label %for.body, label %for.cond.cleanup
  31
  32 for.cond.cleanup:
  33   br label %for.end
  34
  35 for.body:
  36   %idxprom = sext i32 %i.0 to i64
  37   %arrayidx = getelementptr inbounds i32, i32* %p, i64 %idxprom
  38   %0 = load i32, i32* %arrayidx, align 4, !tbaa !3  ; p[i]
  39   %add = add nsw i32 %r.0, %0  ; sum += p[i]
  40   br label %for.inc
  41
  42 for.inc:
  43   %inc = add nsw i32 %i.0, 1
  44   br label %for.cond
  45
  46 for.end:
  47   ret i32 %r.0  ; final sum
  48 }
  49
  50 define signext i16 @mul_v8i16(i16* %p) #0 {  ; Integer mul reduction: returns the product of p[0..7], kept in an i16 accumulator.
  51 ; CHECK-LABEL: @mul_v8i16(
  52 ; CHECK-NEXT:  entry:
  53 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[P:%.*]] to <8 x i16>*
  54 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2, !tbaa [[TBAA4:![0-9]+]]
  55 ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  56 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = mul <8 x i16> [[TMP1]], [[RDX_SHUF]]
  57 ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i16> [[BIN_RDX]], <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  58 ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = mul <8 x i16> [[BIN_RDX]], [[RDX_SHUF3]]
  59 ; CHECK-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <8 x i16> [[BIN_RDX4]], <8 x i16> poison, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  60 ; CHECK-NEXT:    [[BIN_RDX6:%.*]] = mul <8 x i16> [[BIN_RDX4]], [[RDX_SHUF5]]
  61 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i16> [[BIN_RDX6]], i32 0
  62 ; CHECK-NEXT:    ret i16 [[TMP2]]
  63 ;
  64 entry:  ; input is a plain scalar loop; the assertions above expect one vector load reduced by three shuffle+mul steps
  65   br label %for.cond
  66
  67 for.cond:
  68   %r.0 = phi i16 [ 1, %entry ], [ %conv2, %for.inc ]  ; running product, starts at 1
  69   %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]  ; loop index i
  70   %cmp = icmp slt i32 %i.0, 8  ; loop body runs for i = 0..7
  71   br i1 %cmp, label %for.body, label %for.cond.cleanup
  72
  73 for.cond.cleanup:
  74   br label %for.end
  75
  76 for.body:
  77   %idxprom = sext i32 %i.0 to i64
  78   %arrayidx = getelementptr inbounds i16, i16* %p, i64 %idxprom
  79   %0 = load i16, i16* %arrayidx, align 2, !tbaa !7  ; p[i]
  80   %conv = sext i16 %0 to i32  ; both operands are sign-extended to i32 ...
  81   %conv1 = sext i16 %r.0 to i32
  82   %mul = mul nsw i32 %conv1, %conv  ; ... multiplied in i32 ...
  83   %conv2 = trunc i32 %mul to i16  ; ... and the result is truncated back to i16
  84   br label %for.inc
  85
  86 for.inc:
  87   %inc = add nsw i32 %i.0, 1
  88   br label %for.cond
  89
  90 for.end:
  91   ret i16 %r.0  ; final product
  92 }
  93
  94 define signext i8 @or_v16i8(i8* %p) #0 {  ; Bitwise-or reduction: returns the OR of p[0..15], kept in an i8 accumulator.
  95 ; CHECK-LABEL: @or_v16i8(
  96 ; CHECK-NEXT:  entry:
  97 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[P:%.*]] to <16 x i8>*
  98 ; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]], align 1, !tbaa [[TBAA6:![0-9]+]]
  99 ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 100 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = or <16 x i8> [[TMP1]], [[RDX_SHUF]]
 101 ; CHECK-NEXT:    [[RDX_SHUF4:%.*]] = shufflevector <16 x i8> [[BIN_RDX]], <16 x i8> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 102 ; CHECK-NEXT:    [[BIN_RDX5:%.*]] = or <16 x i8> [[BIN_RDX]], [[RDX_SHUF4]]
 103 ; CHECK-NEXT:    [[RDX_SHUF6:%.*]] = shufflevector <16 x i8> [[BIN_RDX5]], <16 x i8> poison, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 104 ; CHECK-NEXT:    [[BIN_RDX7:%.*]] = or <16 x i8> [[BIN_RDX5]], [[RDX_SHUF6]]
 105 ; CHECK-NEXT:    [[RDX_SHUF8:%.*]] = shufflevector <16 x i8> [[BIN_RDX7]], <16 x i8> poison, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 106 ; CHECK-NEXT:    [[BIN_RDX9:%.*]] = or <16 x i8> [[BIN_RDX7]], [[RDX_SHUF8]]
 107 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <16 x i8> [[BIN_RDX9]], i32 0
 108 ; CHECK-NEXT:    ret i8 [[TMP2]]
 109 ;
 110 entry:  ; input is a plain scalar loop; the assertions above expect one vector load reduced by four shuffle+or steps
 111   br label %for.cond
 112
 113 for.cond:
 114   %r.0 = phi i8 [ 0, %entry ], [ %conv2, %for.inc ]  ; running OR, starts at 0
 115   %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]  ; loop index i
 116   %cmp = icmp slt i32 %i.0, 16  ; loop body runs for i = 0..15
 117   br i1 %cmp, label %for.body, label %for.cond.cleanup
 118
 119 for.cond.cleanup:
 120   br label %for.end
 121
 122 for.body:
 123   %idxprom = sext i32 %i.0 to i64
 124   %arrayidx = getelementptr inbounds i8, i8* %p, i64 %idxprom
 125   %0 = load i8, i8* %arrayidx, align 1, !tbaa !9  ; p[i]
 126   %conv = sext i8 %0 to i32  ; both operands are sign-extended to i32 ...
 127   %conv1 = sext i8 %r.0 to i32
 128   %or = or i32 %conv1, %conv  ; ... OR-ed in i32 ...
 129   %conv2 = trunc i32 %or to i8  ; ... and the result is truncated back to i8
 130   br label %for.inc
 131
 132 for.inc:
 133   %inc = add nsw i32 %i.0, 1
 134   br label %for.cond
 135
 136 for.end:
 137   ret i8 %r.0  ; final OR
 138 }
 139
 140 define i32 @smin_v4i32(i32* %p) #0 {  ; Signed min reduction: returns the smallest (signed) of p[0..3].
 141 ; CHECK-LABEL: @smin_v4i32(
 142 ; CHECK-NEXT:  entry:
 143 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
 144 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, !tbaa [[TBAA0]]
 145 ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 146 ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP1]], [[RDX_SHUF]]
 147 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP1]], <4 x i32> [[RDX_SHUF]]
 148 ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 149 ; CHECK-NEXT:    [[RDX_MINMAX_CMP4:%.*]] = icmp slt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF3]]
 150 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT5:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP4]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF3]]
 151 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT5]], i32 0
 152 ; CHECK-NEXT:    ret i32 [[TMP2]]
 153 ;
 154 entry:  ; input is a scalar loop with a branchy select; the assertions above expect shuffle + icmp slt + select steps
 155   br label %for.cond
 156
 157 for.cond:
 158   %r.0 = phi i32 [ 2147483647, %entry ], [ %cond, %for.inc ]  ; running minimum, starts at the largest signed i32
 159   %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]  ; loop index i
 160   %cmp = icmp slt i32 %i.0, 4  ; loop body runs for i = 0..3
 161   br i1 %cmp, label %for.body, label %for.cond.cleanup
 162
 163 for.cond.cleanup:
 164   br label %for.end
 165
 166 for.body:
 167   %idxprom = sext i32 %i.0 to i64
 168   %arrayidx = getelementptr inbounds i32, i32* %p, i64 %idxprom
 169   %0 = load i32, i32* %arrayidx, align 4, !tbaa !3  ; p[i]
 170   %cmp1 = icmp slt i32 %0, %r.0  ; signed: p[i] < running minimum?
 171   br i1 %cmp1, label %cond.true, label %cond.false
 172
 173 cond.true:
 174   %idxprom2 = sext i32 %i.0 to i64
 175   %arrayidx3 = getelementptr inbounds i32, i32* %p, i64 %idxprom2
 176   %1 = load i32, i32* %arrayidx3, align 4, !tbaa !3  ; p[i] loaded a second time (same index as %0)
 177   br label %cond.end
 178
 179 cond.false:
 180   br label %cond.end
 181
 182 cond.end:
 183   %cond = phi i32 [ %1, %cond.true ], [ %r.0, %cond.false ]  ; p[i] if it was smaller, else the previous minimum
 184   br label %for.inc
 185
 186 for.inc:
 187   %inc = add nsw i32 %i.0, 1
 188   br label %for.cond
 189
 190 for.end:
 191   ret i32 %r.0  ; final minimum
 192 }
 193
 194 define i32 @umax_v4i32(i32* %p) #0 {  ; Unsigned max reduction: returns the largest (unsigned) of p[0..3].
 195 ; CHECK-LABEL: @umax_v4i32(
 196 ; CHECK-NEXT:  entry:
 197 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
 198 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, !tbaa [[TBAA0]]
 199 ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 200 ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ugt <4 x i32> [[TMP1]], [[RDX_SHUF]]
 201 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP1]], <4 x i32> [[RDX_SHUF]]
 202 ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 203 ; CHECK-NEXT:    [[RDX_MINMAX_CMP4:%.*]] = icmp ugt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF3]]
 204 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT5:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP4]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF3]]
 205 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT5]], i32 0
 206 ; CHECK-NEXT:    ret i32 [[TMP2]]
 207 ;
 208 entry:  ; input is a scalar loop with a branchy select; the assertions above expect shuffle + icmp ugt + select steps
 209   br label %for.cond
 210
 211 for.cond:
 212   %r.0 = phi i32 [ 0, %entry ], [ %cond, %for.inc ]  ; running maximum, starts at 0
 213   %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]  ; loop index i
 214   %cmp = icmp slt i32 %i.0, 4  ; loop body runs for i = 0..3
 215   br i1 %cmp, label %for.body, label %for.cond.cleanup
 216
 217 for.cond.cleanup:
 218   br label %for.end
 219
 220 for.body:
 221   %idxprom = sext i32 %i.0 to i64
 222   %arrayidx = getelementptr inbounds i32, i32* %p, i64 %idxprom
 223   %0 = load i32, i32* %arrayidx, align 4, !tbaa !3  ; p[i]
 224   %cmp1 = icmp ugt i32 %0, %r.0  ; unsigned: p[i] > running maximum?
 225   br i1 %cmp1, label %cond.true, label %cond.false
 226
 227 cond.true:
 228   %idxprom2 = sext i32 %i.0 to i64
 229   %arrayidx3 = getelementptr inbounds i32, i32* %p, i64 %idxprom2
 230   %1 = load i32, i32* %arrayidx3, align 4, !tbaa !3  ; p[i] loaded a second time (same index as %0)
 231   br label %cond.end
 232
 233 cond.false:
 234   br label %cond.end
 235
 236 cond.end:
 237   %cond = phi i32 [ %1, %cond.true ], [ %r.0, %cond.false ]  ; p[i] if it was larger, else the previous maximum
 238   br label %for.inc
 239
 240 for.inc:
 241   %inc = add nsw i32 %i.0, 1
 242   br label %for.cond
 243
 244 for.end:
 245   ret i32 %r.0  ; final maximum
 246 }
 247
 248 define float @fadd_v4i32(float* %p) #0 {  ; Fast-math float add reduction: returns 42.0 + p[0] + p[1] + p[2] + p[3]. NOTE(review): the name says v4i32 but the elements are float.
 249 ; CHECK-LABEL: @fadd_v4i32(
 250 ; CHECK-NEXT:  entry:
 251 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
 252 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4, !tbaa [[TBAA7:![0-9]+]]
 253 ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 254 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP1]], [[RDX_SHUF]]
 255 ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 256 ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF3]]
 257 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[BIN_RDX4]], i32 0
 258 ; CHECK-NEXT:    [[BIN_RDX5:%.*]] = fadd fast float 4.200000e+01, [[TMP2]]
 259 ; CHECK-NEXT:    ret float [[BIN_RDX5]]
 260 ;
 261 entry:  ; input is a plain scalar loop; the assertions above expect a vector reduction with the 42.0 start value added at the end
 262   br label %for.cond
 263
 264 for.cond:
 265   %r.0 = phi float [ 4.200000e+01, %entry ], [ %add, %for.inc ]  ; running sum, starts at 42.0
 266   %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]  ; loop index i
 267   %cmp = icmp slt i32 %i.0, 4  ; loop body runs for i = 0..3
 268   br i1 %cmp, label %for.body, label %for.cond.cleanup
 269
 270 for.cond.cleanup:
 271   br label %for.end
 272
 273 for.body:
 274   %idxprom = sext i32 %i.0 to i64
 275   %arrayidx = getelementptr inbounds float, float* %p, i64 %idxprom
 276   %0 = load float, float* %arrayidx, align 4, !tbaa !10  ; p[i]
 277   %add = fadd fast float %r.0, %0  ; sum += p[i]
 278   br label %for.inc
 279
 280 for.inc:
 281   %inc = add nsw i32 %i.0, 1
 282   br label %for.cond
 283
 284 for.end:
 285   ret float %r.0  ; final sum
 286 }
 287
 288 define float @fmul_v4i32(float* %p) #0 {  ; Fast-math float mul reduction: returns 42.0 * p[0] * p[1] * p[2] * p[3]. NOTE(review): the name says v4i32 but the elements are float.
 289 ; CHECK-LABEL: @fmul_v4i32(
 290 ; CHECK-NEXT:  entry:
 291 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
 292 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4, !tbaa [[TBAA7]]
 293 ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 294 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul fast <4 x float> [[TMP1]], [[RDX_SHUF]]
 295 ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 296 ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF3]]
 297 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[BIN_RDX4]], i32 0
 298 ; CHECK-NEXT:    [[BIN_RDX5:%.*]] = fmul fast float 1.000000e+00, [[TMP2]]
 299 ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fmul fast float [[BIN_RDX5]], 4.200000e+01
 300 ; CHECK-NEXT:    ret float [[OP_EXTRA]]
 301 ;
 302 entry:  ; input is a plain scalar loop; the assertions above expect a vector reduction, a multiply by 1.0, then a multiply by the 42.0 start value
 303   br label %for.cond
 304
 305 for.cond:
 306   %r.0 = phi float [ 4.200000e+01, %entry ], [ %mul, %for.inc ]  ; running product, starts at 42.0
 307   %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]  ; loop index i
 308   %cmp = icmp slt i32 %i.0, 4  ; loop body runs for i = 0..3
 309   br i1 %cmp, label %for.body, label %for.cond.cleanup
 310
 311 for.cond.cleanup:
 312   br label %for.end
 313
 314 for.body:
 315   %idxprom = sext i32 %i.0 to i64
 316   %arrayidx = getelementptr inbounds float, float* %p, i64 %idxprom
 317   %0 = load float, float* %arrayidx, align 4, !tbaa !10  ; p[i]
 318   %mul = fmul fast float %r.0, %0  ; product *= p[i]
 319   br label %for.inc
 320
 321 for.inc:
 322   %inc = add nsw i32 %i.0, 1
 323   br label %for.cond
 324
 325 for.end:
 326   ret float %r.0  ; final product
 327 }
 328
 329 define float @fmin_v4f32(float* %p) #0 {  ; Fast-math float min reduction: returns the smallest of p[0..3].
 330 ; CHECK-LABEL: @fmin_v4f32(
 331 ; CHECK-NEXT:  entry:
 332 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
 333 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4, !tbaa [[TBAA7]]
 334 ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 335 ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt <4 x float> [[TMP1]], [[RDX_SHUF]]
 336 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select fast <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP1]], <4 x float> [[RDX_SHUF]]
 337 ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 338 ; CHECK-NEXT:    [[RDX_MINMAX_CMP4:%.*]] = fcmp fast olt <4 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF3]]
 339 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT5:%.*]] = select fast <4 x i1> [[RDX_MINMAX_CMP4]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> [[RDX_SHUF3]]
 340 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT5]], i32 0
 341 ; CHECK-NEXT:    ret float [[TMP2]]
 342 ;
 343 entry:  ; input is a scalar loop with a branchy select; the assertions above expect shuffle + fcmp fast olt + select steps
 344   br label %for.cond
 345
 346 for.cond:
 347   %r.0 = phi float [  0x47EFFFFFE0000000, %entry ], [ %cond, %for.inc ]  ; running minimum; the start value is the largest finite float
 348   %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]  ; loop index i
 349   %cmp = icmp slt i32 %i.0, 4  ; loop body runs for i = 0..3
 350   br i1 %cmp, label %for.body, label %for.cond.cleanup
 351
 352 for.cond.cleanup:
 353   br label %for.end
 354
 355 for.body:
 356   %idxprom = sext i32 %i.0 to i64
 357   %arrayidx = getelementptr inbounds float, float* %p, i64 %idxprom
 358   %0 = load float, float* %arrayidx, align 4, !tbaa !10  ; p[i]
 359   %cmp1 = fcmp fast olt float %0, %r.0  ; p[i] < running minimum?
 360   br i1 %cmp1, label %cond.true, label %cond.false
 361
 362 cond.true:
 363   %idxprom2 = sext i32 %i.0 to i64
 364   %arrayidx3 = getelementptr inbounds float, float* %p, i64 %idxprom2
 365   %1 = load float, float* %arrayidx3, align 4, !tbaa !10  ; p[i] loaded a second time (same index as %0)
 366   br label %cond.end
 367
 368 cond.false:
 369   br label %cond.end
 370
 371 cond.end:
 372   %cond = phi fast float [ %1, %cond.true ], [ %r.0, %cond.false ]  ; p[i] if it was smaller, else the previous minimum
 373   br label %for.inc
 374
 375 for.inc:
 376   %inc = add nsw i32 %i.0, 1
 377   br label %for.cond
 378
 379 for.end:
 380   ret float %r.0  ; final minimum
 381 }
 382
 383 define available_externally float @max(float %a, float %b) {  ; Helper called by @findMax: returns %a when %a > %b (ordered compare), otherwise %b. No assertions are attached to it.
 384 entry:
 385   %a.addr = alloca float, align 4  ; both arguments are spilled to stack slots and reloaded below
 386   %b.addr = alloca float, align 4
 387   store float %a, float* %a.addr, align 4
 388   store float %b, float* %b.addr, align 4
 389   %0 = load float, float* %a.addr, align 4
 390   %1 = load float, float* %b.addr, align 4
 391   %cmp = fcmp nnan ninf nsz ogt float %0, %1  ; a > b, with no-NaN / no-inf / no-signed-zero flags
 392   br i1 %cmp, label %cond.true, label %cond.false
 393
 394 cond.true:                                        ; preds = %entry
 395   %2 = load float, float* %a.addr, align 4  ; result candidate: a
 396   br label %cond.end
 397
 398 cond.false:                                       ; preds = %entry
 399   %3 = load float, float* %b.addr, align 4  ; result candidate: b
 400   br label %cond.end
 401
 402 cond.end:                                         ; preds = %cond.false, %cond.true
 403   %cond = phi nnan ninf nsz float [ %2, %cond.true ], [ %3, %cond.false ]  ; a if a > b, else b
 404   ret float %cond
 405 }
 406
 407 ; PR23116
 408
 409 define float @findMax(<8 x float>* byval(<8 x float>) align 16 %0) {  ; Max reduction over all 8 lanes of the byval vector argument, written as a chain of seven calls to @max.
 410 ; CHECK-LABEL: @findMax(
 411 ; CHECK-NEXT:  entry:
 412 ; CHECK-NEXT:    [[V:%.*]] = load <8 x float>, <8 x float>* [[TMP0:%.*]], align 16, !tbaa [[TBAA0]]
 413 ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[V]], <8 x float> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
 414 ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp nnan ninf nsz ogt <8 x float> [[V]], [[RDX_SHUF]]
 415 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select nnan ninf nsz <8 x i1> [[RDX_MINMAX_CMP]], <8 x float> [[V]], <8 x float> [[RDX_SHUF]]
 416 ; CHECK-NEXT:    [[RDX_SHUF8:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> poison, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 417 ; CHECK-NEXT:    [[RDX_MINMAX_CMP9:%.*]] = fcmp nnan ninf nsz ogt <8 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF8]]
 418 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT10:%.*]] = select nnan ninf nsz <8 x i1> [[RDX_MINMAX_CMP9]], <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> [[RDX_SHUF8]]
 419 ; CHECK-NEXT:    [[RDX_SHUF11:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT10]], <8 x float> poison, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 420 ; CHECK-NEXT:    [[RDX_MINMAX_CMP12:%.*]] = fcmp nnan ninf nsz ogt <8 x float> [[RDX_MINMAX_SELECT10]], [[RDX_SHUF11]]
 421 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT13:%.*]] = select nnan ninf nsz <8 x i1> [[RDX_MINMAX_CMP12]], <8 x float> [[RDX_MINMAX_SELECT10]], <8 x float> [[RDX_SHUF11]]
 422 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT13]], i32 0
 423 ; CHECK-NEXT:    ret float [[TMP1]]
 424 ;
 425 entry:  ; the assertions above expect the calls and the stack copy to disappear, leaving one load and three shuffle + fcmp ogt + select steps
 426   %v.addr = alloca <8 x float>, align 32  ; local copy of the argument vector
 427   %v = load <8 x float>, <8 x float>* %0, align 16, !tbaa !3  ; NOTE(review): !3 is the "int" TBAA tag although the data is <8 x float> - confirm this is intended
 428   store <8 x float> %v, <8 x float>* %v.addr, align 32, !tbaa !3
 429   %1 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3  ; the local copy is reloaded in full before every lane extract
 430   %vecext = extractelement <8 x float> %1, i32 0
 431   %2 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
 432   %vecext1 = extractelement <8 x float> %2, i32 1
 433   %call = call nnan ninf nsz float @max(float %vecext, float %vecext1)  ; max(v[0], v[1])
 434   %3 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
 435   %vecext2 = extractelement <8 x float> %3, i32 2
 436   %call3 = call nnan ninf nsz float @max(float %call, float %vecext2)  ; fold in v[2]
 437   %4 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
 438   %vecext4 = extractelement <8 x float> %4, i32 3
 439   %call5 = call nnan ninf nsz float @max(float %call3, float %vecext4)  ; fold in v[3]
 440   %5 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
 441   %vecext6 = extractelement <8 x float> %5, i32 4
 442   %call7 = call nnan ninf nsz float @max(float %call5, float %vecext6)  ; fold in v[4]
 443   %6 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
 444   %vecext8 = extractelement <8 x float> %6, i32 5
 445   %call9 = call nnan ninf nsz float @max(float %call7, float %vecext8)  ; fold in v[5]
 446   %7 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
 447   %vecext10 = extractelement <8 x float> %7, i32 6
 448   %call11 = call nnan ninf nsz float @max(float %call9, float %vecext10)  ; fold in v[6]
 449   %8 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
 450   %vecext12 = extractelement <8 x float> %8, i32 7
 451   %call13 = call nnan ninf nsz float @max(float %call11, float %vecext12)  ; fold in v[7]
 452   ret float %call13  ; maximum over all 8 lanes
 453 }
 454
 455 attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+avx,+cx16,+cx8,+fxsr,+mmx,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "unsafe-fp-math"="true" "use-soft-float"="false" }  ; used by the loop-based reduction functions (not by @max or @findMax); enables the fast-math function attributes and AVX
 456
 457 !0 = !{i32 1, !"wchar_size", i32 4}  ; !0, !1 and !2 are not referenced anywhere else in the visible file
 458 !1 = !{i32 7, !"PIC Level", i32 2}
 459 !2 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project.git a9fe69c359de653015c39e413e48630d069abe27)"}
 460 !3 = !{!4, !4, i64 0}  ; TBAA access tag for "int": used by the i32 loads and by @findMax's vector loads/stores
 461 !4 = !{!"int", !5, i64 0}
 462 !5 = !{!"omnipotent char", !6, i64 0}
 463 !6 = !{!"Simple C/C++ TBAA"}  ; TBAA root node
 464 !7 = !{!8, !8, i64 0}  ; TBAA access tag for "short": used by the i16 loads
 465 !8 = !{!"short", !5, i64 0}
 466 !9 = !{!5, !5, i64 0}  ; TBAA access tag for "omnipotent char": used by the i8 loads
 467 !10 = !{!11, !11, i64 0}  ; TBAA access tag for "float": used by the scalar float loads
 468 !11 = !{!"float", !5, i64 0}