1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 < %s | FileCheck %s
; @foo: eight i32 sums reduced to an unsigned minimum, built from only two
; distinct loads. arr[1] (%0) feeds six adds (%a1..%a6) and arr[0] (%1) feeds
; two adds (%a7, %a8); the sums are then folded pairwise by an icmp ult /
; select chain that ends in %cond44.
; The autogenerated assertions expect a single <2 x i32> load from %arr that is
; widened by a shufflevector with mask <0,0,1,1,1,1,1,1>, one <8 x i32> add
; against a vector built from %a7, %a8, %a1..%a6 (in that lane order), and a
; call to llvm.vector.reduce.umin.v8i32 whose result is returned.
4 define i32 @foo(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8) {
7 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 1
8 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARR]] to <2 x i32>*
9 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 4
10 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
11 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A7:%.*]], i32 0
12 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A8:%.*]], i32 1
13 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A1:%.*]], i32 2
14 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[A2:%.*]], i32 3
15 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[A3:%.*]], i32 4
16 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A4:%.*]], i32 5
17 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A5:%.*]], i32 6
18 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A6:%.*]], i32 7
19 ; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
20 ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> [[TMP10]])
21 ; CHECK-NEXT: ret i32 [[TMP11]]
; arr[1] is loaded once and reused as the left operand of the next six adds.
24 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 1
25 %0 = load i32, i32* %arrayidx, align 4
26 %add = add i32 %0, %a1
27 %add2 = add i32 %0, %a2
28 %add4 = add i32 %0, %a3
29 %add6 = add i32 %0, %a4
30 %add8 = add i32 %0, %a5
31 %add10 = add i32 %0, %a6
; arr[0] is loaded once and reused by the remaining two adds.
32 %1 = load i32, i32* %arr, align 4
33 %add12 = add i32 %1, %a7
34 %add14 = add i32 %1, %a8
; Unsigned-min chain: each select keeps the smaller (icmp ult) of the running
; value and the next sum.
35 %cmp = icmp ult i32 %add, %add2
36 %cond = select i1 %cmp, i32 %add, i32 %add2
37 %cmp15 = icmp ult i32 %cond, %add4
38 %cond19 = select i1 %cmp15, i32 %cond, i32 %add4
39 %cmp20 = icmp ult i32 %cond19, %add6
40 %cond24 = select i1 %cmp20, i32 %cond19, i32 %add6
41 %cmp25 = icmp ult i32 %cond24, %add8
42 %cond29 = select i1 %cmp25, i32 %cond24, i32 %add8
43 %cmp30 = icmp ult i32 %cond29, %add10
44 %cond34 = select i1 %cmp30, i32 %cond29, i32 %add10
45 %cmp35 = icmp ult i32 %cond34, %add12
46 %cond39 = select i1 %cmp35, i32 %cond34, i32 %add12
47 %cmp40 = icmp ult i32 %cond39, %add14
48 %cond44 = select i1 %cmp40, i32 %cond39, i32 %add14
; @foo1: same unsigned-min reduction over eight i32 sums, but the sums draw on
; four loads with uneven reuse: arr[1] (%0) is used four times (%a1, %a4, %a5,
; %a8), arr[2] (%1) twice (%a2, %a7), arr[3] (%2) once (%a3) and arr[0] (%3)
; once (%a6).
; The autogenerated assertions expect one <4 x i32> load from %arr, a
; shufflevector with mask <0,1,1,1,1,2,2,3> that groups lanes by source
; element, an <8 x i32> add against %a6, %a1, %a4, %a5, %a8, %a2, %a7, %a3 (in
; that lane order), and a call to llvm.vector.reduce.umin.v8i32 whose result is
; returned. The three scalar getelementptr instructions are expected to remain.
52 define i32 @foo1(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8) {
55 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 1
56 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[ARR]], i64 2
57 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[ARR]], i64 3
58 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARR]] to <4 x i32>*
59 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
60 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 3>
61 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A6:%.*]], i32 0
62 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A1:%.*]], i32 1
63 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A4:%.*]], i32 2
64 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[A5:%.*]], i32 3
65 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[A8:%.*]], i32 4
66 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A2:%.*]], i32 5
67 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6
68 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A3:%.*]], i32 7
69 ; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
70 ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> [[TMP10]])
71 ; CHECK-NEXT: ret i32 [[TMP11]]
; Loads of arr[1], arr[2], arr[3] and arr[0] are interleaved with the adds that
; consume them; the load order (1, 2, 3, 0) differs from memory order.
74 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 1
75 %0 = load i32, i32* %arrayidx, align 4
76 %add = add i32 %0, %a1
77 %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 2
78 %1 = load i32, i32* %arrayidx1, align 4
79 %add2 = add i32 %1, %a2
80 %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 3
81 %2 = load i32, i32* %arrayidx3, align 4
82 %add4 = add i32 %2, %a3
83 %add6 = add i32 %0, %a4
84 %add8 = add i32 %0, %a5
85 %3 = load i32, i32* %arr, align 4
86 %add10 = add i32 %3, %a6
87 %add12 = add i32 %1, %a7
88 %add14 = add i32 %0, %a8
; Unsigned-min chain: each select keeps the smaller (icmp ult) of the running
; value and the next sum.
89 %cmp = icmp ult i32 %add, %add2
90 %cond = select i1 %cmp, i32 %add, i32 %add2
91 %cmp15 = icmp ult i32 %cond, %add4
92 %cond19 = select i1 %cmp15, i32 %cond, i32 %add4
93 %cmp20 = icmp ult i32 %cond19, %add6
94 %cond24 = select i1 %cmp20, i32 %cond19, i32 %add6
95 %cmp25 = icmp ult i32 %cond24, %add8
96 %cond29 = select i1 %cmp25, i32 %cond24, i32 %add8
97 %cmp30 = icmp ult i32 %cond29, %add10
98 %cond34 = select i1 %cmp30, i32 %cond29, i32 %add10
99 %cmp35 = icmp ult i32 %cond34, %add12
100 %cond39 = select i1 %cmp35, i32 %cond34, i32 %add12
101 %cmp40 = icmp ult i32 %cond39, %add14
102 %cond44 = select i1 %cmp40, i32 %cond39, i32 %add14
; @foo2: unsigned-min reduction over eight i32 sums where each of the four
; array elements is used exactly twice: arr[3] (%0) with %a1 and %a3, arr[2]
; (%1) with %a2 and %a7, arr[0] (%2) with %a4 and %a6, and arr[1] (%3) with %a5
; and %a8.
; The autogenerated assertions expect one <4 x i32> load from %arr, a
; shufflevector with mask <0,0,1,1,2,2,3,3>, an <8 x i32> add against %a4, %a6,
; %a5, %a8, %a2, %a7, %a1, %a3 (in that lane order), and a call to
; llvm.vector.reduce.umin.v8i32 whose result is returned. The three scalar
; getelementptr instructions are expected to remain.
106 define i32 @foo2(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8) {
107 ; CHECK-LABEL: @foo2(
109 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 3
110 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[ARR]], i64 2
111 ; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[ARR]], i64 1
112 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARR]] to <4 x i32>*
113 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
114 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
115 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A4:%.*]], i32 0
116 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A6:%.*]], i32 1
117 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A5:%.*]], i32 2
118 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[A8:%.*]], i32 3
119 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[A2:%.*]], i32 4
120 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A7:%.*]], i32 5
121 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A1:%.*]], i32 6
122 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A3:%.*]], i32 7
123 ; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
124 ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> [[TMP10]])
125 ; CHECK-NEXT: ret i32 [[TMP11]]
; Loads appear in the order arr[3], arr[2], arr[0], arr[1], interleaved with
; the adds that consume them.
128 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 3
129 %0 = load i32, i32* %arrayidx, align 4
130 %add = add i32 %0, %a1
131 %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 2
132 %1 = load i32, i32* %arrayidx1, align 4
133 %add2 = add i32 %1, %a2
134 %add4 = add i32 %0, %a3
135 %2 = load i32, i32* %arr, align 4
136 %add6 = add i32 %2, %a4
137 %arrayidx7 = getelementptr inbounds i32, i32* %arr, i64 1
138 %3 = load i32, i32* %arrayidx7, align 4
139 %add8 = add i32 %3, %a5
140 %add10 = add i32 %2, %a6
141 %add12 = add i32 %1, %a7
142 %add14 = add i32 %3, %a8
; Unsigned-min chain: each select keeps the smaller (icmp ult) of the running
; value and the next sum.
143 %cmp = icmp ult i32 %add, %add2
144 %cond = select i1 %cmp, i32 %add, i32 %add2
145 %cmp15 = icmp ult i32 %cond, %add4
146 %cond19 = select i1 %cmp15, i32 %cond, i32 %add4
147 %cmp20 = icmp ult i32 %cond19, %add6
148 %cond24 = select i1 %cmp20, i32 %cond19, i32 %add6
149 %cmp25 = icmp ult i32 %cond24, %add8
150 %cond29 = select i1 %cmp25, i32 %cond24, i32 %add8
151 %cmp30 = icmp ult i32 %cond29, %add10
152 %cond34 = select i1 %cmp30, i32 %cond29, i32 %add10
153 %cmp35 = icmp ult i32 %cond34, %add12
154 %cond39 = select i1 %cmp35, i32 %cond34, i32 %add12
155 %cmp40 = icmp ult i32 %cond39, %add14
156 %cond44 = select i1 %cmp40, i32 %cond39, i32 %add14