1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64--- -mattr=+sse2 | FileCheck %s
3 ; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64--- -mattr=+avx | FileCheck %s
6 ; Check that operands can be commuted when the predicate itself is commutative (eq, ne, oeq, uno), so lanes written in mixed operand order still vectorize.
; Per-lane `icmp eq` of %a against four consecutive i32 values loaded from %b.
; Lanes 0 and 3 are written as (a, b); lanes 1 and 2 are written as (b, a).
; The expected output below is a single <4 x i32> load, a single vector
; `icmp eq` with the loaded vector as the first operand, and the sext.
9 define <4 x i32> @icmp_eq_v4i32(<4 x i32> %a, i32* %b) {
10 ; CHECK-LABEL: @icmp_eq_v4i32(
11 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
12 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
13 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[TMP2]], [[A:%.*]]
14 ; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
15 ; CHECK-NEXT: ret <4 x i32> [[R]]
; Scalarize %a into its four lanes.
17 %a0 = extractelement <4 x i32> %a, i32 0
18 %a1 = extractelement <4 x i32> %a, i32 1
19 %a2 = extractelement <4 x i32> %a, i32 2
20 %a3 = extractelement <4 x i32> %a, i32 3
; Address and load four consecutive i32 elements starting at %b.
21 %p0 = getelementptr inbounds i32, i32* %b, i32 0
22 %p1 = getelementptr inbounds i32, i32* %b, i32 1
23 %p2 = getelementptr inbounds i32, i32* %b, i32 2
24 %p3 = getelementptr inbounds i32, i32* %b, i32 3
25 %b0 = load i32, i32* %p0, align 4
26 %b1 = load i32, i32* %p1, align 4
27 %b2 = load i32, i32* %p2, align 4
28 %b3 = load i32, i32* %p3, align 4
; Mixed operand order on purpose: lanes 0/3 use (a, b), lanes 1/2 use (b, a).
29 %c0 = icmp eq i32 %a0, %b0
30 %c1 = icmp eq i32 %b1, %a1
31 %c2 = icmp eq i32 %b2, %a2
32 %c3 = icmp eq i32 %a3, %b3
; Rebuild the <4 x i1> result lane by lane, then sign-extend to <4 x i32>.
33 %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
34 %d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
35 %d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
36 %d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
37 %r = sext <4 x i1> %d3 to <4 x i32>
; Per-lane `icmp ne` of %a against four consecutive i32 values loaded from %b.
; Lanes 0 and 3 are written as (a, b); lanes 1 and 2 are written as (b, a).
; The expected output below is a single <4 x i32> load, a single vector
; `icmp ne` with the loaded vector as the first operand, and the sext.
41 define <4 x i32> @icmp_ne_v4i32(<4 x i32> %a, i32* %b) {
42 ; CHECK-LABEL: @icmp_ne_v4i32(
43 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
44 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
45 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], [[A:%.*]]
46 ; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
47 ; CHECK-NEXT: ret <4 x i32> [[R]]
; Scalarize %a into its four lanes.
49 %a0 = extractelement <4 x i32> %a, i32 0
50 %a1 = extractelement <4 x i32> %a, i32 1
51 %a2 = extractelement <4 x i32> %a, i32 2
52 %a3 = extractelement <4 x i32> %a, i32 3
; Address and load four consecutive i32 elements starting at %b.
53 %p0 = getelementptr inbounds i32, i32* %b, i32 0
54 %p1 = getelementptr inbounds i32, i32* %b, i32 1
55 %p2 = getelementptr inbounds i32, i32* %b, i32 2
56 %p3 = getelementptr inbounds i32, i32* %b, i32 3
57 %b0 = load i32, i32* %p0, align 4
58 %b1 = load i32, i32* %p1, align 4
59 %b2 = load i32, i32* %p2, align 4
60 %b3 = load i32, i32* %p3, align 4
; Mixed operand order on purpose: lanes 0/3 use (a, b), lanes 1/2 use (b, a).
61 %c0 = icmp ne i32 %a0, %b0
62 %c1 = icmp ne i32 %b1, %a1
63 %c2 = icmp ne i32 %b2, %a2
64 %c3 = icmp ne i32 %a3, %b3
; Rebuild the <4 x i1> result lane by lane, then sign-extend to <4 x i32>.
65 %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
66 %d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
67 %d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
68 %d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
69 %r = sext <4 x i1> %d3 to <4 x i32>
; Per-lane `fcmp oeq` of %a against four consecutive floats loaded from %b.
; Lanes 0 and 3 are written as (a, b); lanes 1 and 2 are written as (b, a).
; The expected output below is a single <4 x float> load, a single vector
; `fcmp oeq` with the loaded vector as the first operand, and the sext.
73 define <4 x i32> @fcmp_oeq_v4i32(<4 x float> %a, float* %b) {
74 ; CHECK-LABEL: @fcmp_oeq_v4i32(
75 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
76 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
77 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp oeq <4 x float> [[TMP2]], [[A:%.*]]
78 ; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
79 ; CHECK-NEXT: ret <4 x i32> [[R]]
; Scalarize %a into its four lanes.
81 %a0 = extractelement <4 x float> %a, i32 0
82 %a1 = extractelement <4 x float> %a, i32 1
83 %a2 = extractelement <4 x float> %a, i32 2
84 %a3 = extractelement <4 x float> %a, i32 3
; Address and load four consecutive float elements starting at %b.
85 %p0 = getelementptr inbounds float, float* %b, i32 0
86 %p1 = getelementptr inbounds float, float* %b, i32 1
87 %p2 = getelementptr inbounds float, float* %b, i32 2
88 %p3 = getelementptr inbounds float, float* %b, i32 3
89 %b0 = load float, float* %p0, align 4
90 %b1 = load float, float* %p1, align 4
91 %b2 = load float, float* %p2, align 4
92 %b3 = load float, float* %p3, align 4
; Mixed operand order on purpose: lanes 0/3 use (a, b), lanes 1/2 use (b, a).
93 %c0 = fcmp oeq float %a0, %b0
94 %c1 = fcmp oeq float %b1, %a1
95 %c2 = fcmp oeq float %b2, %a2
96 %c3 = fcmp oeq float %a3, %b3
; Rebuild the <4 x i1> result lane by lane, then sign-extend to <4 x i32>.
97 %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
98 %d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
99 %d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
100 %d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
101 %r = sext <4 x i1> %d3 to <4 x i32>
; Per-lane `fcmp uno` of %a against four consecutive floats loaded from %b.
; Lanes 0 and 3 are written as (a, b); lanes 1 and 2 are written as (b, a).
; The expected output below is a single <4 x float> load, a single vector
; `fcmp uno` with the loaded vector as the first operand, and the sext.
105 define <4 x i32> @fcmp_uno_v4i32(<4 x float> %a, float* %b) {
106 ; CHECK-LABEL: @fcmp_uno_v4i32(
107 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
108 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
109 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp uno <4 x float> [[TMP2]], [[A:%.*]]
110 ; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
111 ; CHECK-NEXT: ret <4 x i32> [[R]]
; Scalarize %a into its four lanes.
113 %a0 = extractelement <4 x float> %a, i32 0
114 %a1 = extractelement <4 x float> %a, i32 1
115 %a2 = extractelement <4 x float> %a, i32 2
116 %a3 = extractelement <4 x float> %a, i32 3
; Address and load four consecutive float elements starting at %b.
117 %p0 = getelementptr inbounds float, float* %b, i32 0
118 %p1 = getelementptr inbounds float, float* %b, i32 1
119 %p2 = getelementptr inbounds float, float* %b, i32 2
120 %p3 = getelementptr inbounds float, float* %b, i32 3
121 %b0 = load float, float* %p0, align 4
122 %b1 = load float, float* %p1, align 4
123 %b2 = load float, float* %p2, align 4
124 %b3 = load float, float* %p3, align 4
; Mixed operand order on purpose: lanes 0/3 use (a, b), lanes 1/2 use (b, a).
125 %c0 = fcmp uno float %a0, %b0
126 %c1 = fcmp uno float %b1, %a1
127 %c2 = fcmp uno float %b2, %a2
128 %c3 = fcmp uno float %a3, %b3
; Rebuild the <4 x i1> result lane by lane, then sign-extend to <4 x i32>.
129 %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
130 %d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
131 %d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
132 %d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
133 %r = sext <4 x i1> %d3 to <4 x i32>
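138 ; Check that operands can be commuted by swapping the predicate (sgt/slt, uge/ule, ogt/olt).
; ord and uno are not swapped forms of each other, so @fcmp_ord_uno_v4i32 is expected to be only partially vectorized.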
; Per-lane signed compare of %a against four consecutive i32 values loaded
; from %b. Lanes 0 and 3 are written as `icmp sgt a, b`; lanes 1 and 2 are
; written as `icmp slt b, a`. The expected output below is a single
; <4 x i32> load and a single vector `icmp slt` with the loaded vector as the
; first operand and %a as the second, followed by the sext.
141 define <4 x i32> @icmp_sgt_slt_v4i32(<4 x i32> %a, i32* %b) {
142 ; CHECK-LABEL: @icmp_sgt_slt_v4i32(
143 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
144 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
145 ; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[TMP2]], [[A:%.*]]
146 ; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
147 ; CHECK-NEXT: ret <4 x i32> [[R]]
; Scalarize %a into its four lanes.
149 %a0 = extractelement <4 x i32> %a, i32 0
150 %a1 = extractelement <4 x i32> %a, i32 1
151 %a2 = extractelement <4 x i32> %a, i32 2
152 %a3 = extractelement <4 x i32> %a, i32 3
; Address and load four consecutive i32 elements starting at %b.
153 %p0 = getelementptr inbounds i32, i32* %b, i32 0
154 %p1 = getelementptr inbounds i32, i32* %b, i32 1
155 %p2 = getelementptr inbounds i32, i32* %b, i32 2
156 %p3 = getelementptr inbounds i32, i32* %b, i32 3
157 %b0 = load i32, i32* %p0, align 4
158 %b1 = load i32, i32* %p1, align 4
159 %b2 = load i32, i32* %p2, align 4
160 %b3 = load i32, i32* %p3, align 4
; Lanes 0/3 use sgt with (a, b); lanes 1/2 use the swapped form slt with (b, a).
161 %c0 = icmp sgt i32 %a0, %b0
162 %c1 = icmp slt i32 %b1, %a1
163 %c2 = icmp slt i32 %b2, %a2
164 %c3 = icmp sgt i32 %a3, %b3
; Rebuild the <4 x i1> result lane by lane, then sign-extend to <4 x i32>.
165 %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
166 %d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
167 %d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
168 %d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
169 %r = sext <4 x i1> %d3 to <4 x i32>
; Per-lane unsigned compare of %a against four consecutive i32 values loaded
; from %b. Lanes 0 and 3 are written as `icmp uge a, b`; lanes 1 and 2 are
; written as `icmp ule b, a`. The expected output below is a single
; <4 x i32> load and a single vector `icmp ule` with the loaded vector as the
; first operand and %a as the second, followed by the sext.
173 define <4 x i32> @icmp_uge_ule_v4i32(<4 x i32> %a, i32* %b) {
174 ; CHECK-LABEL: @icmp_uge_ule_v4i32(
175 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
176 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
177 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <4 x i32> [[TMP2]], [[A:%.*]]
178 ; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
179 ; CHECK-NEXT: ret <4 x i32> [[R]]
; Scalarize %a into its four lanes.
181 %a0 = extractelement <4 x i32> %a, i32 0
182 %a1 = extractelement <4 x i32> %a, i32 1
183 %a2 = extractelement <4 x i32> %a, i32 2
184 %a3 = extractelement <4 x i32> %a, i32 3
; Address and load four consecutive i32 elements starting at %b.
185 %p0 = getelementptr inbounds i32, i32* %b, i32 0
186 %p1 = getelementptr inbounds i32, i32* %b, i32 1
187 %p2 = getelementptr inbounds i32, i32* %b, i32 2
188 %p3 = getelementptr inbounds i32, i32* %b, i32 3
189 %b0 = load i32, i32* %p0, align 4
190 %b1 = load i32, i32* %p1, align 4
191 %b2 = load i32, i32* %p2, align 4
192 %b3 = load i32, i32* %p3, align 4
; Lanes 0/3 use uge with (a, b); lanes 1/2 use the swapped form ule with (b, a).
193 %c0 = icmp uge i32 %a0, %b0
194 %c1 = icmp ule i32 %b1, %a1
195 %c2 = icmp ule i32 %b2, %a2
196 %c3 = icmp uge i32 %a3, %b3
; Rebuild the <4 x i1> result lane by lane, then sign-extend to <4 x i32>.
197 %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
198 %d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
199 %d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
200 %d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
201 %r = sext <4 x i1> %d3 to <4 x i32>
; Per-lane ordered compare of %a against four consecutive floats loaded from
; %b. Lanes 0 and 3 are written as `fcmp ogt a, b`; lanes 1 and 2 are written
; as `fcmp olt b, a`. The expected output below is a single <4 x float> load
; and a single vector `fcmp olt` with the loaded vector as the first operand
; and %a as the second, followed by the sext.
205 define <4 x i32> @fcmp_ogt_olt_v4i32(<4 x float> %a, float* %b) {
206 ; CHECK-LABEL: @fcmp_ogt_olt_v4i32(
207 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
208 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
209 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp olt <4 x float> [[TMP2]], [[A:%.*]]
210 ; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
211 ; CHECK-NEXT: ret <4 x i32> [[R]]
; Scalarize %a into its four lanes.
213 %a0 = extractelement <4 x float> %a, i32 0
214 %a1 = extractelement <4 x float> %a, i32 1
215 %a2 = extractelement <4 x float> %a, i32 2
216 %a3 = extractelement <4 x float> %a, i32 3
; Address and load four consecutive float elements starting at %b.
217 %p0 = getelementptr inbounds float, float* %b, i32 0
218 %p1 = getelementptr inbounds float, float* %b, i32 1
219 %p2 = getelementptr inbounds float, float* %b, i32 2
220 %p3 = getelementptr inbounds float, float* %b, i32 3
221 %b0 = load float, float* %p0, align 4
222 %b1 = load float, float* %p1, align 4
223 %b2 = load float, float* %p2, align 4
224 %b3 = load float, float* %p3, align 4
; Lanes 0/3 use ogt with (a, b); lanes 1/2 use the swapped form olt with (b, a).
225 %c0 = fcmp ogt float %a0, %b0
226 %c1 = fcmp olt float %b1, %a1
227 %c2 = fcmp olt float %b2, %a2
228 %c3 = fcmp ogt float %a3, %b3
; Rebuild the <4 x i1> result lane by lane, then sign-extend to <4 x i32>.
229 %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
230 %d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
231 %d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
232 %d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
233 %r = sext <4 x i1> %d3 to <4 x i32>
; Per-lane compare of %a against four consecutive floats loaded from %b using
; two different predicates: lanes 0 and 3 are `fcmp ord a, b`, lanes 1 and 2
; are `fcmp uno b, a`. The expected output below does not form a single
; 4-wide compare: only lanes 1 and 2 are combined into a <2 x float> load and
; a <2 x float> `fcmp uno`, lanes 0 and 3 keep their scalar loads and scalar
; `fcmp ord`, and the results are merged with shufflevector/insertelement
; before the sext.
237 define <4 x i32> @fcmp_ord_uno_v4i32(<4 x float> %a, float* %b) {
238 ; CHECK-LABEL: @fcmp_ord_uno_v4i32(
239 ; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
240 ; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
241 ; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
242 ; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
243 ; CHECK-NEXT: [[B0:%.*]] = load float, float* [[B]], align 4
244 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P1]] to <2 x float>*
245 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
246 ; CHECK-NEXT: [[B3:%.*]] = load float, float* [[P3]], align 4
247 ; CHECK-NEXT: [[C0:%.*]] = fcmp ord float [[A0]], [[B0]]
248 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> undef, <2 x i32> <i32 1, i32 2>
249 ; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <2 x float> [[TMP2]], [[TMP3]]
250 ; CHECK-NEXT: [[C3:%.*]] = fcmp ord float [[A3]], [[B3]]
251 ; CHECK-NEXT: [[D0:%.*]] = insertelement <4 x i1> undef, i1 [[C0]], i32 0
252 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i1> [[TMP4]], <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
253 ; CHECK-NEXT: [[D21:%.*]] = shufflevector <4 x i1> [[D0]], <4 x i1> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 5, i32 undef>
254 ; CHECK-NEXT: [[D3:%.*]] = insertelement <4 x i1> [[D21]], i1 [[C3]], i32 3
255 ; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[D3]] to <4 x i32>
256 ; CHECK-NEXT: ret <4 x i32> [[R]]
; Scalarize %a into its four lanes.
258 %a0 = extractelement <4 x float> %a, i32 0
259 %a1 = extractelement <4 x float> %a, i32 1
260 %a2 = extractelement <4 x float> %a, i32 2
261 %a3 = extractelement <4 x float> %a, i32 3
; Address and load four consecutive float elements starting at %b.
262 %p0 = getelementptr inbounds float, float* %b, i32 0
263 %p1 = getelementptr inbounds float, float* %b, i32 1
264 %p2 = getelementptr inbounds float, float* %b, i32 2
265 %p3 = getelementptr inbounds float, float* %b, i32 3
266 %b0 = load float, float* %p0, align 4
267 %b1 = load float, float* %p1, align 4
268 %b2 = load float, float* %p2, align 4
269 %b3 = load float, float* %p3, align 4
; Lanes 0/3 use ord with (a, b); lanes 1/2 use uno with (b, a).
270 %c0 = fcmp ord float %a0, %b0
271 %c1 = fcmp uno float %b1, %a1
272 %c2 = fcmp uno float %b2, %a2
273 %c3 = fcmp ord float %a3, %b3
; Rebuild the <4 x i1> result lane by lane, then sign-extend to <4 x i32>.
274 %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
275 %d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
276 %d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
277 %d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
278 %r = sext <4 x i1> %d3 to <4 x i32>