1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64--- -mattr=+sse2 | FileCheck %s
3 ; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64--- -mattr=+avx | FileCheck %s
6 ; Check that we can commute operands based on the predicate.
; Scalar `icmp eq` over four i32 lanes. Each lane compares an element
; extracted from %a with an i32 loaded at element offsets 0..3 from %b.
; Lanes 1 and 2 write the operands as (loaded, %a element); lanes 0 and 3
; write them as (%a element, loaded). The autogenerated assertions below
; expect one <4 x i32> load feeding one vector `icmp eq`, followed by the sext.
9 define <4 x i32> @icmp_eq_v4i32(<4 x i32> %a, ptr %b) {
10 ; CHECK-LABEL: @icmp_eq_v4i32(
11 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
12 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], [[A:%.*]]
13 ; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
14 ; CHECK-NEXT: ret <4 x i32> [[R]]
; Split the vector argument into its four scalar lanes.
16 %a0 = extractelement <4 x i32> %a, i32 0
17 %a1 = extractelement <4 x i32> %a, i32 1
18 %a2 = extractelement <4 x i32> %a, i32 2
19 %a3 = extractelement <4 x i32> %a, i32 3
; Pointers to i32 elements 1..3 relative to %b (element 0 is %b itself).
20 %p1 = getelementptr inbounds i32, ptr %b, i32 1
21 %p2 = getelementptr inbounds i32, ptr %b, i32 2
22 %p3 = getelementptr inbounds i32, ptr %b, i32 3
23 %b0 = load i32, ptr %b, align 4
24 %b1 = load i32, ptr %p1, align 4
25 %b2 = load i32, ptr %p2, align 4
26 %b3 = load i32, ptr %p3, align 4
; Same predicate in every lane, but mixed operand order:
; lanes 0/3 are (a, b), lanes 1/2 are (b, a).
27 %c0 = icmp eq i32 %a0, %b0
28 %c1 = icmp eq i32 %b1, %a1
29 %c2 = icmp eq i32 %b2, %a2
30 %c3 = icmp eq i32 %a3, %b3
; Reassemble the four i1 results into a vector and sign-extend to <4 x i32>.
31 %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
32 %d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
33 %d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
34 %d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
35 %r = sext <4 x i1> %d3 to <4 x i32>
; Scalar `icmp ne` over four i32 lanes, comparing elements of %a with i32
; values loaded at element offsets 0..3 from %b. Operand order differs per
; lane (lanes 1 and 2 put the loaded value first). The autogenerated
; assertions below expect one vector load and one vector `icmp ne`.
39 define <4 x i32> @icmp_ne_v4i32(<4 x i32> %a, ptr %b) {
40 ; CHECK-LABEL: @icmp_ne_v4i32(
41 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
42 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i32> [[TMP1]], [[A:%.*]]
43 ; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
44 ; CHECK-NEXT: ret <4 x i32> [[R]]
; Scalar lanes of the vector argument.
46 %a0 = extractelement <4 x i32> %a, i32 0
47 %a1 = extractelement <4 x i32> %a, i32 1
48 %a2 = extractelement <4 x i32> %a, i32 2
49 %a3 = extractelement <4 x i32> %a, i32 3
; Four i32 loads at element offsets 0, 1, 2 and 3 from %b.
50 %p1 = getelementptr inbounds i32, ptr %b, i32 1
51 %p2 = getelementptr inbounds i32, ptr %b, i32 2
52 %p3 = getelementptr inbounds i32, ptr %b, i32 3
53 %b0 = load i32, ptr %b, align 4
54 %b1 = load i32, ptr %p1, align 4
55 %b2 = load i32, ptr %p2, align 4
56 %b3 = load i32, ptr %p3, align 4
; Lanes 0/3 compare (a, b); lanes 1/2 compare (b, a) with the same predicate.
57 %c0 = icmp ne i32 %a0, %b0
58 %c1 = icmp ne i32 %b1, %a1
59 %c2 = icmp ne i32 %b2, %a2
60 %c3 = icmp ne i32 %a3, %b3
; Pack the per-lane results into <4 x i1>, then sign-extend each lane to i32.
61 %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
62 %d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
63 %d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
64 %d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
65 %r = sext <4 x i1> %d3 to <4 x i32>
; Floating-point variant: scalar `fcmp oeq` over four float lanes, comparing
; elements of %a with floats loaded at element offsets 0..3 from %b.
; Lanes 1 and 2 list the loaded value first. The autogenerated assertions
; below expect one <4 x float> load and one vector `fcmp oeq`.
69 define <4 x i32> @fcmp_oeq_v4i32(<4 x float> %a, ptr %b) {
70 ; CHECK-LABEL: @fcmp_oeq_v4i32(
71 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
72 ; CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq <4 x float> [[TMP1]], [[A:%.*]]
73 ; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
74 ; CHECK-NEXT: ret <4 x i32> [[R]]
; Scalar float lanes of the vector argument.
76 %a0 = extractelement <4 x float> %a, i32 0
77 %a1 = extractelement <4 x float> %a, i32 1
78 %a2 = extractelement <4 x float> %a, i32 2
79 %a3 = extractelement <4 x float> %a, i32 3
; Four float loads at element offsets 0, 1, 2 and 3 from %b.
80 %p1 = getelementptr inbounds float, ptr %b, i32 1
81 %p2 = getelementptr inbounds float, ptr %b, i32 2
82 %p3 = getelementptr inbounds float, ptr %b, i32 3
83 %b0 = load float, ptr %b, align 4
84 %b1 = load float, ptr %p1, align 4
85 %b2 = load float, ptr %p2, align 4
86 %b3 = load float, ptr %p3, align 4
; Same predicate in every lane; operand order is (a, b) for lanes 0/3 and
; (b, a) for lanes 1/2.
87 %c0 = fcmp oeq float %a0, %b0
88 %c1 = fcmp oeq float %b1, %a1
89 %c2 = fcmp oeq float %b2, %a2
90 %c3 = fcmp oeq float %a3, %b3
; Build the <4 x i1> result vector and sign-extend it to <4 x i32>.
91 %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
92 %d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
93 %d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
94 %d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
95 %r = sext <4 x i1> %d3 to <4 x i32>
; Scalar `fcmp uno` over four float lanes, comparing elements of %a with
; floats loaded at element offsets 0..3 from %b. Lanes 1 and 2 list the
; loaded value first. The autogenerated assertions below expect one vector
; load and one vector `fcmp uno`.
99 define <4 x i32> @fcmp_uno_v4i32(<4 x float> %a, ptr %b) {
100 ; CHECK-LABEL: @fcmp_uno_v4i32(
101 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
102 ; CHECK-NEXT: [[TMP2:%.*]] = fcmp uno <4 x float> [[TMP1]], [[A:%.*]]
103 ; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
104 ; CHECK-NEXT: ret <4 x i32> [[R]]
; Scalar float lanes of the vector argument.
106 %a0 = extractelement <4 x float> %a, i32 0
107 %a1 = extractelement <4 x float> %a, i32 1
108 %a2 = extractelement <4 x float> %a, i32 2
109 %a3 = extractelement <4 x float> %a, i32 3
; Four float loads at element offsets 0, 1, 2 and 3 from %b.
110 %p1 = getelementptr inbounds float, ptr %b, i32 1
111 %p2 = getelementptr inbounds float, ptr %b, i32 2
112 %p3 = getelementptr inbounds float, ptr %b, i32 3
113 %b0 = load float, ptr %b, align 4
114 %b1 = load float, ptr %p1, align 4
115 %b2 = load float, ptr %p2, align 4
116 %b3 = load float, ptr %p3, align 4
; Lanes 0/3 compare (a, b); lanes 1/2 compare (b, a) with the same predicate.
117 %c0 = fcmp uno float %a0, %b0
118 %c1 = fcmp uno float %b1, %a1
119 %c2 = fcmp uno float %b2, %a2
120 %c3 = fcmp uno float %a3, %b3
; Pack the per-lane results into <4 x i1>, then sign-extend each lane to i32.
121 %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
122 %d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
123 %d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
124 %d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
125 %r = sext <4 x i1> %d3 to <4 x i32>
130 ; Check that we can commute operands by swapping the predicate.
; Mixed-predicate case: lanes 0 and 3 use `icmp sgt` with operands (a, b),
; while lanes 1 and 2 use `icmp slt` with operands (b, a). The autogenerated
; assertions below expect one <4 x i32> load and one vector `icmp sgt` with
; the %a vector as the first operand.
133 define <4 x i32> @icmp_sgt_slt_v4i32(<4 x i32> %a, ptr %b) {
134 ; CHECK-LABEL: @icmp_sgt_slt_v4i32(
135 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
136 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[A:%.*]], [[TMP1]]
137 ; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
138 ; CHECK-NEXT: ret <4 x i32> [[R]]
; Split the vector argument into its four scalar lanes.
140 %a0 = extractelement <4 x i32> %a, i32 0
141 %a1 = extractelement <4 x i32> %a, i32 1
142 %a2 = extractelement <4 x i32> %a, i32 2
143 %a3 = extractelement <4 x i32> %a, i32 3
; Four i32 loads at element offsets 0, 1, 2 and 3 from %b.
144 %p1 = getelementptr inbounds i32, ptr %b, i32 1
145 %p2 = getelementptr inbounds i32, ptr %b, i32 2
146 %p3 = getelementptr inbounds i32, ptr %b, i32 3
147 %b0 = load i32, ptr %b, align 4
148 %b1 = load i32, ptr %p1, align 4
149 %b2 = load i32, ptr %p2, align 4
150 %b3 = load i32, ptr %p3, align 4
; sgt(a, b) in lanes 0/3; slt(b, a) in lanes 1/2, i.e. the swapped predicate
; applied to swapped operands.
151 %c0 = icmp sgt i32 %a0, %b0
152 %c1 = icmp slt i32 %b1, %a1
153 %c2 = icmp slt i32 %b2, %a2
154 %c3 = icmp sgt i32 %a3, %b3
; Reassemble the four i1 results into a vector and sign-extend to <4 x i32>.
155 %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
156 %d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
157 %d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
158 %d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
159 %r = sext <4 x i1> %d3 to <4 x i32>
; Unsigned mixed-predicate case: lanes 0 and 3 use `icmp uge` with operands
; (a, b), while lanes 1 and 2 use `icmp ule` with operands (b, a). The
; autogenerated assertions below expect one vector load and one vector
; `icmp uge` with the %a vector as the first operand.
163 define <4 x i32> @icmp_uge_ule_v4i32(<4 x i32> %a, ptr %b) {
164 ; CHECK-LABEL: @icmp_uge_ule_v4i32(
165 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
166 ; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <4 x i32> [[A:%.*]], [[TMP1]]
167 ; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
168 ; CHECK-NEXT: ret <4 x i32> [[R]]
; Scalar lanes of the vector argument.
170 %a0 = extractelement <4 x i32> %a, i32 0
171 %a1 = extractelement <4 x i32> %a, i32 1
172 %a2 = extractelement <4 x i32> %a, i32 2
173 %a3 = extractelement <4 x i32> %a, i32 3
; Four i32 loads at element offsets 0, 1, 2 and 3 from %b.
174 %p1 = getelementptr inbounds i32, ptr %b, i32 1
175 %p2 = getelementptr inbounds i32, ptr %b, i32 2
176 %p3 = getelementptr inbounds i32, ptr %b, i32 3
177 %b0 = load i32, ptr %b, align 4
178 %b1 = load i32, ptr %p1, align 4
179 %b2 = load i32, ptr %p2, align 4
180 %b3 = load i32, ptr %p3, align 4
; uge(a, b) in lanes 0/3; ule(b, a) in lanes 1/2.
181 %c0 = icmp uge i32 %a0, %b0
182 %c1 = icmp ule i32 %b1, %a1
183 %c2 = icmp ule i32 %b2, %a2
184 %c3 = icmp uge i32 %a3, %b3
; Pack the per-lane results into <4 x i1>, then sign-extend each lane to i32.
185 %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
186 %d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
187 %d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
188 %d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
189 %r = sext <4 x i1> %d3 to <4 x i32>
; Floating-point mixed-predicate case: lanes 0 and 3 use `fcmp ogt` with
; operands (a, b), while lanes 1 and 2 use `fcmp olt` with operands (b, a).
; The autogenerated assertions below expect one <4 x float> load and one
; vector `fcmp ogt` with the %a vector as the first operand.
193 define <4 x i32> @fcmp_ogt_olt_v4i32(<4 x float> %a, ptr %b) {
194 ; CHECK-LABEL: @fcmp_ogt_olt_v4i32(
195 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
196 ; CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <4 x float> [[A:%.*]], [[TMP1]]
197 ; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
198 ; CHECK-NEXT: ret <4 x i32> [[R]]
; Scalar float lanes of the vector argument.
200 %a0 = extractelement <4 x float> %a, i32 0
201 %a1 = extractelement <4 x float> %a, i32 1
202 %a2 = extractelement <4 x float> %a, i32 2
203 %a3 = extractelement <4 x float> %a, i32 3
; Four float loads at element offsets 0, 1, 2 and 3 from %b.
204 %p1 = getelementptr inbounds float, ptr %b, i32 1
205 %p2 = getelementptr inbounds float, ptr %b, i32 2
206 %p3 = getelementptr inbounds float, ptr %b, i32 3
207 %b0 = load float, ptr %b, align 4
208 %b1 = load float, ptr %p1, align 4
209 %b2 = load float, ptr %p2, align 4
210 %b3 = load float, ptr %p3, align 4
; ogt(a, b) in lanes 0/3; olt(b, a) in lanes 1/2.
211 %c0 = fcmp ogt float %a0, %b0
212 %c1 = fcmp olt float %b1, %a1
213 %c2 = fcmp olt float %b2, %a2
214 %c3 = fcmp ogt float %a3, %b3
; Build the <4 x i1> result vector and sign-extend it to <4 x i32>.
215 %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
216 %d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
217 %d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
218 %d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
219 %r = sext <4 x i1> %d3 to <4 x i32>
; Case where the two predicates cannot be unified by swapping operands:
; lanes 0 and 3 use `fcmp ord` with operands (a, b), lanes 1 and 2 use
; `fcmp uno` with operands (b, a). The autogenerated assertions below expect
; one vector load, a vector `fcmp ord` and a vector `fcmp uno` over the same
; operands, and a shufflevector with mask <0, 5, 6, 3> that takes lanes 0/3
; from the `ord` result and lanes 1/2 from the `uno` result.
223 define <4 x i32> @fcmp_ord_uno_v4i32(<4 x float> %a, ptr %b) {
224 ; CHECK-LABEL: @fcmp_ord_uno_v4i32(
225 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
226 ; CHECK-NEXT: [[TMP2:%.*]] = fcmp ord <4 x float> [[TMP1]], [[A:%.*]]
227 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp uno <4 x float> [[TMP1]], [[A]]
228 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> [[TMP3]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
229 ; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[TMP4]] to <4 x i32>
230 ; CHECK-NEXT: ret <4 x i32> [[R]]
; Scalar float lanes of the vector argument.
232 %a0 = extractelement <4 x float> %a, i32 0
233 %a1 = extractelement <4 x float> %a, i32 1
234 %a2 = extractelement <4 x float> %a, i32 2
235 %a3 = extractelement <4 x float> %a, i32 3
; Four float loads at element offsets 0, 1, 2 and 3 from %b.
236 %p1 = getelementptr inbounds float, ptr %b, i32 1
237 %p2 = getelementptr inbounds float, ptr %b, i32 2
238 %p3 = getelementptr inbounds float, ptr %b, i32 3
239 %b0 = load float, ptr %b, align 4
240 %b1 = load float, ptr %p1, align 4
241 %b2 = load float, ptr %p2, align 4
242 %b3 = load float, ptr %p3, align 4
; ord(a, b) in lanes 0/3; uno(b, a) in lanes 1/2.
243 %c0 = fcmp ord float %a0, %b0
244 %c1 = fcmp uno float %b1, %a1
245 %c2 = fcmp uno float %b2, %a2
246 %c3 = fcmp ord float %a3, %b3
; Pack the per-lane results into <4 x i1>, then sign-extend each lane to i32.
247 %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
248 %d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
249 %d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
250 %d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
251 %r = sext <4 x i1> %d3 to <4 x i32>