1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE2
3 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SLM
4 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
5 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
6 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
7 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+avx512bw -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; Test input: two i8 loads from %p0 and %p0+1 (byte GEP), each zero-extended
; to i64 and inserted into lanes 0 and 1 of a <2 x i64> built up from undef.
; All three check prefixes (SSE2, SLM, AVX) expect the scalar chain to be
; replaced by one <2 x i8> load (align 1) feeding one vector zext, whose
; result is returned.
13 define <2 x i64> @loadext_2i8_to_2i64(ptr %p0) {
14 ; SSE2-LABEL: @loadext_2i8_to_2i64(
15 ; SSE2-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
16 ; SSE2-NEXT: [[V1:%.*]] = zext <2 x i8> [[TMP1]] to <2 x i64>
17 ; SSE2-NEXT: ret <2 x i64> [[V1]]
19 ; SLM-LABEL: @loadext_2i8_to_2i64(
20 ; SLM-NEXT: [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
21 ; SLM-NEXT: [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64>
22 ; SLM-NEXT: ret <2 x i64> [[TMP3]]
24 ; AVX-LABEL: @loadext_2i8_to_2i64(
25 ; AVX-NEXT: [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
26 ; AVX-NEXT: [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64>
27 ; AVX-NEXT: ret <2 x i64> [[TMP3]]
29 %p1 = getelementptr inbounds i8, ptr %p0, i64 1
30 %i0 = load i8, ptr %p0, align 1
31 %i1 = load i8, ptr %p1, align 1
32 %x0 = zext i8 %i0 to i64
33 %x1 = zext i8 %i1 to i64
34 %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
35 %v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
; Test input: four i8 loads from %p0 + {0,1,2,3} (byte GEPs), each
; zero-extended to i32 and inserted into lanes 0..3 of a <4 x i32> built up
; from undef.
; All three check prefixes (SSE2, SLM, AVX) expect one <4 x i8> load (align 1)
; feeding one vector zext to <4 x i32>, whose result is returned.
39 define <4 x i32> @loadext_4i8_to_4i32(ptr %p0) {
40 ; SSE2-LABEL: @loadext_4i8_to_4i32(
41 ; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
42 ; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
43 ; SSE2-NEXT: ret <4 x i32> [[TMP3]]
45 ; SLM-LABEL: @loadext_4i8_to_4i32(
46 ; SLM-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
47 ; SLM-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
48 ; SLM-NEXT: ret <4 x i32> [[TMP3]]
50 ; AVX-LABEL: @loadext_4i8_to_4i32(
51 ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
52 ; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
53 ; AVX-NEXT: ret <4 x i32> [[TMP3]]
55 %p1 = getelementptr inbounds i8, ptr %p0, i64 1
56 %p2 = getelementptr inbounds i8, ptr %p0, i64 2
57 %p3 = getelementptr inbounds i8, ptr %p0, i64 3
58 %i0 = load i8, ptr %p0, align 1
59 %i1 = load i8, ptr %p1, align 1
60 %i2 = load i8, ptr %p2, align 1
61 %i3 = load i8, ptr %p3, align 1
62 %x0 = zext i8 %i0 to i32
63 %x1 = zext i8 %i1 to i32
64 %x2 = zext i8 %i2 to i32
65 %x3 = zext i8 %i3 to i32
66 %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
67 %v1 = insertelement <4 x i32> %v0, i32 %x1, i32 1
68 %v2 = insertelement <4 x i32> %v1, i32 %x2, i32 2
69 %v3 = insertelement <4 x i32> %v2, i32 %x3, i32 3
; Test input: four i8 loads from %p0 + {0,1,2,3} (byte GEPs), each
; zero-extended to i64 and inserted into lanes 0..3 of a <4 x i64> built up
; from undef.
; All three check prefixes (SSE2, SLM, AVX) expect one <4 x i8> load (align 1)
; feeding one vector zext to <4 x i64>, whose result is returned.
73 define <4 x i64> @loadext_4i8_to_4i64(ptr %p0) {
74 ; SSE2-LABEL: @loadext_4i8_to_4i64(
75 ; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
76 ; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
77 ; SSE2-NEXT: ret <4 x i64> [[TMP3]]
79 ; SLM-LABEL: @loadext_4i8_to_4i64(
80 ; SLM-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
81 ; SLM-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
82 ; SLM-NEXT: ret <4 x i64> [[TMP3]]
84 ; AVX-LABEL: @loadext_4i8_to_4i64(
85 ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
86 ; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
87 ; AVX-NEXT: ret <4 x i64> [[TMP3]]
89 %p1 = getelementptr inbounds i8, ptr %p0, i64 1
90 %p2 = getelementptr inbounds i8, ptr %p0, i64 2
91 %p3 = getelementptr inbounds i8, ptr %p0, i64 3
92 %i0 = load i8, ptr %p0, align 1
93 %i1 = load i8, ptr %p1, align 1
94 %i2 = load i8, ptr %p2, align 1
95 %i3 = load i8, ptr %p3, align 1
96 %x0 = zext i8 %i0 to i64
97 %x1 = zext i8 %i1 to i64
98 %x2 = zext i8 %i2 to i64
99 %x3 = zext i8 %i3 to i64
100 %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
101 %v1 = insertelement <4 x i64> %v0, i64 %x1, i32 1
102 %v2 = insertelement <4 x i64> %v1, i64 %x2, i32 2
103 %v3 = insertelement <4 x i64> %v2, i64 %x3, i32 3
; Test input: eight i8 loads from %p0 + {0..7} (byte GEPs), each
; zero-extended to i16 and inserted into lanes 0..7 of an <8 x i16> built up
; from undef.
; All three check prefixes (SSE2, SLM, AVX) expect one <8 x i8> load (align 1)
; feeding one vector zext to <8 x i16>, whose result is returned.
107 define <8 x i16> @loadext_8i8_to_8i16(ptr %p0) {
108 ; SSE2-LABEL: @loadext_8i8_to_8i16(
109 ; SSE2-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
110 ; SSE2-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
111 ; SSE2-NEXT: ret <8 x i16> [[TMP3]]
113 ; SLM-LABEL: @loadext_8i8_to_8i16(
114 ; SLM-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
115 ; SLM-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
116 ; SLM-NEXT: ret <8 x i16> [[TMP3]]
118 ; AVX-LABEL: @loadext_8i8_to_8i16(
119 ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
120 ; AVX-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
121 ; AVX-NEXT: ret <8 x i16> [[TMP3]]
123 %p1 = getelementptr inbounds i8, ptr %p0, i64 1
124 %p2 = getelementptr inbounds i8, ptr %p0, i64 2
125 %p3 = getelementptr inbounds i8, ptr %p0, i64 3
126 %p4 = getelementptr inbounds i8, ptr %p0, i64 4
127 %p5 = getelementptr inbounds i8, ptr %p0, i64 5
128 %p6 = getelementptr inbounds i8, ptr %p0, i64 6
129 %p7 = getelementptr inbounds i8, ptr %p0, i64 7
130 %i0 = load i8, ptr %p0, align 1
131 %i1 = load i8, ptr %p1, align 1
132 %i2 = load i8, ptr %p2, align 1
133 %i3 = load i8, ptr %p3, align 1
134 %i4 = load i8, ptr %p4, align 1
135 %i5 = load i8, ptr %p5, align 1
136 %i6 = load i8, ptr %p6, align 1
137 %i7 = load i8, ptr %p7, align 1
138 %x0 = zext i8 %i0 to i16
139 %x1 = zext i8 %i1 to i16
140 %x2 = zext i8 %i2 to i16
141 %x3 = zext i8 %i3 to i16
142 %x4 = zext i8 %i4 to i16
143 %x5 = zext i8 %i5 to i16
144 %x6 = zext i8 %i6 to i16
145 %x7 = zext i8 %i7 to i16
146 %v0 = insertelement <8 x i16> undef, i16 %x0, i32 0
147 %v1 = insertelement <8 x i16> %v0, i16 %x1, i32 1
148 %v2 = insertelement <8 x i16> %v1, i16 %x2, i32 2
149 %v3 = insertelement <8 x i16> %v2, i16 %x3, i32 3
150 %v4 = insertelement <8 x i16> %v3, i16 %x4, i32 4
151 %v5 = insertelement <8 x i16> %v4, i16 %x5, i32 5
152 %v6 = insertelement <8 x i16> %v5, i16 %x6, i32 6
153 %v7 = insertelement <8 x i16> %v6, i16 %x7, i32 7
; Test input: eight i8 loads from %p0 + {0..7} (byte GEPs), each
; zero-extended to i32 and inserted into lanes 0..7 of an <8 x i32> built up
; from undef.
; All three check prefixes (SSE2, SLM, AVX) expect one <8 x i8> load (align 1)
; feeding one vector zext to <8 x i32>, whose result is returned.
157 define <8 x i32> @loadext_8i8_to_8i32(ptr %p0) {
158 ; SSE2-LABEL: @loadext_8i8_to_8i32(
159 ; SSE2-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
160 ; SSE2-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
161 ; SSE2-NEXT: ret <8 x i32> [[TMP3]]
163 ; SLM-LABEL: @loadext_8i8_to_8i32(
164 ; SLM-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
165 ; SLM-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
166 ; SLM-NEXT: ret <8 x i32> [[TMP3]]
168 ; AVX-LABEL: @loadext_8i8_to_8i32(
169 ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
170 ; AVX-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
171 ; AVX-NEXT: ret <8 x i32> [[TMP3]]
173 %p1 = getelementptr inbounds i8, ptr %p0, i64 1
174 %p2 = getelementptr inbounds i8, ptr %p0, i64 2
175 %p3 = getelementptr inbounds i8, ptr %p0, i64 3
176 %p4 = getelementptr inbounds i8, ptr %p0, i64 4
177 %p5 = getelementptr inbounds i8, ptr %p0, i64 5
178 %p6 = getelementptr inbounds i8, ptr %p0, i64 6
179 %p7 = getelementptr inbounds i8, ptr %p0, i64 7
180 %i0 = load i8, ptr %p0, align 1
181 %i1 = load i8, ptr %p1, align 1
182 %i2 = load i8, ptr %p2, align 1
183 %i3 = load i8, ptr %p3, align 1
184 %i4 = load i8, ptr %p4, align 1
185 %i5 = load i8, ptr %p5, align 1
186 %i6 = load i8, ptr %p6, align 1
187 %i7 = load i8, ptr %p7, align 1
188 %x0 = zext i8 %i0 to i32
189 %x1 = zext i8 %i1 to i32
190 %x2 = zext i8 %i2 to i32
191 %x3 = zext i8 %i3 to i32
192 %x4 = zext i8 %i4 to i32
193 %x5 = zext i8 %i5 to i32
194 %x6 = zext i8 %i6 to i32
195 %x7 = zext i8 %i7 to i32
196 %v0 = insertelement <8 x i32> undef, i32 %x0, i32 0
197 %v1 = insertelement <8 x i32> %v0, i32 %x1, i32 1
198 %v2 = insertelement <8 x i32> %v1, i32 %x2, i32 2
199 %v3 = insertelement <8 x i32> %v2, i32 %x3, i32 3
200 %v4 = insertelement <8 x i32> %v3, i32 %x4, i32 4
201 %v5 = insertelement <8 x i32> %v4, i32 %x5, i32 5
202 %v6 = insertelement <8 x i32> %v5, i32 %x6, i32 6
203 %v7 = insertelement <8 x i32> %v6, i32 %x7, i32 7
; Test input: sixteen i8 loads from %p0 + {0..15} (byte GEPs), each
; zero-extended to i16 and inserted into lanes 0..15 of a <16 x i16> built up
; from undef.
; All three check prefixes (SSE2, SLM, AVX) expect one <16 x i8> load
; (align 1) feeding one vector zext to <16 x i16>, whose result is returned.
207 define <16 x i16> @loadext_16i8_to_16i16(ptr %p0) {
208 ; SSE2-LABEL: @loadext_16i8_to_16i16(
209 ; SSE2-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[P0:%.*]], align 1
210 ; SSE2-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16>
211 ; SSE2-NEXT: ret <16 x i16> [[TMP3]]
213 ; SLM-LABEL: @loadext_16i8_to_16i16(
214 ; SLM-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[P0:%.*]], align 1
215 ; SLM-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16>
216 ; SLM-NEXT: ret <16 x i16> [[TMP3]]
218 ; AVX-LABEL: @loadext_16i8_to_16i16(
219 ; AVX-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[P0:%.*]], align 1
220 ; AVX-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16>
221 ; AVX-NEXT: ret <16 x i16> [[TMP3]]
223 %p1 = getelementptr inbounds i8, ptr %p0, i64 1
224 %p2 = getelementptr inbounds i8, ptr %p0, i64 2
225 %p3 = getelementptr inbounds i8, ptr %p0, i64 3
226 %p4 = getelementptr inbounds i8, ptr %p0, i64 4
227 %p5 = getelementptr inbounds i8, ptr %p0, i64 5
228 %p6 = getelementptr inbounds i8, ptr %p0, i64 6
229 %p7 = getelementptr inbounds i8, ptr %p0, i64 7
230 %p8 = getelementptr inbounds i8, ptr %p0, i64 8
231 %p9 = getelementptr inbounds i8, ptr %p0, i64 9
232 %p10 = getelementptr inbounds i8, ptr %p0, i64 10
233 %p11 = getelementptr inbounds i8, ptr %p0, i64 11
234 %p12 = getelementptr inbounds i8, ptr %p0, i64 12
235 %p13 = getelementptr inbounds i8, ptr %p0, i64 13
236 %p14 = getelementptr inbounds i8, ptr %p0, i64 14
237 %p15 = getelementptr inbounds i8, ptr %p0, i64 15
238 %i0 = load i8, ptr %p0, align 1
239 %i1 = load i8, ptr %p1, align 1
240 %i2 = load i8, ptr %p2, align 1
241 %i3 = load i8, ptr %p3, align 1
242 %i4 = load i8, ptr %p4, align 1
243 %i5 = load i8, ptr %p5, align 1
244 %i6 = load i8, ptr %p6, align 1
245 %i7 = load i8, ptr %p7, align 1
246 %i8 = load i8, ptr %p8, align 1
247 %i9 = load i8, ptr %p9, align 1
248 %i10 = load i8, ptr %p10, align 1
249 %i11 = load i8, ptr %p11, align 1
250 %i12 = load i8, ptr %p12, align 1
251 %i13 = load i8, ptr %p13, align 1
252 %i14 = load i8, ptr %p14, align 1
253 %i15 = load i8, ptr %p15, align 1
254 %x0 = zext i8 %i0 to i16
255 %x1 = zext i8 %i1 to i16
256 %x2 = zext i8 %i2 to i16
257 %x3 = zext i8 %i3 to i16
258 %x4 = zext i8 %i4 to i16
259 %x5 = zext i8 %i5 to i16
260 %x6 = zext i8 %i6 to i16
261 %x7 = zext i8 %i7 to i16
262 %x8 = zext i8 %i8 to i16
263 %x9 = zext i8 %i9 to i16
264 %x10 = zext i8 %i10 to i16
265 %x11 = zext i8 %i11 to i16
266 %x12 = zext i8 %i12 to i16
267 %x13 = zext i8 %i13 to i16
268 %x14 = zext i8 %i14 to i16
269 %x15 = zext i8 %i15 to i16
270 %v0 = insertelement <16 x i16> undef, i16 %x0, i32 0
271 %v1 = insertelement <16 x i16> %v0, i16 %x1, i32 1
272 %v2 = insertelement <16 x i16> %v1, i16 %x2, i32 2
273 %v3 = insertelement <16 x i16> %v2, i16 %x3, i32 3
274 %v4 = insertelement <16 x i16> %v3, i16 %x4, i32 4
275 %v5 = insertelement <16 x i16> %v4, i16 %x5, i32 5
276 %v6 = insertelement <16 x i16> %v5, i16 %x6, i32 6
277 %v7 = insertelement <16 x i16> %v6, i16 %x7, i32 7
278 %v8 = insertelement <16 x i16> %v7, i16 %x8, i32 8
279 %v9 = insertelement <16 x i16> %v8, i16 %x9, i32 9
280 %v10 = insertelement <16 x i16> %v9, i16 %x10, i32 10
281 %v11 = insertelement <16 x i16> %v10, i16 %x11, i32 11
282 %v12 = insertelement <16 x i16> %v11, i16 %x12, i32 12
283 %v13 = insertelement <16 x i16> %v12, i16 %x13, i32 13
284 %v14 = insertelement <16 x i16> %v13, i16 %x14, i32 14
285 %v15 = insertelement <16 x i16> %v14, i16 %x15, i32 15
; Test input: two i16 loads from %p0 and the next i16 element (GEP over i16,
; index 1), each zero-extended to i64 and inserted into lanes 0 and 1 of a
; <2 x i64> built up from undef.
; The scalar loads are only 1-byte aligned (align 1), and the expected
; <2 x i16> vector load keeps that same align 1.
; All three check prefixes (SSE2, SLM, AVX) expect that single vector load
; feeding one vector zext to <2 x i64>, whose result is returned.
293 define <2 x i64> @loadext_2i16_to_2i64(ptr %p0) {
294 ; SSE2-LABEL: @loadext_2i16_to_2i64(
295 ; SSE2-NEXT: [[TMP2:%.*]] = load <2 x i16>, ptr [[P0:%.*]], align 1
296 ; SSE2-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>
297 ; SSE2-NEXT: ret <2 x i64> [[TMP3]]
299 ; SLM-LABEL: @loadext_2i16_to_2i64(
300 ; SLM-NEXT: [[TMP2:%.*]] = load <2 x i16>, ptr [[P0:%.*]], align 1
301 ; SLM-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>
302 ; SLM-NEXT: ret <2 x i64> [[TMP3]]
304 ; AVX-LABEL: @loadext_2i16_to_2i64(
305 ; AVX-NEXT: [[TMP2:%.*]] = load <2 x i16>, ptr [[P0:%.*]], align 1
306 ; AVX-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>
307 ; AVX-NEXT: ret <2 x i64> [[TMP3]]
309 %p1 = getelementptr inbounds i16, ptr %p0, i64 1
310 %i0 = load i16, ptr %p0, align 1
311 %i1 = load i16, ptr %p1, align 1
312 %x0 = zext i16 %i0 to i64
313 %x1 = zext i16 %i1 to i64
314 %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
315 %v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
; Test input: four i16 loads from %p0 and the following i16 elements (GEP
; over i16, indices 1..3), each zero-extended to i32 and inserted into lanes
; 0..3 of a <4 x i32> built up from undef.
; The scalar loads are only 1-byte aligned (align 1), and the expected
; <4 x i16> vector load keeps that same align 1.
; All three check prefixes (SSE2, SLM, AVX) expect that single vector load
; feeding one vector zext to <4 x i32>, whose result is returned.
319 define <4 x i32> @loadext_4i16_to_4i32(ptr %p0) {
320 ; SSE2-LABEL: @loadext_4i16_to_4i32(
321 ; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
322 ; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
323 ; SSE2-NEXT: ret <4 x i32> [[TMP3]]
325 ; SLM-LABEL: @loadext_4i16_to_4i32(
326 ; SLM-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
327 ; SLM-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
328 ; SLM-NEXT: ret <4 x i32> [[TMP3]]
330 ; AVX-LABEL: @loadext_4i16_to_4i32(
331 ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
332 ; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
333 ; AVX-NEXT: ret <4 x i32> [[TMP3]]
335 %p1 = getelementptr inbounds i16, ptr %p0, i64 1
336 %p2 = getelementptr inbounds i16, ptr %p0, i64 2
337 %p3 = getelementptr inbounds i16, ptr %p0, i64 3
338 %i0 = load i16, ptr %p0, align 1
339 %i1 = load i16, ptr %p1, align 1
340 %i2 = load i16, ptr %p2, align 1
341 %i3 = load i16, ptr %p3, align 1
342 %x0 = zext i16 %i0 to i32
343 %x1 = zext i16 %i1 to i32
344 %x2 = zext i16 %i2 to i32
345 %x3 = zext i16 %i3 to i32
346 %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
347 %v1 = insertelement <4 x i32> %v0, i32 %x1, i32 1
348 %v2 = insertelement <4 x i32> %v1, i32 %x2, i32 2
349 %v3 = insertelement <4 x i32> %v2, i32 %x3, i32 3
; Test input: four i16 loads from %p0 and the following i16 elements (GEP
; over i16, indices 1..3), each zero-extended to i64 and inserted into lanes
; 0..3 of a <4 x i64> built up from undef.
; The scalar loads are only 1-byte aligned (align 1), and the expected
; <4 x i16> vector load keeps that same align 1.
; All three check prefixes (SSE2, SLM, AVX) expect that single vector load
; feeding one vector zext to <4 x i64>, whose result is returned.
353 define <4 x i64> @loadext_4i16_to_4i64(ptr %p0) {
354 ; SSE2-LABEL: @loadext_4i16_to_4i64(
355 ; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
356 ; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
357 ; SSE2-NEXT: ret <4 x i64> [[TMP3]]
359 ; SLM-LABEL: @loadext_4i16_to_4i64(
360 ; SLM-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
361 ; SLM-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
362 ; SLM-NEXT: ret <4 x i64> [[TMP3]]
364 ; AVX-LABEL: @loadext_4i16_to_4i64(
365 ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
366 ; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
367 ; AVX-NEXT: ret <4 x i64> [[TMP3]]
369 %p1 = getelementptr inbounds i16, ptr %p0, i64 1
370 %p2 = getelementptr inbounds i16, ptr %p0, i64 2
371 %p3 = getelementptr inbounds i16, ptr %p0, i64 3
372 %i0 = load i16, ptr %p0, align 1
373 %i1 = load i16, ptr %p1, align 1
374 %i2 = load i16, ptr %p2, align 1
375 %i3 = load i16, ptr %p3, align 1
376 %x0 = zext i16 %i0 to i64
377 %x1 = zext i16 %i1 to i64
378 %x2 = zext i16 %i2 to i64
379 %x3 = zext i16 %i3 to i64
380 %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
381 %v1 = insertelement <4 x i64> %v0, i64 %x1, i32 1
382 %v2 = insertelement <4 x i64> %v1, i64 %x2, i32 2
383 %v3 = insertelement <4 x i64> %v2, i64 %x3, i32 3
; Test input: eight i16 loads from %p0 and the following i16 elements (GEP
; over i16, indices 1..7), each zero-extended to i32 and inserted into lanes
; 0..7 of an <8 x i32> built up from undef.
; The scalar loads are only 1-byte aligned (align 1), and the expected
; <8 x i16> vector load keeps that same align 1.
; All three check prefixes (SSE2, SLM, AVX) expect that single vector load
; feeding one vector zext to <8 x i32>, whose result is returned.
387 define <8 x i32> @loadext_8i16_to_8i32(ptr %p0) {
388 ; SSE2-LABEL: @loadext_8i16_to_8i32(
389 ; SSE2-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[P0:%.*]], align 1
390 ; SSE2-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
391 ; SSE2-NEXT: ret <8 x i32> [[TMP3]]
393 ; SLM-LABEL: @loadext_8i16_to_8i32(
394 ; SLM-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[P0:%.*]], align 1
395 ; SLM-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
396 ; SLM-NEXT: ret <8 x i32> [[TMP3]]
398 ; AVX-LABEL: @loadext_8i16_to_8i32(
399 ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[P0:%.*]], align 1
400 ; AVX-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
401 ; AVX-NEXT: ret <8 x i32> [[TMP3]]
403 %p1 = getelementptr inbounds i16, ptr %p0, i64 1
404 %p2 = getelementptr inbounds i16, ptr %p0, i64 2
405 %p3 = getelementptr inbounds i16, ptr %p0, i64 3
406 %p4 = getelementptr inbounds i16, ptr %p0, i64 4
407 %p5 = getelementptr inbounds i16, ptr %p0, i64 5
408 %p6 = getelementptr inbounds i16, ptr %p0, i64 6
409 %p7 = getelementptr inbounds i16, ptr %p0, i64 7
410 %i0 = load i16, ptr %p0, align 1
411 %i1 = load i16, ptr %p1, align 1
412 %i2 = load i16, ptr %p2, align 1
413 %i3 = load i16, ptr %p3, align 1
414 %i4 = load i16, ptr %p4, align 1
415 %i5 = load i16, ptr %p5, align 1
416 %i6 = load i16, ptr %p6, align 1
417 %i7 = load i16, ptr %p7, align 1
418 %x0 = zext i16 %i0 to i32
419 %x1 = zext i16 %i1 to i32
420 %x2 = zext i16 %i2 to i32
421 %x3 = zext i16 %i3 to i32
422 %x4 = zext i16 %i4 to i32
423 %x5 = zext i16 %i5 to i32
424 %x6 = zext i16 %i6 to i32
425 %x7 = zext i16 %i7 to i32
426 %v0 = insertelement <8 x i32> undef, i32 %x0, i32 0
427 %v1 = insertelement <8 x i32> %v0, i32 %x1, i32 1
428 %v2 = insertelement <8 x i32> %v1, i32 %x2, i32 2
429 %v3 = insertelement <8 x i32> %v2, i32 %x3, i32 3
430 %v4 = insertelement <8 x i32> %v3, i32 %x4, i32 4
431 %v5 = insertelement <8 x i32> %v4, i32 %x5, i32 5
432 %v6 = insertelement <8 x i32> %v5, i32 %x6, i32 6
433 %v7 = insertelement <8 x i32> %v6, i32 %x7, i32 7
; Test input: two i32 loads from %p0 and the next i32 element (GEP over i32,
; index 1), each zero-extended to i64 and inserted into lanes 0 and 1 of a
; <2 x i64> built up from undef.
; The scalar loads are only 1-byte aligned (align 1), and the expected
; <2 x i32> vector load keeps that same align 1.
; All three check prefixes (SSE2, SLM, AVX) expect that single vector load
; feeding one vector zext to <2 x i64>, whose result is returned.
441 define <2 x i64> @loadext_2i32_to_2i64(ptr %p0) {
442 ; SSE2-LABEL: @loadext_2i32_to_2i64(
443 ; SSE2-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[P0:%.*]], align 1
444 ; SSE2-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
445 ; SSE2-NEXT: ret <2 x i64> [[TMP3]]
447 ; SLM-LABEL: @loadext_2i32_to_2i64(
448 ; SLM-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[P0:%.*]], align 1
449 ; SLM-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
450 ; SLM-NEXT: ret <2 x i64> [[TMP3]]
452 ; AVX-LABEL: @loadext_2i32_to_2i64(
453 ; AVX-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[P0:%.*]], align 1
454 ; AVX-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
455 ; AVX-NEXT: ret <2 x i64> [[TMP3]]
457 %p1 = getelementptr inbounds i32, ptr %p0, i64 1
458 %i0 = load i32, ptr %p0, align 1
459 %i1 = load i32, ptr %p1, align 1
460 %x0 = zext i32 %i0 to i64
461 %x1 = zext i32 %i1 to i64
462 %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
463 %v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
; Test input: four i32 loads from %p0 and the following i32 elements (GEP
; over i32, indices 1..3), each zero-extended to i64 and inserted into lanes
; 0..3 of a <4 x i64> built up from undef.
; The scalar loads are only 1-byte aligned (align 1), and the expected
; <4 x i32> vector load keeps that same align 1.
; All three check prefixes (SSE2, SLM, AVX) expect that single vector load
; feeding one vector zext to <4 x i64>, whose result is returned.
467 define <4 x i64> @loadext_4i32_to_4i64(ptr %p0) {
468 ; SSE2-LABEL: @loadext_4i32_to_4i64(
469 ; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[P0:%.*]], align 1
470 ; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
471 ; SSE2-NEXT: ret <4 x i64> [[TMP3]]
473 ; SLM-LABEL: @loadext_4i32_to_4i64(
474 ; SLM-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[P0:%.*]], align 1
475 ; SLM-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
476 ; SLM-NEXT: ret <4 x i64> [[TMP3]]
478 ; AVX-LABEL: @loadext_4i32_to_4i64(
479 ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[P0:%.*]], align 1
480 ; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
481 ; AVX-NEXT: ret <4 x i64> [[TMP3]]
483 %p1 = getelementptr inbounds i32, ptr %p0, i64 1
484 %p2 = getelementptr inbounds i32, ptr %p0, i64 2
485 %p3 = getelementptr inbounds i32, ptr %p0, i64 3
486 %i0 = load i32, ptr %p0, align 1
487 %i1 = load i32, ptr %p1, align 1
488 %i2 = load i32, ptr %p2, align 1
489 %i3 = load i32, ptr %p3, align 1
490 %x0 = zext i32 %i0 to i64
491 %x1 = zext i32 %i1 to i64
492 %x2 = zext i32 %i2 to i64
493 %x3 = zext i32 %i3 to i64
494 %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
495 %v1 = insertelement <4 x i64> %v0, i64 %x1, i32 1
496 %v2 = insertelement <4 x i64> %v1, i64 %x2, i32 2
497 %v3 = insertelement <4 x i64> %v2, i64 %x3, i32 3