; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SLM
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+avx512bw -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; Two adjacent i8 loads, each zero-extended to i64 and inserted into a
; <2 x i64>. The run without -mcpu (SSE2 checks) expects the scalar sequence to
; be left untouched; the SLM and AVX runs expect one <2 x i8> load followed by
; a vector zext.
define <2 x i64> @loadext_2i8_to_2i64(ptr %p0) {
; SSE2-LABEL: @loadext_2i8_to_2i64(
; SSE2-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, ptr [[P0:%.*]], i64 1
; SSE2-NEXT:    [[I0:%.*]] = load i8, ptr [[P0]], align 1
; SSE2-NEXT:    [[I1:%.*]] = load i8, ptr [[P1]], align 1
; SSE2-NEXT:    [[X0:%.*]] = zext i8 [[I0]] to i64
; SSE2-NEXT:    [[X1:%.*]] = zext i8 [[I1]] to i64
; SSE2-NEXT:    [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0
; SSE2-NEXT:    [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
; SSE2-NEXT:    ret <2 x i64> [[V1]]
;
; SLM-LABEL: @loadext_2i8_to_2i64(
; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
; SLM-NEXT:    [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64>
; SLM-NEXT:    ret <2 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_2i8_to_2i64(
; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64>
; AVX-NEXT:    ret <2 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
  %i0 = load i8, ptr %p0, align 1
  %i1 = load i8, ptr %p1, align 1
  %x0 = zext i8 %i0 to i64
  %x1 = zext i8 %i1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
  ; Return the fully built vector; every basic block needs a terminator.
  ret <2 x i64> %v1
}
; Four adjacent i8 loads, each zero-extended to i32 and inserted into a
; <4 x i32>. All three check prefixes expect one <4 x i8> load followed by a
; vector zext.
define <4 x i32> @loadext_4i8_to_4i32(ptr %p0) {
; SSE2-LABEL: @loadext_4i8_to_4i32(
; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
; SSE2-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
; SSE2-NEXT:    ret <4 x i32> [[TMP3]]
;
; SLM-LABEL: @loadext_4i8_to_4i32(
; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
; SLM-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
; SLM-NEXT:    ret <4 x i32> [[TMP3]]
;
; AVX-LABEL: @loadext_4i8_to_4i32(
; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
; AVX-NEXT:    ret <4 x i32> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
  %i0 = load i8, ptr %p0, align 1
  %i1 = load i8, ptr %p1, align 1
  %i2 = load i8, ptr %p2, align 1
  %i3 = load i8, ptr %p3, align 1
  %x0 = zext i8 %i0 to i32
  %x1 = zext i8 %i1 to i32
  %x2 = zext i8 %i2 to i32
  %x3 = zext i8 %i3 to i32
  %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %x1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %x2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %x3, i32 3
  ; Return the fully built vector; every basic block needs a terminator.
  ret <4 x i32> %v3
}
; Four adjacent i8 loads, each zero-extended to i64 and inserted into a
; <4 x i64>. All three check prefixes expect one <4 x i8> load followed by a
; vector zext.
define <4 x i64> @loadext_4i8_to_4i64(ptr %p0) {
; SSE2-LABEL: @loadext_4i8_to_4i64(
; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
; SSE2-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
; SSE2-NEXT:    ret <4 x i64> [[TMP3]]
;
; SLM-LABEL: @loadext_4i8_to_4i64(
; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
; SLM-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
; SLM-NEXT:    ret <4 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_4i8_to_4i64(
; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
; AVX-NEXT:    ret <4 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
  %i0 = load i8, ptr %p0, align 1
  %i1 = load i8, ptr %p1, align 1
  %i2 = load i8, ptr %p2, align 1
  %i3 = load i8, ptr %p3, align 1
  %x0 = zext i8 %i0 to i64
  %x1 = zext i8 %i1 to i64
  %x2 = zext i8 %i2 to i64
  %x3 = zext i8 %i3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <4 x i64> %v0, i64 %x1, i32 1
  %v2 = insertelement <4 x i64> %v1, i64 %x2, i32 2
  %v3 = insertelement <4 x i64> %v2, i64 %x3, i32 3
  ; Return the fully built vector; every basic block needs a terminator.
  ret <4 x i64> %v3
}
; Eight adjacent i8 loads, each zero-extended to i16 and inserted into an
; <8 x i16>. All three check prefixes expect one <8 x i8> load followed by a
; vector zext.
define <8 x i16> @loadext_8i8_to_8i16(ptr %p0) {
; SSE2-LABEL: @loadext_8i8_to_8i16(
; SSE2-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
; SSE2-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
; SSE2-NEXT:    ret <8 x i16> [[TMP3]]
;
; SLM-LABEL: @loadext_8i8_to_8i16(
; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
; SLM-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
; SLM-NEXT:    ret <8 x i16> [[TMP3]]
;
; AVX-LABEL: @loadext_8i8_to_8i16(
; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
; AVX-NEXT:    ret <8 x i16> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
  %p4 = getelementptr inbounds i8, ptr %p0, i64 4
  %p5 = getelementptr inbounds i8, ptr %p0, i64 5
  %p6 = getelementptr inbounds i8, ptr %p0, i64 6
  %p7 = getelementptr inbounds i8, ptr %p0, i64 7
  %i0 = load i8, ptr %p0, align 1
  %i1 = load i8, ptr %p1, align 1
  %i2 = load i8, ptr %p2, align 1
  %i3 = load i8, ptr %p3, align 1
  %i4 = load i8, ptr %p4, align 1
  %i5 = load i8, ptr %p5, align 1
  %i6 = load i8, ptr %p6, align 1
  %i7 = load i8, ptr %p7, align 1
  %x0 = zext i8 %i0 to i16
  %x1 = zext i8 %i1 to i16
  %x2 = zext i8 %i2 to i16
  %x3 = zext i8 %i3 to i16
  %x4 = zext i8 %i4 to i16
  %x5 = zext i8 %i5 to i16
  %x6 = zext i8 %i6 to i16
  %x7 = zext i8 %i7 to i16
  %v0 = insertelement <8 x i16> undef, i16 %x0, i32 0
  %v1 = insertelement <8 x i16> %v0, i16 %x1, i32 1
  %v2 = insertelement <8 x i16> %v1, i16 %x2, i32 2
  %v3 = insertelement <8 x i16> %v2, i16 %x3, i32 3
  %v4 = insertelement <8 x i16> %v3, i16 %x4, i32 4
  %v5 = insertelement <8 x i16> %v4, i16 %x5, i32 5
  %v6 = insertelement <8 x i16> %v5, i16 %x6, i32 6
  %v7 = insertelement <8 x i16> %v6, i16 %x7, i32 7
  ; Return the fully built vector; every basic block needs a terminator.
  ret <8 x i16> %v7
}
; Eight adjacent i8 loads, each zero-extended to i32 and inserted into an
; <8 x i32>. All three check prefixes expect one <8 x i8> load followed by a
; vector zext.
define <8 x i32> @loadext_8i8_to_8i32(ptr %p0) {
; SSE2-LABEL: @loadext_8i8_to_8i32(
; SSE2-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
; SSE2-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
; SSE2-NEXT:    ret <8 x i32> [[TMP3]]
;
; SLM-LABEL: @loadext_8i8_to_8i32(
; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
; SLM-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
; SLM-NEXT:    ret <8 x i32> [[TMP3]]
;
; AVX-LABEL: @loadext_8i8_to_8i32(
; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
; AVX-NEXT:    ret <8 x i32> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
  %p4 = getelementptr inbounds i8, ptr %p0, i64 4
  %p5 = getelementptr inbounds i8, ptr %p0, i64 5
  %p6 = getelementptr inbounds i8, ptr %p0, i64 6
  %p7 = getelementptr inbounds i8, ptr %p0, i64 7
  %i0 = load i8, ptr %p0, align 1
  %i1 = load i8, ptr %p1, align 1
  %i2 = load i8, ptr %p2, align 1
  %i3 = load i8, ptr %p3, align 1
  %i4 = load i8, ptr %p4, align 1
  %i5 = load i8, ptr %p5, align 1
  %i6 = load i8, ptr %p6, align 1
  %i7 = load i8, ptr %p7, align 1
  %x0 = zext i8 %i0 to i32
  %x1 = zext i8 %i1 to i32
  %x2 = zext i8 %i2 to i32
  %x3 = zext i8 %i3 to i32
  %x4 = zext i8 %i4 to i32
  %x5 = zext i8 %i5 to i32
  %x6 = zext i8 %i6 to i32
  %x7 = zext i8 %i7 to i32
  %v0 = insertelement <8 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <8 x i32> %v0, i32 %x1, i32 1
  %v2 = insertelement <8 x i32> %v1, i32 %x2, i32 2
  %v3 = insertelement <8 x i32> %v2, i32 %x3, i32 3
  %v4 = insertelement <8 x i32> %v3, i32 %x4, i32 4
  %v5 = insertelement <8 x i32> %v4, i32 %x5, i32 5
  %v6 = insertelement <8 x i32> %v5, i32 %x6, i32 6
  %v7 = insertelement <8 x i32> %v6, i32 %x7, i32 7
  ; Return the fully built vector; every basic block needs a terminator.
  ret <8 x i32> %v7
}
; Sixteen adjacent i8 loads, each zero-extended to i16 and inserted into a
; <16 x i16>. All three check prefixes expect one <16 x i8> load followed by a
; vector zext.
define <16 x i16> @loadext_16i8_to_16i16(ptr %p0) {
; SSE2-LABEL: @loadext_16i8_to_16i16(
; SSE2-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[P0:%.*]], align 1
; SSE2-NEXT:    [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16>
; SSE2-NEXT:    ret <16 x i16> [[TMP3]]
;
; SLM-LABEL: @loadext_16i8_to_16i16(
; SLM-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[P0:%.*]], align 1
; SLM-NEXT:    [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16>
; SLM-NEXT:    ret <16 x i16> [[TMP3]]
;
; AVX-LABEL: @loadext_16i8_to_16i16(
; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16>
; AVX-NEXT:    ret <16 x i16> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
  %p4 = getelementptr inbounds i8, ptr %p0, i64 4
  %p5 = getelementptr inbounds i8, ptr %p0, i64 5
  %p6 = getelementptr inbounds i8, ptr %p0, i64 6
  %p7 = getelementptr inbounds i8, ptr %p0, i64 7
  %p8 = getelementptr inbounds i8, ptr %p0, i64 8
  %p9 = getelementptr inbounds i8, ptr %p0, i64 9
  %p10 = getelementptr inbounds i8, ptr %p0, i64 10
  %p11 = getelementptr inbounds i8, ptr %p0, i64 11
  %p12 = getelementptr inbounds i8, ptr %p0, i64 12
  %p13 = getelementptr inbounds i8, ptr %p0, i64 13
  %p14 = getelementptr inbounds i8, ptr %p0, i64 14
  %p15 = getelementptr inbounds i8, ptr %p0, i64 15
  %i0 = load i8, ptr %p0, align 1
  %i1 = load i8, ptr %p1, align 1
  %i2 = load i8, ptr %p2, align 1
  %i3 = load i8, ptr %p3, align 1
  %i4 = load i8, ptr %p4, align 1
  %i5 = load i8, ptr %p5, align 1
  %i6 = load i8, ptr %p6, align 1
  %i7 = load i8, ptr %p7, align 1
  %i8 = load i8, ptr %p8, align 1
  %i9 = load i8, ptr %p9, align 1
  %i10 = load i8, ptr %p10, align 1
  %i11 = load i8, ptr %p11, align 1
  %i12 = load i8, ptr %p12, align 1
  %i13 = load i8, ptr %p13, align 1
  %i14 = load i8, ptr %p14, align 1
  %i15 = load i8, ptr %p15, align 1
  %x0 = zext i8 %i0 to i16
  %x1 = zext i8 %i1 to i16
  %x2 = zext i8 %i2 to i16
  %x3 = zext i8 %i3 to i16
  %x4 = zext i8 %i4 to i16
  %x5 = zext i8 %i5 to i16
  %x6 = zext i8 %i6 to i16
  %x7 = zext i8 %i7 to i16
  %x8 = zext i8 %i8 to i16
  %x9 = zext i8 %i9 to i16
  %x10 = zext i8 %i10 to i16
  %x11 = zext i8 %i11 to i16
  %x12 = zext i8 %i12 to i16
  %x13 = zext i8 %i13 to i16
  %x14 = zext i8 %i14 to i16
  %x15 = zext i8 %i15 to i16
  %v0 = insertelement <16 x i16> undef, i16 %x0, i32 0
  %v1 = insertelement <16 x i16> %v0, i16 %x1, i32 1
  %v2 = insertelement <16 x i16> %v1, i16 %x2, i32 2
  %v3 = insertelement <16 x i16> %v2, i16 %x3, i32 3
  %v4 = insertelement <16 x i16> %v3, i16 %x4, i32 4
  %v5 = insertelement <16 x i16> %v4, i16 %x5, i32 5
  %v6 = insertelement <16 x i16> %v5, i16 %x6, i32 6
  %v7 = insertelement <16 x i16> %v6, i16 %x7, i32 7
  %v8 = insertelement <16 x i16> %v7, i16 %x8, i32 8
  %v9 = insertelement <16 x i16> %v8, i16 %x9, i32 9
  %v10 = insertelement <16 x i16> %v9, i16 %x10, i32 10
  %v11 = insertelement <16 x i16> %v10, i16 %x11, i32 11
  %v12 = insertelement <16 x i16> %v11, i16 %x12, i32 12
  %v13 = insertelement <16 x i16> %v12, i16 %x13, i32 13
  %v14 = insertelement <16 x i16> %v13, i16 %x14, i32 14
  %v15 = insertelement <16 x i16> %v14, i16 %x15, i32 15
  ; Return the fully built vector; every basic block needs a terminator.
  ret <16 x i16> %v15
}
; Two adjacent i16 loads (declared align 1), each zero-extended to i64 and
; inserted into a <2 x i64>. All three check prefixes expect one <2 x i16>
; load followed by a vector zext.
define <2 x i64> @loadext_2i16_to_2i64(ptr %p0) {
; SSE2-LABEL: @loadext_2i16_to_2i64(
; SSE2-NEXT:    [[TMP2:%.*]] = load <2 x i16>, ptr [[P0:%.*]], align 1
; SSE2-NEXT:    [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>
; SSE2-NEXT:    ret <2 x i64> [[TMP3]]
;
; SLM-LABEL: @loadext_2i16_to_2i64(
; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i16>, ptr [[P0:%.*]], align 1
; SLM-NEXT:    [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>
; SLM-NEXT:    ret <2 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_2i16_to_2i64(
; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i16>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>
; AVX-NEXT:    ret <2 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
  %i0 = load i16, ptr %p0, align 1
  %i1 = load i16, ptr %p1, align 1
  %x0 = zext i16 %i0 to i64
  %x1 = zext i16 %i1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
  ; Return the fully built vector; every basic block needs a terminator.
  ret <2 x i64> %v1
}
; Four adjacent i16 loads (declared align 1), each zero-extended to i32 and
; inserted into a <4 x i32>. All three check prefixes expect one <4 x i16>
; load followed by a vector zext.
define <4 x i32> @loadext_4i16_to_4i32(ptr %p0) {
; SSE2-LABEL: @loadext_4i16_to_4i32(
; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
; SSE2-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
; SSE2-NEXT:    ret <4 x i32> [[TMP3]]
;
; SLM-LABEL: @loadext_4i16_to_4i32(
; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
; SLM-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
; SLM-NEXT:    ret <4 x i32> [[TMP3]]
;
; AVX-LABEL: @loadext_4i16_to_4i32(
; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
; AVX-NEXT:    ret <4 x i32> [[TMP3]]
;
  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
  %p2 = getelementptr inbounds i16, ptr %p0, i64 2
  %p3 = getelementptr inbounds i16, ptr %p0, i64 3
  %i0 = load i16, ptr %p0, align 1
  %i1 = load i16, ptr %p1, align 1
  %i2 = load i16, ptr %p2, align 1
  %i3 = load i16, ptr %p3, align 1
  %x0 = zext i16 %i0 to i32
  %x1 = zext i16 %i1 to i32
  %x2 = zext i16 %i2 to i32
  %x3 = zext i16 %i3 to i32
  %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %x1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %x2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %x3, i32 3
  ; Return the fully built vector; every basic block needs a terminator.
  ret <4 x i32> %v3
}
; Four adjacent i16 loads (declared align 1), each zero-extended to i64 and
; inserted into a <4 x i64>. All three check prefixes expect one <4 x i16>
; load followed by a vector zext.
define <4 x i64> @loadext_4i16_to_4i64(ptr %p0) {
; SSE2-LABEL: @loadext_4i16_to_4i64(
; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
; SSE2-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
; SSE2-NEXT:    ret <4 x i64> [[TMP3]]
;
; SLM-LABEL: @loadext_4i16_to_4i64(
; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
; SLM-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
; SLM-NEXT:    ret <4 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_4i16_to_4i64(
; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
; AVX-NEXT:    ret <4 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
  %p2 = getelementptr inbounds i16, ptr %p0, i64 2
  %p3 = getelementptr inbounds i16, ptr %p0, i64 3
  %i0 = load i16, ptr %p0, align 1
  %i1 = load i16, ptr %p1, align 1
  %i2 = load i16, ptr %p2, align 1
  %i3 = load i16, ptr %p3, align 1
  %x0 = zext i16 %i0 to i64
  %x1 = zext i16 %i1 to i64
  %x2 = zext i16 %i2 to i64
  %x3 = zext i16 %i3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <4 x i64> %v0, i64 %x1, i32 1
  %v2 = insertelement <4 x i64> %v1, i64 %x2, i32 2
  %v3 = insertelement <4 x i64> %v2, i64 %x3, i32 3
  ; Return the fully built vector; every basic block needs a terminator.
  ret <4 x i64> %v3
}
; Eight adjacent i16 loads (declared align 1), each zero-extended to i32 and
; inserted into an <8 x i32>. All three check prefixes expect one <8 x i16>
; load followed by a vector zext.
define <8 x i32> @loadext_8i16_to_8i32(ptr %p0) {
; SSE2-LABEL: @loadext_8i16_to_8i32(
; SSE2-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[P0:%.*]], align 1
; SSE2-NEXT:    [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
; SSE2-NEXT:    ret <8 x i32> [[TMP3]]
;
; SLM-LABEL: @loadext_8i16_to_8i32(
; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[P0:%.*]], align 1
; SLM-NEXT:    [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
; SLM-NEXT:    ret <8 x i32> [[TMP3]]
;
; AVX-LABEL: @loadext_8i16_to_8i32(
; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
; AVX-NEXT:    ret <8 x i32> [[TMP3]]
;
  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
  %p2 = getelementptr inbounds i16, ptr %p0, i64 2
  %p3 = getelementptr inbounds i16, ptr %p0, i64 3
  %p4 = getelementptr inbounds i16, ptr %p0, i64 4
  %p5 = getelementptr inbounds i16, ptr %p0, i64 5
  %p6 = getelementptr inbounds i16, ptr %p0, i64 6
  %p7 = getelementptr inbounds i16, ptr %p0, i64 7
  %i0 = load i16, ptr %p0, align 1
  %i1 = load i16, ptr %p1, align 1
  %i2 = load i16, ptr %p2, align 1
  %i3 = load i16, ptr %p3, align 1
  %i4 = load i16, ptr %p4, align 1
  %i5 = load i16, ptr %p5, align 1
  %i6 = load i16, ptr %p6, align 1
  %i7 = load i16, ptr %p7, align 1
  %x0 = zext i16 %i0 to i32
  %x1 = zext i16 %i1 to i32
  %x2 = zext i16 %i2 to i32
  %x3 = zext i16 %i3 to i32
  %x4 = zext i16 %i4 to i32
  %x5 = zext i16 %i5 to i32
  %x6 = zext i16 %i6 to i32
  %x7 = zext i16 %i7 to i32
  %v0 = insertelement <8 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <8 x i32> %v0, i32 %x1, i32 1
  %v2 = insertelement <8 x i32> %v1, i32 %x2, i32 2
  %v3 = insertelement <8 x i32> %v2, i32 %x3, i32 3
  %v4 = insertelement <8 x i32> %v3, i32 %x4, i32 4
  %v5 = insertelement <8 x i32> %v4, i32 %x5, i32 5
  %v6 = insertelement <8 x i32> %v5, i32 %x6, i32 6
  %v7 = insertelement <8 x i32> %v6, i32 %x7, i32 7
  ; Return the fully built vector; every basic block needs a terminator.
  ret <8 x i32> %v7
}
; Two adjacent i32 loads (declared align 1), each zero-extended to i64 and
; inserted into a <2 x i64>. All three check prefixes expect one <2 x i32>
; load followed by a vector zext.
define <2 x i64> @loadext_2i32_to_2i64(ptr %p0) {
; SSE2-LABEL: @loadext_2i32_to_2i64(
; SSE2-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[P0:%.*]], align 1
; SSE2-NEXT:    [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
; SSE2-NEXT:    ret <2 x i64> [[TMP3]]
;
; SLM-LABEL: @loadext_2i32_to_2i64(
; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[P0:%.*]], align 1
; SLM-NEXT:    [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
; SLM-NEXT:    ret <2 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_2i32_to_2i64(
; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
; AVX-NEXT:    ret <2 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i32, ptr %p0, i64 1
  %i0 = load i32, ptr %p0, align 1
  %i1 = load i32, ptr %p1, align 1
  %x0 = zext i32 %i0 to i64
  %x1 = zext i32 %i1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
  ; Return the fully built vector; every basic block needs a terminator.
  ret <2 x i64> %v1
}
; Four adjacent i32 loads (declared align 1), each zero-extended to i64 and
; inserted into a <4 x i64>. All three check prefixes expect one <4 x i32>
; load followed by a vector zext.
define <4 x i64> @loadext_4i32_to_4i64(ptr %p0) {
; SSE2-LABEL: @loadext_4i32_to_4i64(
; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[P0:%.*]], align 1
; SSE2-NEXT:    [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
; SSE2-NEXT:    ret <4 x i64> [[TMP3]]
;
; SLM-LABEL: @loadext_4i32_to_4i64(
; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[P0:%.*]], align 1
; SLM-NEXT:    [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
; SLM-NEXT:    ret <4 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_4i32_to_4i64(
; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
; AVX-NEXT:    ret <4 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i32, ptr %p0, i64 1
  %p2 = getelementptr inbounds i32, ptr %p0, i64 2
  %p3 = getelementptr inbounds i32, ptr %p0, i64 3
  %i0 = load i32, ptr %p0, align 1
  %i1 = load i32, ptr %p1, align 1
  %i2 = load i32, ptr %p2, align 1
  %i3 = load i32, ptr %p3, align 1
  %x0 = zext i32 %i0 to i64
  %x1 = zext i32 %i1 to i64
  %x2 = zext i32 %i2 to i64
  %x3 = zext i32 %i3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <4 x i64> %v0, i64 %x1, i32 1
  %v2 = insertelement <4 x i64> %v1, i64 %x2, i32 2
  %v3 = insertelement <4 x i64> %v2, i64 %x3, i32 3
  ; Return the fully built vector; every basic block needs a terminator.
  ret <4 x i64> %v3
}