; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=SSE2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=SLM
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+avx512bw -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=AVX

;
; vXi8
;

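; Each function below loads consecutive scalar elements, zero-extends them, and
; rebuilds the result with insertelement; the checks expect the SLP vectorizer
; to rewrite this into a single vector load followed by a vector zext (the
; plain SSE2 run keeps the 2 x i8 -> 2 x i64 case scalar).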
define <2 x i64> @loadext_2i8_to_2i64(i8* %p0) {
; SSE2-LABEL: @loadext_2i8_to_2i64(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SSE2-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1
; SSE2-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1
; SSE2-NEXT: [[X0:%.*]] = zext i8 [[I0]] to i64
; SSE2-NEXT: [[X1:%.*]] = zext i8 [[I1]] to i64
; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0
; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
; SSE2-NEXT: ret <2 x i64> [[V1]]
;
; SLM-LABEL: @loadext_2i8_to_2i64(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>*
; SLM-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64>
; SLM-NEXT: ret <2 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_2i8_to_2i64(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>*
; AVX-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64>
; AVX-NEXT: ret <2 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %x0 = zext i8 %i0 to i64
  %x1 = zext i8 %i1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
  ret <2 x i64> %v1
}

define <4 x i32> @loadext_4i8_to_4i32(i8* %p0) {
; SSE2-LABEL: @loadext_4i8_to_4i32(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
; SSE2-NEXT: ret <4 x i32> [[TMP3]]
;
; SLM-LABEL: @loadext_4i8_to_4i32(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
; SLM-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
; SLM-NEXT: ret <4 x i32> [[TMP3]]
;
; AVX-LABEL: @loadext_4i8_to_4i32(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
; AVX-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
; AVX-NEXT: ret <4 x i32> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %i2 = load i8, i8* %p2, align 1
  %i3 = load i8, i8* %p3, align 1
  %x0 = zext i8 %i0 to i32
  %x1 = zext i8 %i1 to i32
  %x2 = zext i8 %i2 to i32
  %x3 = zext i8 %i3 to i32
  %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %x1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %x2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %x3, i32 3
  ret <4 x i32> %v3
}

define <4 x i64> @loadext_4i8_to_4i64(i8* %p0) {
; SSE2-LABEL: @loadext_4i8_to_4i64(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
; SSE2-NEXT: ret <4 x i64> [[TMP3]]
;
; SLM-LABEL: @loadext_4i8_to_4i64(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
; SLM-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
; SLM-NEXT: ret <4 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_4i8_to_4i64(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
; AVX-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
; AVX-NEXT: ret <4 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %i2 = load i8, i8* %p2, align 1
  %i3 = load i8, i8* %p3, align 1
  %x0 = zext i8 %i0 to i64
  %x1 = zext i8 %i1 to i64
  %x2 = zext i8 %i2 to i64
  %x3 = zext i8 %i3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <4 x i64> %v0, i64 %x1, i32 1
  %v2 = insertelement <4 x i64> %v1, i64 %x2, i32 2
  %v3 = insertelement <4 x i64> %v2, i64 %x3, i32 3
  ret <4 x i64> %v3
}

define <8 x i16> @loadext_8i8_to_8i16(i8* %p0) {
; SSE2-LABEL: @loadext_8i8_to_8i16(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; SSE2-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; SSE2-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; SSE2-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; SSE2-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>*
; SSE2-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
; SSE2-NEXT: ret <8 x i16> [[TMP3]]
;
; SLM-LABEL: @loadext_8i8_to_8i16(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; SLM-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; SLM-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; SLM-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; SLM-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>*
; SLM-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
; SLM-NEXT: ret <8 x i16> [[TMP3]]
;
; AVX-LABEL: @loadext_8i8_to_8i16(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>*
; AVX-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
; AVX-NEXT: ret <8 x i16> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
  %p4 = getelementptr inbounds i8, i8* %p0, i64 4
  %p5 = getelementptr inbounds i8, i8* %p0, i64 5
  %p6 = getelementptr inbounds i8, i8* %p0, i64 6
  %p7 = getelementptr inbounds i8, i8* %p0, i64 7
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %i2 = load i8, i8* %p2, align 1
  %i3 = load i8, i8* %p3, align 1
  %i4 = load i8, i8* %p4, align 1
  %i5 = load i8, i8* %p5, align 1
  %i6 = load i8, i8* %p6, align 1
  %i7 = load i8, i8* %p7, align 1
  %x0 = zext i8 %i0 to i16
  %x1 = zext i8 %i1 to i16
  %x2 = zext i8 %i2 to i16
  %x3 = zext i8 %i3 to i16
  %x4 = zext i8 %i4 to i16
  %x5 = zext i8 %i5 to i16
  %x6 = zext i8 %i6 to i16
  %x7 = zext i8 %i7 to i16
  %v0 = insertelement <8 x i16> undef, i16 %x0, i32 0
  %v1 = insertelement <8 x i16> %v0, i16 %x1, i32 1
  %v2 = insertelement <8 x i16> %v1, i16 %x2, i32 2
  %v3 = insertelement <8 x i16> %v2, i16 %x3, i32 3
  %v4 = insertelement <8 x i16> %v3, i16 %x4, i32 4
  %v5 = insertelement <8 x i16> %v4, i16 %x5, i32 5
  %v6 = insertelement <8 x i16> %v5, i16 %x6, i32 6
  %v7 = insertelement <8 x i16> %v6, i16 %x7, i32 7
  ret <8 x i16> %v7
}

define <8 x i32> @loadext_8i8_to_8i32(i8* %p0) {
; SSE2-LABEL: @loadext_8i8_to_8i32(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; SSE2-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; SSE2-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; SSE2-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; SSE2-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>*
; SSE2-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
; SSE2-NEXT: ret <8 x i32> [[TMP3]]
;
; SLM-LABEL: @loadext_8i8_to_8i32(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; SLM-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; SLM-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; SLM-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; SLM-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>*
; SLM-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
; SLM-NEXT: ret <8 x i32> [[TMP3]]
;
; AVX-LABEL: @loadext_8i8_to_8i32(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>*
; AVX-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
; AVX-NEXT: ret <8 x i32> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
  %p4 = getelementptr inbounds i8, i8* %p0, i64 4
  %p5 = getelementptr inbounds i8, i8* %p0, i64 5
  %p6 = getelementptr inbounds i8, i8* %p0, i64 6
  %p7 = getelementptr inbounds i8, i8* %p0, i64 7
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %i2 = load i8, i8* %p2, align 1
  %i3 = load i8, i8* %p3, align 1
  %i4 = load i8, i8* %p4, align 1
  %i5 = load i8, i8* %p5, align 1
  %i6 = load i8, i8* %p6, align 1
  %i7 = load i8, i8* %p7, align 1
  %x0 = zext i8 %i0 to i32
  %x1 = zext i8 %i1 to i32
  %x2 = zext i8 %i2 to i32
  %x3 = zext i8 %i3 to i32
  %x4 = zext i8 %i4 to i32
  %x5 = zext i8 %i5 to i32
  %x6 = zext i8 %i6 to i32
  %x7 = zext i8 %i7 to i32
  %v0 = insertelement <8 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <8 x i32> %v0, i32 %x1, i32 1
  %v2 = insertelement <8 x i32> %v1, i32 %x2, i32 2
  %v3 = insertelement <8 x i32> %v2, i32 %x3, i32 3
  %v4 = insertelement <8 x i32> %v3, i32 %x4, i32 4
  %v5 = insertelement <8 x i32> %v4, i32 %x5, i32 5
  %v6 = insertelement <8 x i32> %v5, i32 %x6, i32 6
  %v7 = insertelement <8 x i32> %v6, i32 %x7, i32 7
  ret <8 x i32> %v7
}

define <16 x i16> @loadext_16i8_to_16i16(i8* %p0) {
; SSE2-LABEL: @loadext_16i8_to_16i16(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; SSE2-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; SSE2-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; SSE2-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; SSE2-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; SSE2-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8
; SSE2-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9
; SSE2-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10
; SSE2-NEXT: [[P11:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11
; SSE2-NEXT: [[P12:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12
; SSE2-NEXT: [[P13:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13
; SSE2-NEXT: [[P14:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14
; SSE2-NEXT: [[P15:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <16 x i8>*
; SSE2-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16>
; SSE2-NEXT: ret <16 x i16> [[TMP3]]
;
; SLM-LABEL: @loadext_16i8_to_16i16(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; SLM-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; SLM-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; SLM-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; SLM-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; SLM-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8
; SLM-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9
; SLM-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10
; SLM-NEXT: [[P11:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11
; SLM-NEXT: [[P12:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12
; SLM-NEXT: [[P13:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13
; SLM-NEXT: [[P14:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14
; SLM-NEXT: [[P15:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15
; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <16 x i8>*
; SLM-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16>
; SLM-NEXT: ret <16 x i16> [[TMP3]]
;
; AVX-LABEL: @loadext_16i8_to_16i16(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; AVX-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8
; AVX-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9
; AVX-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10
; AVX-NEXT: [[P11:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11
; AVX-NEXT: [[P12:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12
; AVX-NEXT: [[P13:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13
; AVX-NEXT: [[P14:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14
; AVX-NEXT: [[P15:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15
; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <16 x i8>*
; AVX-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16>
; AVX-NEXT: ret <16 x i16> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
  %p4 = getelementptr inbounds i8, i8* %p0, i64 4
  %p5 = getelementptr inbounds i8, i8* %p0, i64 5
  %p6 = getelementptr inbounds i8, i8* %p0, i64 6
  %p7 = getelementptr inbounds i8, i8* %p0, i64 7
  %p8 = getelementptr inbounds i8, i8* %p0, i64 8
  %p9 = getelementptr inbounds i8, i8* %p0, i64 9
  %p10 = getelementptr inbounds i8, i8* %p0, i64 10
  %p11 = getelementptr inbounds i8, i8* %p0, i64 11
  %p12 = getelementptr inbounds i8, i8* %p0, i64 12
  %p13 = getelementptr inbounds i8, i8* %p0, i64 13
  %p14 = getelementptr inbounds i8, i8* %p0, i64 14
  %p15 = getelementptr inbounds i8, i8* %p0, i64 15
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %i2 = load i8, i8* %p2, align 1
  %i3 = load i8, i8* %p3, align 1
  %i4 = load i8, i8* %p4, align 1
  %i5 = load i8, i8* %p5, align 1
  %i6 = load i8, i8* %p6, align 1
  %i7 = load i8, i8* %p7, align 1
  %i8 = load i8, i8* %p8, align 1
  %i9 = load i8, i8* %p9, align 1
  %i10 = load i8, i8* %p10, align 1
  %i11 = load i8, i8* %p11, align 1
  %i12 = load i8, i8* %p12, align 1
  %i13 = load i8, i8* %p13, align 1
  %i14 = load i8, i8* %p14, align 1
  %i15 = load i8, i8* %p15, align 1
  %x0 = zext i8 %i0 to i16
  %x1 = zext i8 %i1 to i16
  %x2 = zext i8 %i2 to i16
  %x3 = zext i8 %i3 to i16
  %x4 = zext i8 %i4 to i16
  %x5 = zext i8 %i5 to i16
  %x6 = zext i8 %i6 to i16
  %x7 = zext i8 %i7 to i16
  %x8 = zext i8 %i8 to i16
  %x9 = zext i8 %i9 to i16
  %x10 = zext i8 %i10 to i16
  %x11 = zext i8 %i11 to i16
  %x12 = zext i8 %i12 to i16
  %x13 = zext i8 %i13 to i16
  %x14 = zext i8 %i14 to i16
  %x15 = zext i8 %i15 to i16
  %v0 = insertelement <16 x i16> undef, i16 %x0, i32 0
  %v1 = insertelement <16 x i16> %v0, i16 %x1, i32 1
  %v2 = insertelement <16 x i16> %v1, i16 %x2, i32 2
  %v3 = insertelement <16 x i16> %v2, i16 %x3, i32 3
  %v4 = insertelement <16 x i16> %v3, i16 %x4, i32 4
  %v5 = insertelement <16 x i16> %v4, i16 %x5, i32 5
  %v6 = insertelement <16 x i16> %v5, i16 %x6, i32 6
  %v7 = insertelement <16 x i16> %v6, i16 %x7, i32 7
  %v8 = insertelement <16 x i16> %v7, i16 %x8, i32 8
  %v9 = insertelement <16 x i16> %v8, i16 %x9, i32 9
  %v10 = insertelement <16 x i16> %v9, i16 %x10, i32 10
  %v11 = insertelement <16 x i16> %v10, i16 %x11, i32 11
  %v12 = insertelement <16 x i16> %v11, i16 %x12, i32 12
  %v13 = insertelement <16 x i16> %v12, i16 %x13, i32 13
  %v14 = insertelement <16 x i16> %v13, i16 %x14, i32 14
  %v15 = insertelement <16 x i16> %v14, i16 %x15, i32 15
  ret <16 x i16> %v15
}

;
; vXi16
;

define <2 x i64> @loadext_2i16_to_2i64(i16* %p0) {
; SSE2-LABEL: @loadext_2i16_to_2i64(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>*
; SSE2-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>
; SSE2-NEXT: ret <2 x i64> [[TMP3]]
;
; SLM-LABEL: @loadext_2i16_to_2i64(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>*
; SLM-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>
; SLM-NEXT: ret <2 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_2i16_to_2i64(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>*
; AVX-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>
; AVX-NEXT: ret <2 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
  %i0 = load i16, i16* %p0, align 1
  %i1 = load i16, i16* %p1, align 1
  %x0 = zext i16 %i0 to i64
  %x1 = zext i16 %i1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
  ret <2 x i64> %v1
}

define <4 x i32> @loadext_4i16_to_4i32(i16* %p0) {
; SSE2-LABEL: @loadext_4i16_to_4i32(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>*
; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
; SSE2-NEXT: ret <4 x i32> [[TMP3]]
;
; SLM-LABEL: @loadext_4i16_to_4i32(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>*
; SLM-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
; SLM-NEXT: ret <4 x i32> [[TMP3]]
;
; AVX-LABEL: @loadext_4i16_to_4i32(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>*
; AVX-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
; AVX-NEXT: ret <4 x i32> [[TMP3]]
;
  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
  %p2 = getelementptr inbounds i16, i16* %p0, i64 2
  %p3 = getelementptr inbounds i16, i16* %p0, i64 3
  %i0 = load i16, i16* %p0, align 1
  %i1 = load i16, i16* %p1, align 1
  %i2 = load i16, i16* %p2, align 1
  %i3 = load i16, i16* %p3, align 1
  %x0 = zext i16 %i0 to i32
  %x1 = zext i16 %i1 to i32
  %x2 = zext i16 %i2 to i32
  %x3 = zext i16 %i3 to i32
  %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %x1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %x2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %x3, i32 3
  ret <4 x i32> %v3
}

define <4 x i64> @loadext_4i16_to_4i64(i16* %p0) {
; SSE2-LABEL: @loadext_4i16_to_4i64(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>*
; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
; SSE2-NEXT: ret <4 x i64> [[TMP3]]
;
; SLM-LABEL: @loadext_4i16_to_4i64(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>*
; SLM-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
; SLM-NEXT: ret <4 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_4i16_to_4i64(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>*
; AVX-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
; AVX-NEXT: ret <4 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
  %p2 = getelementptr inbounds i16, i16* %p0, i64 2
  %p3 = getelementptr inbounds i16, i16* %p0, i64 3
  %i0 = load i16, i16* %p0, align 1
  %i1 = load i16, i16* %p1, align 1
  %i2 = load i16, i16* %p2, align 1
  %i3 = load i16, i16* %p3, align 1
  %x0 = zext i16 %i0 to i64
  %x1 = zext i16 %i1 to i64
  %x2 = zext i16 %i2 to i64
  %x3 = zext i16 %i3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <4 x i64> %v0, i64 %x1, i32 1
  %v2 = insertelement <4 x i64> %v1, i64 %x2, i32 2
  %v3 = insertelement <4 x i64> %v2, i64 %x3, i32 3
  ret <4 x i64> %v3
}

define <8 x i32> @loadext_8i16_to_8i32(i16* %p0) {
; SSE2-LABEL: @loadext_8i16_to_8i32(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; SSE2-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4
; SSE2-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5
; SSE2-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6
; SSE2-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>*
; SSE2-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
; SSE2-NEXT: ret <8 x i32> [[TMP3]]
;
; SLM-LABEL: @loadext_8i16_to_8i32(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; SLM-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4
; SLM-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5
; SLM-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6
; SLM-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7
; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>*
; SLM-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
; SLM-NEXT: ret <8 x i32> [[TMP3]]
;
; AVX-LABEL: @loadext_8i16_to_8i32(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4
; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5
; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6
; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7
; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>*
; AVX-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
; AVX-NEXT: ret <8 x i32> [[TMP3]]
;
  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
  %p2 = getelementptr inbounds i16, i16* %p0, i64 2
  %p3 = getelementptr inbounds i16, i16* %p0, i64 3
  %p4 = getelementptr inbounds i16, i16* %p0, i64 4
  %p5 = getelementptr inbounds i16, i16* %p0, i64 5
  %p6 = getelementptr inbounds i16, i16* %p0, i64 6
  %p7 = getelementptr inbounds i16, i16* %p0, i64 7
  %i0 = load i16, i16* %p0, align 1
  %i1 = load i16, i16* %p1, align 1
  %i2 = load i16, i16* %p2, align 1
  %i3 = load i16, i16* %p3, align 1
  %i4 = load i16, i16* %p4, align 1
  %i5 = load i16, i16* %p5, align 1
  %i6 = load i16, i16* %p6, align 1
  %i7 = load i16, i16* %p7, align 1
  %x0 = zext i16 %i0 to i32
  %x1 = zext i16 %i1 to i32
  %x2 = zext i16 %i2 to i32
  %x3 = zext i16 %i3 to i32
  %x4 = zext i16 %i4 to i32
  %x5 = zext i16 %i5 to i32
  %x6 = zext i16 %i6 to i32
  %x7 = zext i16 %i7 to i32
  %v0 = insertelement <8 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <8 x i32> %v0, i32 %x1, i32 1
  %v2 = insertelement <8 x i32> %v1, i32 %x2, i32 2
  %v3 = insertelement <8 x i32> %v2, i32 %x3, i32 3
  %v4 = insertelement <8 x i32> %v3, i32 %x4, i32 4
  %v5 = insertelement <8 x i32> %v4, i32 %x5, i32 5
  %v6 = insertelement <8 x i32> %v5, i32 %x6, i32 6
  %v7 = insertelement <8 x i32> %v6, i32 %x7, i32 7
  ret <8 x i32> %v7
}

;
; vXi32
;

define <2 x i64> @loadext_2i32_to_2i64(i32* %p0) {
; SSE2-LABEL: @loadext_2i32_to_2i64(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <2 x i32>*
; SSE2-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
; SSE2-NEXT: ret <2 x i64> [[TMP3]]
;
; SLM-LABEL: @loadext_2i32_to_2i64(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
; SLM-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <2 x i32>*
; SLM-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
; SLM-NEXT: ret <2 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_2i32_to_2i64(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <2 x i32>*
; AVX-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
; AVX-NEXT: ret <2 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i32, i32* %p0, i64 1
  %i0 = load i32, i32* %p0, align 1
  %i1 = load i32, i32* %p1, align 1
  %x0 = zext i32 %i0 to i64
  %x1 = zext i32 %i1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
  ret <2 x i64> %v1
}

define <4 x i64> @loadext_4i32_to_4i64(i32* %p0) {
; SSE2-LABEL: @loadext_4i32_to_4i64(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2
; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <4 x i32>*
; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
; SSE2-NEXT: ret <4 x i64> [[TMP3]]
;
; SLM-LABEL: @loadext_4i32_to_4i64(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2
; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3
; SLM-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <4 x i32>*
; SLM-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
; SLM-NEXT: ret <4 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_4i32_to_4i64(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2
; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3
; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <4 x i32>*
; AVX-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
; AVX-NEXT: ret <4 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i32, i32* %p0, i64 1
  %p2 = getelementptr inbounds i32, i32* %p0, i64 2
  %p3 = getelementptr inbounds i32, i32* %p0, i64 3
  %i0 = load i32, i32* %p0, align 1
  %i1 = load i32, i32* %p1, align 1
  %i2 = load i32, i32* %p2, align 1
  %i3 = load i32, i32* %p3, align 1
  %x0 = zext i32 %i0 to i64
  %x1 = zext i32 %i1 to i64
  %x2 = zext i32 %i2 to i64
  %x3 = zext i32 %i3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <4 x i64> %v0, i64 %x1, i32 1
  %v2 = insertelement <4 x i64> %v1, i64 %x2, i32 2
  %v3 = insertelement <4 x i64> %v2, i64 %x3, i32 3