1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SSE2
3 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SLM
4 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
5 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
6 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
7 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+avx512bw -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; Two adjacent i8 loads (%p0 and %p0 + 1) are each sign-extended to i64 and
; inserted into lanes 0 and 1 of a <2 x i64>.
; The expected output differs per check prefix: the SSE2 lines keep the scalar
; loads, sexts and insertelements unchanged, while the SLM and AVX lines expect
; a single <2 x i8> load (align 1) followed by one vector sext to <2 x i64>.
13 define <2 x i64> @loadext_2i8_to_2i64(i8* %p0) {
14 ; SSE2-LABEL: @loadext_2i8_to_2i64(
15 ; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
16 ; SSE2-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1
17 ; SSE2-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1
18 ; SSE2-NEXT: [[X0:%.*]] = sext i8 [[I0]] to i64
19 ; SSE2-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i64
20 ; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0
21 ; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
22 ; SSE2-NEXT: ret <2 x i64> [[V1]]
24 ; SLM-LABEL: @loadext_2i8_to_2i64(
25 ; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
26 ; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>*
27 ; SLM-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
28 ; SLM-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>
29 ; SLM-NEXT: ret <2 x i64> [[TMP3]]
31 ; AVX-LABEL: @loadext_2i8_to_2i64(
32 ; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
33 ; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>*
34 ; AVX-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
35 ; AVX-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>
36 ; AVX-NEXT: ret <2 x i64> [[TMP3]]
; Scalar input pattern handed to the SLP vectorizer.
38 %p1 = getelementptr inbounds i8, i8* %p0, i64 1
39 %i0 = load i8, i8* %p0, align 1
40 %i1 = load i8, i8* %p1, align 1
41 %x0 = sext i8 %i0 to i64
42 %x1 = sext i8 %i1 to i64
43 %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
44 %v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
; Four consecutive i8 loads (offsets 0..3 from %p0) are each sign-extended to
; i32 and inserted into lanes 0..3 of a <4 x i32>.
; Both the SSE and AVX check prefixes expect the scalar loads to be replaced by
; one <4 x i8> load (align 1) and a single vector sext to <4 x i32>; the three
; getelementptrs are still expected in the output.
48 define <4 x i32> @loadext_4i8_to_4i32(i8* %p0) {
49 ; SSE-LABEL: @loadext_4i8_to_4i32(
50 ; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
51 ; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
52 ; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
53 ; SSE-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
54 ; SSE-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
55 ; SSE-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32>
56 ; SSE-NEXT: ret <4 x i32> [[TMP3]]
58 ; AVX-LABEL: @loadext_4i8_to_4i32(
59 ; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
60 ; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
61 ; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
62 ; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
63 ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
64 ; AVX-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32>
65 ; AVX-NEXT: ret <4 x i32> [[TMP3]]
; Scalar input pattern handed to the SLP vectorizer.
67 %p1 = getelementptr inbounds i8, i8* %p0, i64 1
68 %p2 = getelementptr inbounds i8, i8* %p0, i64 2
69 %p3 = getelementptr inbounds i8, i8* %p0, i64 3
70 %i0 = load i8, i8* %p0, align 1
71 %i1 = load i8, i8* %p1, align 1
72 %i2 = load i8, i8* %p2, align 1
73 %i3 = load i8, i8* %p3, align 1
74 %x0 = sext i8 %i0 to i32
75 %x1 = sext i8 %i1 to i32
76 %x2 = sext i8 %i2 to i32
77 %x3 = sext i8 %i3 to i32
78 %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
79 %v1 = insertelement <4 x i32> %v0, i32 %x1, i32 1
80 %v2 = insertelement <4 x i32> %v1, i32 %x2, i32 2
81 %v3 = insertelement <4 x i32> %v2, i32 %x3, i32 3
; Four consecutive i8 loads (offsets 0..3 from %p0) are each sign-extended to
; i64 and inserted into lanes 0..3 of a <4 x i64>.
; Both the SSE and AVX check prefixes expect one <4 x i8> load (align 1)
; followed by a single vector sext to <4 x i64>; the three getelementptrs are
; still expected in the output.
85 define <4 x i64> @loadext_4i8_to_4i64(i8* %p0) {
86 ; SSE-LABEL: @loadext_4i8_to_4i64(
87 ; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
88 ; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
89 ; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
90 ; SSE-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
91 ; SSE-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
92 ; SSE-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i64>
93 ; SSE-NEXT: ret <4 x i64> [[TMP3]]
95 ; AVX-LABEL: @loadext_4i8_to_4i64(
96 ; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
97 ; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
98 ; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
99 ; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
100 ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
101 ; AVX-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i64>
102 ; AVX-NEXT: ret <4 x i64> [[TMP3]]
; Scalar input pattern handed to the SLP vectorizer.
104 %p1 = getelementptr inbounds i8, i8* %p0, i64 1
105 %p2 = getelementptr inbounds i8, i8* %p0, i64 2
106 %p3 = getelementptr inbounds i8, i8* %p0, i64 3
107 %i0 = load i8, i8* %p0, align 1
108 %i1 = load i8, i8* %p1, align 1
109 %i2 = load i8, i8* %p2, align 1
110 %i3 = load i8, i8* %p3, align 1
111 %x0 = sext i8 %i0 to i64
112 %x1 = sext i8 %i1 to i64
113 %x2 = sext i8 %i2 to i64
114 %x3 = sext i8 %i3 to i64
115 %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
116 %v1 = insertelement <4 x i64> %v0, i64 %x1, i32 1
117 %v2 = insertelement <4 x i64> %v1, i64 %x2, i32 2
118 %v3 = insertelement <4 x i64> %v2, i64 %x3, i32 3
; Eight consecutive i8 loads (offsets 0..7 from %p0) are each sign-extended to
; i16 and inserted into lanes 0..7 of an <8 x i16>.
; Both the SSE and AVX check prefixes expect one <8 x i8> load (align 1)
; followed by a single vector sext to <8 x i16>; the seven getelementptrs are
; still expected in the output.
122 define <8 x i16> @loadext_8i8_to_8i16(i8* %p0) {
123 ; SSE-LABEL: @loadext_8i8_to_8i16(
124 ; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
125 ; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
126 ; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
127 ; SSE-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
128 ; SSE-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
129 ; SSE-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
130 ; SSE-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
131 ; SSE-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>*
132 ; SSE-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
133 ; SSE-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16>
134 ; SSE-NEXT: ret <8 x i16> [[TMP3]]
136 ; AVX-LABEL: @loadext_8i8_to_8i16(
137 ; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
138 ; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
139 ; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
140 ; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
141 ; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
142 ; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
143 ; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
144 ; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>*
145 ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
146 ; AVX-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16>
147 ; AVX-NEXT: ret <8 x i16> [[TMP3]]
; Scalar input pattern handed to the SLP vectorizer.
149 %p1 = getelementptr inbounds i8, i8* %p0, i64 1
150 %p2 = getelementptr inbounds i8, i8* %p0, i64 2
151 %p3 = getelementptr inbounds i8, i8* %p0, i64 3
152 %p4 = getelementptr inbounds i8, i8* %p0, i64 4
153 %p5 = getelementptr inbounds i8, i8* %p0, i64 5
154 %p6 = getelementptr inbounds i8, i8* %p0, i64 6
155 %p7 = getelementptr inbounds i8, i8* %p0, i64 7
156 %i0 = load i8, i8* %p0, align 1
157 %i1 = load i8, i8* %p1, align 1
158 %i2 = load i8, i8* %p2, align 1
159 %i3 = load i8, i8* %p3, align 1
160 %i4 = load i8, i8* %p4, align 1
161 %i5 = load i8, i8* %p5, align 1
162 %i6 = load i8, i8* %p6, align 1
163 %i7 = load i8, i8* %p7, align 1
164 %x0 = sext i8 %i0 to i16
165 %x1 = sext i8 %i1 to i16
166 %x2 = sext i8 %i2 to i16
167 %x3 = sext i8 %i3 to i16
168 %x4 = sext i8 %i4 to i16
169 %x5 = sext i8 %i5 to i16
170 %x6 = sext i8 %i6 to i16
171 %x7 = sext i8 %i7 to i16
172 %v0 = insertelement <8 x i16> undef, i16 %x0, i32 0
173 %v1 = insertelement <8 x i16> %v0, i16 %x1, i32 1
174 %v2 = insertelement <8 x i16> %v1, i16 %x2, i32 2
175 %v3 = insertelement <8 x i16> %v2, i16 %x3, i32 3
176 %v4 = insertelement <8 x i16> %v3, i16 %x4, i32 4
177 %v5 = insertelement <8 x i16> %v4, i16 %x5, i32 5
178 %v6 = insertelement <8 x i16> %v5, i16 %x6, i32 6
179 %v7 = insertelement <8 x i16> %v6, i16 %x7, i32 7
; Eight consecutive i8 loads (offsets 0..7 from %p0) are each sign-extended to
; i32 and inserted into lanes 0..7 of an <8 x i32>.
; Both the SSE and AVX check prefixes expect one <8 x i8> load (align 1)
; followed by a single vector sext to <8 x i32>; the seven getelementptrs are
; still expected in the output.
183 define <8 x i32> @loadext_8i8_to_8i32(i8* %p0) {
184 ; SSE-LABEL: @loadext_8i8_to_8i32(
185 ; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
186 ; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
187 ; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
188 ; SSE-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
189 ; SSE-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
190 ; SSE-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
191 ; SSE-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
192 ; SSE-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>*
193 ; SSE-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
194 ; SSE-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i32>
195 ; SSE-NEXT: ret <8 x i32> [[TMP3]]
197 ; AVX-LABEL: @loadext_8i8_to_8i32(
198 ; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
199 ; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
200 ; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
201 ; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
202 ; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
203 ; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
204 ; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
205 ; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>*
206 ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
207 ; AVX-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i32>
208 ; AVX-NEXT: ret <8 x i32> [[TMP3]]
; Scalar input pattern handed to the SLP vectorizer.
210 %p1 = getelementptr inbounds i8, i8* %p0, i64 1
211 %p2 = getelementptr inbounds i8, i8* %p0, i64 2
212 %p3 = getelementptr inbounds i8, i8* %p0, i64 3
213 %p4 = getelementptr inbounds i8, i8* %p0, i64 4
214 %p5 = getelementptr inbounds i8, i8* %p0, i64 5
215 %p6 = getelementptr inbounds i8, i8* %p0, i64 6
216 %p7 = getelementptr inbounds i8, i8* %p0, i64 7
217 %i0 = load i8, i8* %p0, align 1
218 %i1 = load i8, i8* %p1, align 1
219 %i2 = load i8, i8* %p2, align 1
220 %i3 = load i8, i8* %p3, align 1
221 %i4 = load i8, i8* %p4, align 1
222 %i5 = load i8, i8* %p5, align 1
223 %i6 = load i8, i8* %p6, align 1
224 %i7 = load i8, i8* %p7, align 1
225 %x0 = sext i8 %i0 to i32
226 %x1 = sext i8 %i1 to i32
227 %x2 = sext i8 %i2 to i32
228 %x3 = sext i8 %i3 to i32
229 %x4 = sext i8 %i4 to i32
230 %x5 = sext i8 %i5 to i32
231 %x6 = sext i8 %i6 to i32
232 %x7 = sext i8 %i7 to i32
233 %v0 = insertelement <8 x i32> undef, i32 %x0, i32 0
234 %v1 = insertelement <8 x i32> %v0, i32 %x1, i32 1
235 %v2 = insertelement <8 x i32> %v1, i32 %x2, i32 2
236 %v3 = insertelement <8 x i32> %v2, i32 %x3, i32 3
237 %v4 = insertelement <8 x i32> %v3, i32 %x4, i32 4
238 %v5 = insertelement <8 x i32> %v4, i32 %x5, i32 5
239 %v6 = insertelement <8 x i32> %v5, i32 %x6, i32 6
240 %v7 = insertelement <8 x i32> %v6, i32 %x7, i32 7
; Sixteen consecutive i8 loads (offsets 0..15 from %p0) are each sign-extended
; to i16 and inserted into lanes 0..15 of a <16 x i16>.
; Both the SSE and AVX check prefixes expect one <16 x i8> load (align 1)
; followed by a single vector sext to <16 x i16>; the fifteen getelementptrs
; are still expected in the output.
244 define <16 x i16> @loadext_16i8_to_16i16(i8* %p0) {
245 ; SSE-LABEL: @loadext_16i8_to_16i16(
246 ; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
247 ; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
248 ; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
249 ; SSE-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
250 ; SSE-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
251 ; SSE-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
252 ; SSE-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
253 ; SSE-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8
254 ; SSE-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9
255 ; SSE-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10
256 ; SSE-NEXT: [[P11:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11
257 ; SSE-NEXT: [[P12:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12
258 ; SSE-NEXT: [[P13:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13
259 ; SSE-NEXT: [[P14:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14
260 ; SSE-NEXT: [[P15:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15
261 ; SSE-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <16 x i8>*
262 ; SSE-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
263 ; SSE-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[TMP2]] to <16 x i16>
264 ; SSE-NEXT: ret <16 x i16> [[TMP3]]
266 ; AVX-LABEL: @loadext_16i8_to_16i16(
267 ; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
268 ; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
269 ; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
270 ; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
271 ; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
272 ; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
273 ; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
274 ; AVX-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8
275 ; AVX-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9
276 ; AVX-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10
277 ; AVX-NEXT: [[P11:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11
278 ; AVX-NEXT: [[P12:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12
279 ; AVX-NEXT: [[P13:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13
280 ; AVX-NEXT: [[P14:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14
281 ; AVX-NEXT: [[P15:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15
282 ; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <16 x i8>*
283 ; AVX-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
284 ; AVX-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[TMP2]] to <16 x i16>
285 ; AVX-NEXT: ret <16 x i16> [[TMP3]]
; Scalar input pattern handed to the SLP vectorizer.
287 %p1 = getelementptr inbounds i8, i8* %p0, i64 1
288 %p2 = getelementptr inbounds i8, i8* %p0, i64 2
289 %p3 = getelementptr inbounds i8, i8* %p0, i64 3
290 %p4 = getelementptr inbounds i8, i8* %p0, i64 4
291 %p5 = getelementptr inbounds i8, i8* %p0, i64 5
292 %p6 = getelementptr inbounds i8, i8* %p0, i64 6
293 %p7 = getelementptr inbounds i8, i8* %p0, i64 7
294 %p8 = getelementptr inbounds i8, i8* %p0, i64 8
295 %p9 = getelementptr inbounds i8, i8* %p0, i64 9
296 %p10 = getelementptr inbounds i8, i8* %p0, i64 10
297 %p11 = getelementptr inbounds i8, i8* %p0, i64 11
298 %p12 = getelementptr inbounds i8, i8* %p0, i64 12
299 %p13 = getelementptr inbounds i8, i8* %p0, i64 13
300 %p14 = getelementptr inbounds i8, i8* %p0, i64 14
301 %p15 = getelementptr inbounds i8, i8* %p0, i64 15
302 %i0 = load i8, i8* %p0, align 1
303 %i1 = load i8, i8* %p1, align 1
304 %i2 = load i8, i8* %p2, align 1
305 %i3 = load i8, i8* %p3, align 1
306 %i4 = load i8, i8* %p4, align 1
307 %i5 = load i8, i8* %p5, align 1
308 %i6 = load i8, i8* %p6, align 1
309 %i7 = load i8, i8* %p7, align 1
310 %i8 = load i8, i8* %p8, align 1
311 %i9 = load i8, i8* %p9, align 1
312 %i10 = load i8, i8* %p10, align 1
313 %i11 = load i8, i8* %p11, align 1
314 %i12 = load i8, i8* %p12, align 1
315 %i13 = load i8, i8* %p13, align 1
316 %i14 = load i8, i8* %p14, align 1
317 %i15 = load i8, i8* %p15, align 1
318 %x0 = sext i8 %i0 to i16
319 %x1 = sext i8 %i1 to i16
320 %x2 = sext i8 %i2 to i16
321 %x3 = sext i8 %i3 to i16
322 %x4 = sext i8 %i4 to i16
323 %x5 = sext i8 %i5 to i16
324 %x6 = sext i8 %i6 to i16
325 %x7 = sext i8 %i7 to i16
326 %x8 = sext i8 %i8 to i16
327 %x9 = sext i8 %i9 to i16
328 %x10 = sext i8 %i10 to i16
329 %x11 = sext i8 %i11 to i16
330 %x12 = sext i8 %i12 to i16
331 %x13 = sext i8 %i13 to i16
332 %x14 = sext i8 %i14 to i16
333 %x15 = sext i8 %i15 to i16
334 %v0 = insertelement <16 x i16> undef, i16 %x0, i32 0
335 %v1 = insertelement <16 x i16> %v0, i16 %x1, i32 1
336 %v2 = insertelement <16 x i16> %v1, i16 %x2, i32 2
337 %v3 = insertelement <16 x i16> %v2, i16 %x3, i32 3
338 %v4 = insertelement <16 x i16> %v3, i16 %x4, i32 4
339 %v5 = insertelement <16 x i16> %v4, i16 %x5, i32 5
340 %v6 = insertelement <16 x i16> %v5, i16 %x6, i32 6
341 %v7 = insertelement <16 x i16> %v6, i16 %x7, i32 7
342 %v8 = insertelement <16 x i16> %v7, i16 %x8, i32 8
343 %v9 = insertelement <16 x i16> %v8, i16 %x9, i32 9
344 %v10 = insertelement <16 x i16> %v9, i16 %x10, i32 10
345 %v11 = insertelement <16 x i16> %v10, i16 %x11, i32 11
346 %v12 = insertelement <16 x i16> %v11, i16 %x12, i32 12
347 %v13 = insertelement <16 x i16> %v12, i16 %x13, i32 13
348 %v14 = insertelement <16 x i16> %v13, i16 %x14, i32 14
349 %v15 = insertelement <16 x i16> %v14, i16 %x15, i32 15
; Two adjacent i16 loads (%p0 and %p0 + 1 element), both marked align 1, are
; each sign-extended to i64 and inserted into lanes 0 and 1 of a <2 x i64>.
; Both the SSE and AVX check prefixes expect one <2 x i16> load that keeps
; align 1, followed by a single vector sext to <2 x i64>.
357 define <2 x i64> @loadext_2i16_to_2i64(i16* %p0) {
358 ; SSE-LABEL: @loadext_2i16_to_2i64(
359 ; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
360 ; SSE-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>*
361 ; SSE-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
362 ; SSE-NEXT: [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64>
363 ; SSE-NEXT: ret <2 x i64> [[TMP3]]
365 ; AVX-LABEL: @loadext_2i16_to_2i64(
366 ; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
367 ; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>*
368 ; AVX-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
369 ; AVX-NEXT: [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64>
370 ; AVX-NEXT: ret <2 x i64> [[TMP3]]
; Scalar input pattern handed to the SLP vectorizer.
372 %p1 = getelementptr inbounds i16, i16* %p0, i64 1
373 %i0 = load i16, i16* %p0, align 1
374 %i1 = load i16, i16* %p1, align 1
375 %x0 = sext i16 %i0 to i64
376 %x1 = sext i16 %i1 to i64
377 %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
378 %v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
; Four consecutive i16 loads (element offsets 0..3 from %p0), all marked
; align 1, are each sign-extended to i32 and inserted into lanes 0..3 of a
; <4 x i32>.
; Both the SSE and AVX check prefixes expect one <4 x i16> load that keeps
; align 1, followed by a single vector sext to <4 x i32>.
382 define <4 x i32> @loadext_4i16_to_4i32(i16* %p0) {
383 ; SSE-LABEL: @loadext_4i16_to_4i32(
384 ; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
385 ; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
386 ; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
387 ; SSE-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>*
388 ; SSE-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
389 ; SSE-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i32>
390 ; SSE-NEXT: ret <4 x i32> [[TMP3]]
392 ; AVX-LABEL: @loadext_4i16_to_4i32(
393 ; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
394 ; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
395 ; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
396 ; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>*
397 ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
398 ; AVX-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i32>
399 ; AVX-NEXT: ret <4 x i32> [[TMP3]]
; Scalar input pattern handed to the SLP vectorizer.
401 %p1 = getelementptr inbounds i16, i16* %p0, i64 1
402 %p2 = getelementptr inbounds i16, i16* %p0, i64 2
403 %p3 = getelementptr inbounds i16, i16* %p0, i64 3
404 %i0 = load i16, i16* %p0, align 1
405 %i1 = load i16, i16* %p1, align 1
406 %i2 = load i16, i16* %p2, align 1
407 %i3 = load i16, i16* %p3, align 1
408 %x0 = sext i16 %i0 to i32
409 %x1 = sext i16 %i1 to i32
410 %x2 = sext i16 %i2 to i32
411 %x3 = sext i16 %i3 to i32
412 %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
413 %v1 = insertelement <4 x i32> %v0, i32 %x1, i32 1
414 %v2 = insertelement <4 x i32> %v1, i32 %x2, i32 2
415 %v3 = insertelement <4 x i32> %v2, i32 %x3, i32 3
; Four consecutive i16 loads (element offsets 0..3 from %p0), all marked
; align 1, are each sign-extended to i64 and inserted into lanes 0..3 of a
; <4 x i64>.
; Both the SSE and AVX check prefixes expect one <4 x i16> load that keeps
; align 1, followed by a single vector sext to <4 x i64>.
419 define <4 x i64> @loadext_4i16_to_4i64(i16* %p0) {
420 ; SSE-LABEL: @loadext_4i16_to_4i64(
421 ; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
422 ; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
423 ; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
424 ; SSE-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>*
425 ; SSE-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
426 ; SSE-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64>
427 ; SSE-NEXT: ret <4 x i64> [[TMP3]]
429 ; AVX-LABEL: @loadext_4i16_to_4i64(
430 ; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
431 ; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
432 ; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
433 ; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>*
434 ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
435 ; AVX-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64>
436 ; AVX-NEXT: ret <4 x i64> [[TMP3]]
; Scalar input pattern handed to the SLP vectorizer.
438 %p1 = getelementptr inbounds i16, i16* %p0, i64 1
439 %p2 = getelementptr inbounds i16, i16* %p0, i64 2
440 %p3 = getelementptr inbounds i16, i16* %p0, i64 3
441 %i0 = load i16, i16* %p0, align 1
442 %i1 = load i16, i16* %p1, align 1
443 %i2 = load i16, i16* %p2, align 1
444 %i3 = load i16, i16* %p3, align 1
445 %x0 = sext i16 %i0 to i64
446 %x1 = sext i16 %i1 to i64
447 %x2 = sext i16 %i2 to i64
448 %x3 = sext i16 %i3 to i64
449 %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
450 %v1 = insertelement <4 x i64> %v0, i64 %x1, i32 1
451 %v2 = insertelement <4 x i64> %v1, i64 %x2, i32 2
452 %v3 = insertelement <4 x i64> %v2, i64 %x3, i32 3
; Eight consecutive i16 loads (element offsets 0..7 from %p0), all marked
; align 1, are each sign-extended to i32 and inserted into lanes 0..7 of an
; <8 x i32>.
; Both the SSE and AVX check prefixes expect one <8 x i16> load that keeps
; align 1, followed by a single vector sext to <8 x i32>.
456 define <8 x i32> @loadext_8i16_to_8i32(i16* %p0) {
457 ; SSE-LABEL: @loadext_8i16_to_8i32(
458 ; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
459 ; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
460 ; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
461 ; SSE-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4
462 ; SSE-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5
463 ; SSE-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6
464 ; SSE-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7
465 ; SSE-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>*
466 ; SSE-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1
467 ; SSE-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32>
468 ; SSE-NEXT: ret <8 x i32> [[TMP3]]
470 ; AVX-LABEL: @loadext_8i16_to_8i32(
471 ; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
472 ; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
473 ; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
474 ; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4
475 ; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5
476 ; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6
477 ; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7
478 ; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>*
479 ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1
480 ; AVX-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32>
481 ; AVX-NEXT: ret <8 x i32> [[TMP3]]
; Scalar input pattern handed to the SLP vectorizer.
483 %p1 = getelementptr inbounds i16, i16* %p0, i64 1
484 %p2 = getelementptr inbounds i16, i16* %p0, i64 2
485 %p3 = getelementptr inbounds i16, i16* %p0, i64 3
486 %p4 = getelementptr inbounds i16, i16* %p0, i64 4
487 %p5 = getelementptr inbounds i16, i16* %p0, i64 5
488 %p6 = getelementptr inbounds i16, i16* %p0, i64 6
489 %p7 = getelementptr inbounds i16, i16* %p0, i64 7
490 %i0 = load i16, i16* %p0, align 1
491 %i1 = load i16, i16* %p1, align 1
492 %i2 = load i16, i16* %p2, align 1
493 %i3 = load i16, i16* %p3, align 1
494 %i4 = load i16, i16* %p4, align 1
495 %i5 = load i16, i16* %p5, align 1
496 %i6 = load i16, i16* %p6, align 1
497 %i7 = load i16, i16* %p7, align 1
498 %x0 = sext i16 %i0 to i32
499 %x1 = sext i16 %i1 to i32
500 %x2 = sext i16 %i2 to i32
501 %x3 = sext i16 %i3 to i32
502 %x4 = sext i16 %i4 to i32
503 %x5 = sext i16 %i5 to i32
504 %x6 = sext i16 %i6 to i32
505 %x7 = sext i16 %i7 to i32
506 %v0 = insertelement <8 x i32> undef, i32 %x0, i32 0
507 %v1 = insertelement <8 x i32> %v0, i32 %x1, i32 1
508 %v2 = insertelement <8 x i32> %v1, i32 %x2, i32 2
509 %v3 = insertelement <8 x i32> %v2, i32 %x3, i32 3
510 %v4 = insertelement <8 x i32> %v3, i32 %x4, i32 4
511 %v5 = insertelement <8 x i32> %v4, i32 %x5, i32 5
512 %v6 = insertelement <8 x i32> %v5, i32 %x6, i32 6
513 %v7 = insertelement <8 x i32> %v6, i32 %x7, i32 7
; Two adjacent i32 loads (%p0 and %p0 + 1 element), both marked align 1, are
; each sign-extended to i64 and inserted into lanes 0 and 1 of a <2 x i64>.
; Both the SSE and AVX check prefixes expect one <2 x i32> load that keeps
; align 1, followed by a single vector sext to <2 x i64>.
521 define <2 x i64> @loadext_2i32_to_2i64(i32* %p0) {
522 ; SSE-LABEL: @loadext_2i32_to_2i64(
523 ; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
524 ; SSE-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <2 x i32>*
525 ; SSE-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1
526 ; SSE-NEXT: [[TMP3:%.*]] = sext <2 x i32> [[TMP2]] to <2 x i64>
527 ; SSE-NEXT: ret <2 x i64> [[TMP3]]
529 ; AVX-LABEL: @loadext_2i32_to_2i64(
530 ; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
531 ; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <2 x i32>*
532 ; AVX-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1
533 ; AVX-NEXT: [[TMP3:%.*]] = sext <2 x i32> [[TMP2]] to <2 x i64>
534 ; AVX-NEXT: ret <2 x i64> [[TMP3]]
; Scalar input pattern handed to the SLP vectorizer.
536 %p1 = getelementptr inbounds i32, i32* %p0, i64 1
537 %i0 = load i32, i32* %p0, align 1
538 %i1 = load i32, i32* %p1, align 1
539 %x0 = sext i32 %i0 to i64
540 %x1 = sext i32 %i1 to i64
541 %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
542 %v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
; Four consecutive i32 loads (element offsets 0..3 from %p0), all marked
; align 1, are each sign-extended to i64 and inserted into lanes 0..3 of a
; <4 x i64>.
; Both the SSE and AVX check prefixes expect one <4 x i32> load that keeps
; align 1, followed by a single vector sext to <4 x i64>.
546 define <4 x i64> @loadext_4i32_to_4i64(i32* %p0) {
547 ; SSE-LABEL: @loadext_4i32_to_4i64(
548 ; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
549 ; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2
550 ; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3
551 ; SSE-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <4 x i32>*
552 ; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
553 ; SSE-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64>
554 ; SSE-NEXT: ret <4 x i64> [[TMP3]]
556 ; AVX-LABEL: @loadext_4i32_to_4i64(
557 ; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
558 ; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2
559 ; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3
560 ; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <4 x i32>*
561 ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
562 ; AVX-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64>
563 ; AVX-NEXT: ret <4 x i64> [[TMP3]]
; Scalar input pattern handed to the SLP vectorizer.
565 %p1 = getelementptr inbounds i32, i32* %p0, i64 1
566 %p2 = getelementptr inbounds i32, i32* %p0, i64 2
567 %p3 = getelementptr inbounds i32, i32* %p0, i64 3
568 %i0 = load i32, i32* %p0, align 1
569 %i1 = load i32, i32* %p1, align 1
570 %i2 = load i32, i32* %p2, align 1
571 %i3 = load i32, i32* %p3, align 1
572 %x0 = sext i32 %i0 to i64
573 %x1 = sext i32 %i1 to i64
574 %x2 = sext i32 %i2 to i64
575 %x3 = sext i32 %i3 to i64
576 %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
577 %v1 = insertelement <4 x i64> %v0, i64 %x1, i32 1
578 %v2 = insertelement <4 x i64> %v1, i64 %x2, i32 2
579 %v3 = insertelement <4 x i64> %v2, i64 %x3, i32 3