; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basicaa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE,SLM
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basicaa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512F
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+avx512bw -basicaa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512BW
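;
; The functions below build vectors by loading consecutive narrow integers,
; sign-extending each element, and inserting the results one lane at a time.
; The SLP vectorizer is expected to turn profitable chains into a single
; vector load followed by a vector sext, with the per-target differences
; captured by the SSE2/SLM/AVX1/AVX2/AVX512 check prefixes.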
define <2 x i64> @loadext_2i8_to_2i64(i8* %p0) {
; SSE2-LABEL: @loadext_2i8_to_2i64(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SSE2-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1
; SSE2-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1
; SSE2-NEXT: [[X0:%.*]] = sext i8 [[I0]] to i64
; SSE2-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i64
; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0
; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
; SSE2-NEXT: ret <2 x i64> [[V1]]
;
; SLM-LABEL: @loadext_2i8_to_2i64(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>*
; SLM-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>
; SLM-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
; SLM-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0
; SLM-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
; SLM-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[TMP5]], i32 1
; SLM-NEXT: ret <2 x i64> [[V1]]
;
; AVX-LABEL: @loadext_2i8_to_2i64(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>*
; AVX-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>
; AVX-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
; AVX-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0
; AVX-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
; AVX-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX-NEXT: ret <2 x i64> [[V1]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %x0 = sext i8 %i0 to i64
  %x1 = sext i8 %i1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
  ret <2 x i64> %v1
}

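; i8 -> i32, 4 lanes: SSE2 is expected to stay scalar, while SLM and the AVX
; targets form a <4 x i8> load plus a sext to <4 x i32>.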
define <4 x i32> @loadext_4i8_to_4i32(i8* %p0) {
; SSE2-LABEL: @loadext_4i8_to_4i32(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; SSE2-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1
; SSE2-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1
; SSE2-NEXT: [[I2:%.*]] = load i8, i8* [[P2]], align 1
; SSE2-NEXT: [[I3:%.*]] = load i8, i8* [[P3]], align 1
; SSE2-NEXT: [[X0:%.*]] = sext i8 [[I0]] to i32
; SSE2-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i32
; SSE2-NEXT: [[X2:%.*]] = sext i8 [[I2]] to i32
; SSE2-NEXT: [[X3:%.*]] = sext i8 [[I3]] to i32
; SSE2-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[X0]], i32 0
; SSE2-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[X1]], i32 1
; SSE2-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[X2]], i32 2
; SSE2-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[X3]], i32 3
; SSE2-NEXT: ret <4 x i32> [[V3]]
;
; SLM-LABEL: @loadext_4i8_to_4i32(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
; SLM-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32>
; SLM-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
; SLM-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0
; SLM-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1
; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[TMP5]], i32 1
; SLM-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2
; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[TMP6]], i32 2
; SLM-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[TMP7]], i32 3
; SLM-NEXT: ret <4 x i32> [[V3]]
;
; AVX-LABEL: @loadext_4i8_to_4i32(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
; AVX-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32>
; AVX-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
; AVX-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0
; AVX-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1
; AVX-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[TMP5]], i32 1
; AVX-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2
; AVX-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[TMP6]], i32 2
; AVX-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
; AVX-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[TMP7]], i32 3
; AVX-NEXT: ret <4 x i32> [[V3]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %i2 = load i8, i8* %p2, align 1
  %i3 = load i8, i8* %p3, align 1
  %x0 = sext i8 %i0 to i32
  %x1 = sext i8 %i1 to i32
  %x2 = sext i8 %i2 to i32
  %x3 = sext i8 %i3 to i32
  %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %x1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %x2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %x3, i32 3
  ret <4 x i32> %v3
}

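; i8 -> i64, 4 lanes: SLM vectorizes all four lanes, but the AVX prefixes only
; expect the low two lanes to become a <2 x i8> load and <2 x i64> sext, with
; lanes 2 and 3 left as scalar load/sext pairs; SSE2 stays fully scalar.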
define <4 x i64> @loadext_4i8_to_4i64(i8* %p0) {
; SSE2-LABEL: @loadext_4i8_to_4i64(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; SSE2-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1
; SSE2-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1
; SSE2-NEXT: [[I2:%.*]] = load i8, i8* [[P2]], align 1
; SSE2-NEXT: [[I3:%.*]] = load i8, i8* [[P3]], align 1
; SSE2-NEXT: [[X0:%.*]] = sext i8 [[I0]] to i64
; SSE2-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i64
; SSE2-NEXT: [[X2:%.*]] = sext i8 [[I2]] to i64
; SSE2-NEXT: [[X3:%.*]] = sext i8 [[I3]] to i64
; SSE2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0
; SSE2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1
; SSE2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
; SSE2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
; SSE2-NEXT: ret <4 x i64> [[V3]]
;
; SLM-LABEL: @loadext_4i8_to_4i64(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
; SLM-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i64>
; SLM-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
; SLM-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; SLM-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; SLM-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
; SLM-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
; SLM-NEXT: ret <4 x i64> [[V3]]
;
; AVX-LABEL: @loadext_4i8_to_4i64(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>*
; AVX-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
; AVX-NEXT: [[I2:%.*]] = load i8, i8* [[P2]], align 1
; AVX-NEXT: [[I3:%.*]] = load i8, i8* [[P3]], align 1
; AVX-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>
; AVX-NEXT: [[X2:%.*]] = sext i8 [[I2]] to i64
; AVX-NEXT: [[X3:%.*]] = sext i8 [[I3]] to i64
; AVX-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
; AVX-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; AVX-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
; AVX-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
; AVX-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
; AVX-NEXT: ret <4 x i64> [[V3]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %i2 = load i8, i8* %p2, align 1
  %i3 = load i8, i8* %p3, align 1
  %x0 = sext i8 %i0 to i64
  %x1 = sext i8 %i1 to i64
  %x2 = sext i8 %i2 to i64
  %x3 = sext i8 %i3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <4 x i64> %v0, i64 %x1, i32 1
  %v2 = insertelement <4 x i64> %v1, i64 %x2, i32 2
  %v3 = insertelement <4 x i64> %v2, i64 %x3, i32 3
  ret <4 x i64> %v3
}

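; i8 -> i16, 8 lanes: every target is expected to form the same <8 x i8> load
; and <8 x i16> sext, so a single CHECK block covers all prefixes.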
define <8 x i16> @loadext_8i8_to_8i16(i8* %p0) {
; CHECK-LABEL: @loadext_8i8_to_8i16(
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>*
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16>
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
; CHECK-NEXT: [[V0:%.*]] = insertelement <8 x i16> undef, i16 [[TMP4]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i16> [[TMP3]], i32 1
; CHECK-NEXT: [[V1:%.*]] = insertelement <8 x i16> [[V0]], i16 [[TMP5]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i16> [[TMP3]], i32 2
; CHECK-NEXT: [[V2:%.*]] = insertelement <8 x i16> [[V1]], i16 [[TMP6]], i32 2
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3
; CHECK-NEXT: [[V3:%.*]] = insertelement <8 x i16> [[V2]], i16 [[TMP7]], i32 3
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i16> [[TMP3]], i32 4
; CHECK-NEXT: [[V4:%.*]] = insertelement <8 x i16> [[V3]], i16 [[TMP8]], i32 4
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i16> [[TMP3]], i32 5
; CHECK-NEXT: [[V5:%.*]] = insertelement <8 x i16> [[V4]], i16 [[TMP9]], i32 5
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i16> [[TMP3]], i32 6
; CHECK-NEXT: [[V6:%.*]] = insertelement <8 x i16> [[V5]], i16 [[TMP10]], i32 6
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i16> [[TMP3]], i32 7
; CHECK-NEXT: [[V7:%.*]] = insertelement <8 x i16> [[V6]], i16 [[TMP11]], i32 7
; CHECK-NEXT: ret <8 x i16> [[V7]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
  %p4 = getelementptr inbounds i8, i8* %p0, i64 4
  %p5 = getelementptr inbounds i8, i8* %p0, i64 5
  %p6 = getelementptr inbounds i8, i8* %p0, i64 6
  %p7 = getelementptr inbounds i8, i8* %p0, i64 7
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %i2 = load i8, i8* %p2, align 1
  %i3 = load i8, i8* %p3, align 1
  %i4 = load i8, i8* %p4, align 1
  %i5 = load i8, i8* %p5, align 1
  %i6 = load i8, i8* %p6, align 1
  %i7 = load i8, i8* %p7, align 1
  %x0 = sext i8 %i0 to i16
  %x1 = sext i8 %i1 to i16
  %x2 = sext i8 %i2 to i16
  %x3 = sext i8 %i3 to i16
  %x4 = sext i8 %i4 to i16
  %x5 = sext i8 %i5 to i16
  %x6 = sext i8 %i6 to i16
  %x7 = sext i8 %i7 to i16
  %v0 = insertelement <8 x i16> undef, i16 %x0, i32 0
  %v1 = insertelement <8 x i16> %v0, i16 %x1, i32 1
  %v2 = insertelement <8 x i16> %v1, i16 %x2, i32 2
  %v3 = insertelement <8 x i16> %v2, i16 %x3, i32 3
  %v4 = insertelement <8 x i16> %v3, i16 %x4, i32 4
  %v5 = insertelement <8 x i16> %v4, i16 %x5, i32 5
  %v6 = insertelement <8 x i16> %v5, i16 %x6, i32 6
  %v7 = insertelement <8 x i16> %v6, i16 %x7, i32 7
  ret <8 x i16> %v7
}

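; i8 -> i32, 8 lanes: SSE, AVX2 and AVX512 expect a full <8 x i8> load, while
; AVX1 only vectorizes the low four lanes and keeps lanes 4-7 scalar.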
define <8 x i32> @loadext_8i8_to_8i32(i8* %p0) {
; SSE-LABEL: @loadext_8i8_to_8i32(
; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; SSE-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; SSE-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; SSE-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; SSE-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; SSE-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>*
; SSE-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; SSE-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i32>
; SSE-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0
; SSE-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0
; SSE-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1
; SSE-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1
; SSE-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2
; SSE-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2
; SSE-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3
; SSE-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3
; SSE-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4
; SSE-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4
; SSE-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5
; SSE-NEXT: [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[TMP9]], i32 5
; SSE-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6
; SSE-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6
; SSE-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7
; SSE-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7
; SSE-NEXT: ret <8 x i32> [[V7]]
;
; AVX1-LABEL: @loadext_8i8_to_8i32(
; AVX1-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; AVX1-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; AVX1-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; AVX1-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; AVX1-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; AVX1-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; AVX1-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; AVX1-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
; AVX1-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; AVX1-NEXT: [[I4:%.*]] = load i8, i8* [[P4]], align 1
; AVX1-NEXT: [[I5:%.*]] = load i8, i8* [[P5]], align 1
; AVX1-NEXT: [[I6:%.*]] = load i8, i8* [[P6]], align 1
; AVX1-NEXT: [[I7:%.*]] = load i8, i8* [[P7]], align 1
; AVX1-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32>
; AVX1-NEXT: [[X4:%.*]] = sext i8 [[I4]] to i32
; AVX1-NEXT: [[X5:%.*]] = sext i8 [[I5]] to i32
; AVX1-NEXT: [[X6:%.*]] = sext i8 [[I6]] to i32
; AVX1-NEXT: [[X7:%.*]] = sext i8 [[I7]] to i32
; AVX1-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
; AVX1-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0
; AVX1-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1
; AVX1-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1
; AVX1-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2
; AVX1-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2
; AVX1-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
; AVX1-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3
; AVX1-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[X4]], i32 4
; AVX1-NEXT: [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[X5]], i32 5
; AVX1-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[X6]], i32 6
; AVX1-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[X7]], i32 7
; AVX1-NEXT: ret <8 x i32> [[V7]]
;
; AVX2-LABEL: @loadext_8i8_to_8i32(
; AVX2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; AVX2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; AVX2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; AVX2-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; AVX2-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; AVX2-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; AVX2-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; AVX2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>*
; AVX2-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; AVX2-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i32>
; AVX2-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0
; AVX2-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0
; AVX2-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1
; AVX2-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1
; AVX2-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2
; AVX2-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2
; AVX2-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3
; AVX2-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3
; AVX2-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4
; AVX2-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4
; AVX2-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5
; AVX2-NEXT: [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[TMP9]], i32 5
; AVX2-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6
; AVX2-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6
; AVX2-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7
; AVX2-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7
; AVX2-NEXT: ret <8 x i32> [[V7]]
;
; AVX512-LABEL: @loadext_8i8_to_8i32(
; AVX512-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; AVX512-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; AVX512-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; AVX512-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; AVX512-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; AVX512-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; AVX512-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; AVX512-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>*
; AVX512-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; AVX512-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i32>
; AVX512-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0
; AVX512-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0
; AVX512-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1
; AVX512-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1
; AVX512-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2
; AVX512-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2
; AVX512-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3
; AVX512-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3
; AVX512-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4
; AVX512-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4
; AVX512-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5
; AVX512-NEXT: [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[TMP9]], i32 5
; AVX512-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6
; AVX512-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6
; AVX512-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7
; AVX512-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7
; AVX512-NEXT: ret <8 x i32> [[V7]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
  %p4 = getelementptr inbounds i8, i8* %p0, i64 4
  %p5 = getelementptr inbounds i8, i8* %p0, i64 5
  %p6 = getelementptr inbounds i8, i8* %p0, i64 6
  %p7 = getelementptr inbounds i8, i8* %p0, i64 7
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %i2 = load i8, i8* %p2, align 1
  %i3 = load i8, i8* %p3, align 1
  %i4 = load i8, i8* %p4, align 1
  %i5 = load i8, i8* %p5, align 1
  %i6 = load i8, i8* %p6, align 1
  %i7 = load i8, i8* %p7, align 1
  %x0 = sext i8 %i0 to i32
  %x1 = sext i8 %i1 to i32
  %x2 = sext i8 %i2 to i32
  %x3 = sext i8 %i3 to i32
  %x4 = sext i8 %i4 to i32
  %x5 = sext i8 %i5 to i32
  %x6 = sext i8 %i6 to i32
  %x7 = sext i8 %i7 to i32
  %v0 = insertelement <8 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <8 x i32> %v0, i32 %x1, i32 1
  %v2 = insertelement <8 x i32> %v1, i32 %x2, i32 2
  %v3 = insertelement <8 x i32> %v2, i32 %x3, i32 3
  %v4 = insertelement <8 x i32> %v3, i32 %x4, i32 4
  %v5 = insertelement <8 x i32> %v4, i32 %x5, i32 5
  %v6 = insertelement <8 x i32> %v5, i32 %x6, i32 6
  %v7 = insertelement <8 x i32> %v6, i32 %x7, i32 7
  ret <8 x i32> %v7
}

define <16 x i16> @loadext_16i8_to_16i16(i8* %p0) {
; CHECK-LABEL: @loadext_16i8_to_16i16(
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; CHECK-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8
; CHECK-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9
; CHECK-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10
; CHECK-NEXT: [[P11:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11
; CHECK-NEXT: [[P12:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12
; CHECK-NEXT: [[P13:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13
; CHECK-NEXT: [[P14:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14
; CHECK-NEXT: [[P15:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <16 x i8>*
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[TMP2]] to <16 x i16>
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x i16> [[TMP3]], i32 0
; CHECK-NEXT: [[V0:%.*]] = insertelement <16 x i16> undef, i16 [[TMP4]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x i16> [[TMP3]], i32 1
; CHECK-NEXT: [[V1:%.*]] = insertelement <16 x i16> [[V0]], i16 [[TMP5]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <16 x i16> [[TMP3]], i32 2
; CHECK-NEXT: [[V2:%.*]] = insertelement <16 x i16> [[V1]], i16 [[TMP6]], i32 2
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <16 x i16> [[TMP3]], i32 3
; CHECK-NEXT: [[V3:%.*]] = insertelement <16 x i16> [[V2]], i16 [[TMP7]], i32 3
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <16 x i16> [[TMP3]], i32 4
; CHECK-NEXT: [[V4:%.*]] = insertelement <16 x i16> [[V3]], i16 [[TMP8]], i32 4
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i16> [[TMP3]], i32 5
; CHECK-NEXT: [[V5:%.*]] = insertelement <16 x i16> [[V4]], i16 [[TMP9]], i32 5
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i16> [[TMP3]], i32 6
; CHECK-NEXT: [[V6:%.*]] = insertelement <16 x i16> [[V5]], i16 [[TMP10]], i32 6
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <16 x i16> [[TMP3]], i32 7
; CHECK-NEXT: [[V7:%.*]] = insertelement <16 x i16> [[V6]], i16 [[TMP11]], i32 7
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i16> [[TMP3]], i32 8
; CHECK-NEXT: [[V8:%.*]] = insertelement <16 x i16> [[V7]], i16 [[TMP12]], i32 8
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x i16> [[TMP3]], i32 9
; CHECK-NEXT: [[V9:%.*]] = insertelement <16 x i16> [[V8]], i16 [[TMP13]], i32 9
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i16> [[TMP3]], i32 10
; CHECK-NEXT: [[V10:%.*]] = insertelement <16 x i16> [[V9]], i16 [[TMP14]], i32 10
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i16> [[TMP3]], i32 11
; CHECK-NEXT: [[V11:%.*]] = insertelement <16 x i16> [[V10]], i16 [[TMP15]], i32 11
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i16> [[TMP3]], i32 12
; CHECK-NEXT: [[V12:%.*]] = insertelement <16 x i16> [[V11]], i16 [[TMP16]], i32 12
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i16> [[TMP3]], i32 13
; CHECK-NEXT: [[V13:%.*]] = insertelement <16 x i16> [[V12]], i16 [[TMP17]], i32 13
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i16> [[TMP3]], i32 14
; CHECK-NEXT: [[V14:%.*]] = insertelement <16 x i16> [[V13]], i16 [[TMP18]], i32 14
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i16> [[TMP3]], i32 15
; CHECK-NEXT: [[V15:%.*]] = insertelement <16 x i16> [[V14]], i16 [[TMP19]], i32 15
; CHECK-NEXT: ret <16 x i16> [[V15]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
  %p4 = getelementptr inbounds i8, i8* %p0, i64 4
  %p5 = getelementptr inbounds i8, i8* %p0, i64 5
  %p6 = getelementptr inbounds i8, i8* %p0, i64 6
  %p7 = getelementptr inbounds i8, i8* %p0, i64 7
  %p8 = getelementptr inbounds i8, i8* %p0, i64 8
  %p9 = getelementptr inbounds i8, i8* %p0, i64 9
  %p10 = getelementptr inbounds i8, i8* %p0, i64 10
  %p11 = getelementptr inbounds i8, i8* %p0, i64 11
  %p12 = getelementptr inbounds i8, i8* %p0, i64 12
  %p13 = getelementptr inbounds i8, i8* %p0, i64 13
  %p14 = getelementptr inbounds i8, i8* %p0, i64 14
  %p15 = getelementptr inbounds i8, i8* %p0, i64 15
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %i2 = load i8, i8* %p2, align 1
  %i3 = load i8, i8* %p3, align 1
  %i4 = load i8, i8* %p4, align 1
  %i5 = load i8, i8* %p5, align 1
  %i6 = load i8, i8* %p6, align 1
  %i7 = load i8, i8* %p7, align 1
  %i8 = load i8, i8* %p8, align 1
  %i9 = load i8, i8* %p9, align 1
  %i10 = load i8, i8* %p10, align 1
  %i11 = load i8, i8* %p11, align 1
  %i12 = load i8, i8* %p12, align 1
  %i13 = load i8, i8* %p13, align 1
  %i14 = load i8, i8* %p14, align 1
  %i15 = load i8, i8* %p15, align 1
  %x0 = sext i8 %i0 to i16
  %x1 = sext i8 %i1 to i16
  %x2 = sext i8 %i2 to i16
  %x3 = sext i8 %i3 to i16
  %x4 = sext i8 %i4 to i16
  %x5 = sext i8 %i5 to i16
  %x6 = sext i8 %i6 to i16
  %x7 = sext i8 %i7 to i16
  %x8 = sext i8 %i8 to i16
  %x9 = sext i8 %i9 to i16
  %x10 = sext i8 %i10 to i16
  %x11 = sext i8 %i11 to i16
  %x12 = sext i8 %i12 to i16
  %x13 = sext i8 %i13 to i16
  %x14 = sext i8 %i14 to i16
  %x15 = sext i8 %i15 to i16
  %v0 = insertelement <16 x i16> undef, i16 %x0, i32 0
  %v1 = insertelement <16 x i16> %v0, i16 %x1, i32 1
  %v2 = insertelement <16 x i16> %v1, i16 %x2, i32 2
  %v3 = insertelement <16 x i16> %v2, i16 %x3, i32 3
  %v4 = insertelement <16 x i16> %v3, i16 %x4, i32 4
  %v5 = insertelement <16 x i16> %v4, i16 %x5, i32 5
  %v6 = insertelement <16 x i16> %v5, i16 %x6, i32 6
  %v7 = insertelement <16 x i16> %v6, i16 %x7, i32 7
  %v8 = insertelement <16 x i16> %v7, i16 %x8, i32 8
  %v9 = insertelement <16 x i16> %v8, i16 %x9, i32 9
  %v10 = insertelement <16 x i16> %v9, i16 %x10, i32 10
  %v11 = insertelement <16 x i16> %v10, i16 %x11, i32 11
  %v12 = insertelement <16 x i16> %v11, i16 %x12, i32 12
  %v13 = insertelement <16 x i16> %v12, i16 %x13, i32 13
  %v14 = insertelement <16 x i16> %v13, i16 %x14, i32 14
  %v15 = insertelement <16 x i16> %v14, i16 %x15, i32 15
  ret <16 x i16> %v15
}

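;
; The same load+sext+insert patterns are repeated below with i16 source loads.
;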
define <2 x i64> @loadext_2i16_to_2i64(i16* %p0) {
; SSE2-LABEL: @loadext_2i16_to_2i64(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; SSE2-NEXT: [[I0:%.*]] = load i16, i16* [[P0]], align 1
; SSE2-NEXT: [[I1:%.*]] = load i16, i16* [[P1]], align 1
; SSE2-NEXT: [[X0:%.*]] = sext i16 [[I0]] to i64
; SSE2-NEXT: [[X1:%.*]] = sext i16 [[I1]] to i64
; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0
; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
; SSE2-NEXT: ret <2 x i64> [[V1]]
;
; SLM-LABEL: @loadext_2i16_to_2i64(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>*
; SLM-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64>
; SLM-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
; SLM-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0
; SLM-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
; SLM-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[TMP5]], i32 1
; SLM-NEXT: ret <2 x i64> [[V1]]
;
; AVX-LABEL: @loadext_2i16_to_2i64(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>*
; AVX-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64>
; AVX-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
; AVX-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0
; AVX-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
; AVX-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX-NEXT: ret <2 x i64> [[V1]]
;
  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
  %i0 = load i16, i16* %p0, align 1
  %i1 = load i16, i16* %p1, align 1
  %x0 = sext i16 %i0 to i64
  %x1 = sext i16 %i1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
  ret <2 x i64> %v1
}

define <4 x i32> @loadext_4i16_to_4i32(i16* %p0) {
; CHECK-LABEL: @loadext_4i16_to_4i32(
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
; CHECK-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1
; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[TMP5]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2
; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[TMP6]], i32 2
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[TMP7]], i32 3
; CHECK-NEXT: ret <4 x i32> [[V3]]
;
  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
  %p2 = getelementptr inbounds i16, i16* %p0, i64 2
  %p3 = getelementptr inbounds i16, i16* %p0, i64 3
  %i0 = load i16, i16* %p0, align 1
  %i1 = load i16, i16* %p1, align 1
  %i2 = load i16, i16* %p2, align 1
  %i3 = load i16, i16* %p3, align 1
  %x0 = sext i16 %i0 to i32
  %x1 = sext i16 %i1 to i32
  %x2 = sext i16 %i2 to i32
  %x3 = sext i16 %i3 to i32
  %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %x1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %x2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %x3, i32 3
  ret <4 x i32> %v3
}

define <4 x i64> @loadext_4i16_to_4i64(i16* %p0) {
; SSE2-LABEL: @loadext_4i16_to_4i64(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; SSE2-NEXT: [[I0:%.*]] = load i16, i16* [[P0]], align 1
; SSE2-NEXT: [[I1:%.*]] = load i16, i16* [[P1]], align 1
; SSE2-NEXT: [[I2:%.*]] = load i16, i16* [[P2]], align 1
; SSE2-NEXT: [[I3:%.*]] = load i16, i16* [[P3]], align 1
; SSE2-NEXT: [[X0:%.*]] = sext i16 [[I0]] to i64
; SSE2-NEXT: [[X1:%.*]] = sext i16 [[I1]] to i64
; SSE2-NEXT: [[X2:%.*]] = sext i16 [[I2]] to i64
; SSE2-NEXT: [[X3:%.*]] = sext i16 [[I3]] to i64
; SSE2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0
; SSE2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1
; SSE2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
; SSE2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
; SSE2-NEXT: ret <4 x i64> [[V3]]
;
; SLM-LABEL: @loadext_4i16_to_4i64(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>*
; SLM-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64>
; SLM-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
; SLM-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; SLM-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; SLM-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
; SLM-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
; SLM-NEXT: ret <4 x i64> [[V3]]
;
; AVX-LABEL: @loadext_4i16_to_4i64(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>*
; AVX-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
; AVX-NEXT: [[I2:%.*]] = load i16, i16* [[P2]], align 1
; AVX-NEXT: [[I3:%.*]] = load i16, i16* [[P3]], align 1
; AVX-NEXT: [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64>
; AVX-NEXT: [[X2:%.*]] = sext i16 [[I2]] to i64
; AVX-NEXT: [[X3:%.*]] = sext i16 [[I3]] to i64
; AVX-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
; AVX-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; AVX-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
; AVX-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
; AVX-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
; AVX-NEXT: ret <4 x i64> [[V3]]
;
  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
  %p2 = getelementptr inbounds i16, i16* %p0, i64 2
  %p3 = getelementptr inbounds i16, i16* %p0, i64 3
  %i0 = load i16, i16* %p0, align 1
  %i1 = load i16, i16* %p1, align 1
  %i2 = load i16, i16* %p2, align 1
  %i3 = load i16, i16* %p3, align 1
  %x0 = sext i16 %i0 to i64
  %x1 = sext i16 %i1 to i64
  %x2 = sext i16 %i2 to i64
  %x3 = sext i16 %i3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <4 x i64> %v0, i64 %x1, i32 1
  %v2 = insertelement <4 x i64> %v1, i64 %x2, i32 2
  %v3 = insertelement <4 x i64> %v2, i64 %x3, i32 3
  ret <4 x i64> %v3
}

define <8 x i32> @loadext_8i16_to_8i32(i16* %p0) {
; CHECK-LABEL: @loadext_8i16_to_8i32(
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4
; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5
; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6
; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>*
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0
; CHECK-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1
; CHECK-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2
; CHECK-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3
; CHECK-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4
; CHECK-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5
; CHECK-NEXT: [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[TMP9]], i32 5
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6
; CHECK-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7
; CHECK-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7
; CHECK-NEXT: ret <8 x i32> [[V7]]
;
  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
  %p2 = getelementptr inbounds i16, i16* %p0, i64 2
  %p3 = getelementptr inbounds i16, i16* %p0, i64 3
  %p4 = getelementptr inbounds i16, i16* %p0, i64 4
  %p5 = getelementptr inbounds i16, i16* %p0, i64 5
  %p6 = getelementptr inbounds i16, i16* %p0, i64 6
  %p7 = getelementptr inbounds i16, i16* %p0, i64 7
  %i0 = load i16, i16* %p0, align 1
  %i1 = load i16, i16* %p1, align 1
  %i2 = load i16, i16* %p2, align 1
  %i3 = load i16, i16* %p3, align 1
  %i4 = load i16, i16* %p4, align 1
  %i5 = load i16, i16* %p5, align 1
  %i6 = load i16, i16* %p6, align 1
  %i7 = load i16, i16* %p7, align 1
  %x0 = sext i16 %i0 to i32
  %x1 = sext i16 %i1 to i32
  %x2 = sext i16 %i2 to i32
  %x3 = sext i16 %i3 to i32
  %x4 = sext i16 %i4 to i32
  %x5 = sext i16 %i5 to i32
  %x6 = sext i16 %i6 to i32
  %x7 = sext i16 %i7 to i32
  %v0 = insertelement <8 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <8 x i32> %v0, i32 %x1, i32 1
  %v2 = insertelement <8 x i32> %v1, i32 %x2, i32 2
  %v3 = insertelement <8 x i32> %v2, i32 %x3, i32 3
  %v4 = insertelement <8 x i32> %v3, i32 %x4, i32 4
  %v5 = insertelement <8 x i32> %v4, i32 %x5, i32 5
  %v6 = insertelement <8 x i32> %v5, i32 %x6, i32 6
  %v7 = insertelement <8 x i32> %v6, i32 %x7, i32 7
  ret <8 x i32> %v7
}

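;
; The same load+sext+insert patterns are repeated below with i32 source loads.
;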
define <2 x i64> @loadext_2i32_to_2i64(i32* %p0) {
; SSE2-LABEL: @loadext_2i32_to_2i64(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
; SSE2-NEXT: [[I0:%.*]] = load i32, i32* [[P0]], align 1
; SSE2-NEXT: [[I1:%.*]] = load i32, i32* [[P1]], align 1
; SSE2-NEXT: [[X0:%.*]] = sext i32 [[I0]] to i64
; SSE2-NEXT: [[X1:%.*]] = sext i32 [[I1]] to i64
; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0
; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
; SSE2-NEXT: ret <2 x i64> [[V1]]
;
; SLM-LABEL: @loadext_2i32_to_2i64(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
; SLM-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <2 x i32>*
; SLM-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = sext <2 x i32> [[TMP2]] to <2 x i64>
; SLM-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
; SLM-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0
; SLM-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
; SLM-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[TMP5]], i32 1
; SLM-NEXT: ret <2 x i64> [[V1]]
;
; AVX-LABEL: @loadext_2i32_to_2i64(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <2 x i32>*
; AVX-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = sext <2 x i32> [[TMP2]] to <2 x i64>
; AVX-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
; AVX-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0
; AVX-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
; AVX-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX-NEXT: ret <2 x i64> [[V1]]
;
  %p1 = getelementptr inbounds i32, i32* %p0, i64 1
  %i0 = load i32, i32* %p0, align 1
  %i1 = load i32, i32* %p1, align 1
  %x0 = sext i32 %i0 to i64
  %x1 = sext i32 %i1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
  ret <2 x i64> %v1
}

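; i32 -> i64, 4 lanes: SSE2 stays scalar, SLM/AVX2/AVX512 expect a full
; <4 x i32> load and <4 x i64> sext, and AVX1 only vectorizes the low two
; lanes.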
define <4 x i64> @loadext_4i32_to_4i64(i32* %p0) {
; SSE2-LABEL: @loadext_4i32_to_4i64(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2
; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3
; SSE2-NEXT: [[I0:%.*]] = load i32, i32* [[P0]], align 1
; SSE2-NEXT: [[I1:%.*]] = load i32, i32* [[P1]], align 1
; SSE2-NEXT: [[I2:%.*]] = load i32, i32* [[P2]], align 1
; SSE2-NEXT: [[I3:%.*]] = load i32, i32* [[P3]], align 1
; SSE2-NEXT: [[X0:%.*]] = sext i32 [[I0]] to i64
; SSE2-NEXT: [[X1:%.*]] = sext i32 [[I1]] to i64
; SSE2-NEXT: [[X2:%.*]] = sext i32 [[I2]] to i64
; SSE2-NEXT: [[X3:%.*]] = sext i32 [[I3]] to i64
; SSE2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0
; SSE2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1
; SSE2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
; SSE2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
; SSE2-NEXT: ret <4 x i64> [[V3]]
;
; SLM-LABEL: @loadext_4i32_to_4i64(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2
; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3
; SLM-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <4 x i32>*
; SLM-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64>
; SLM-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
; SLM-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; SLM-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; SLM-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
; SLM-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
; SLM-NEXT: ret <4 x i64> [[V3]]
;
; AVX1-LABEL: @loadext_4i32_to_4i64(
; AVX1-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
; AVX1-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2
; AVX1-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3
; AVX1-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <2 x i32>*
; AVX1-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1
; AVX1-NEXT: [[I2:%.*]] = load i32, i32* [[P2]], align 1
; AVX1-NEXT: [[I3:%.*]] = load i32, i32* [[P3]], align 1
; AVX1-NEXT: [[TMP3:%.*]] = sext <2 x i32> [[TMP2]] to <2 x i64>
; AVX1-NEXT: [[X2:%.*]] = sext i32 [[I2]] to i64
; AVX1-NEXT: [[X3:%.*]] = sext i32 [[I3]] to i64
; AVX1-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
; AVX1-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; AVX1-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
; AVX1-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX1-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
; AVX1-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
; AVX1-NEXT: ret <4 x i64> [[V3]]
;
; AVX2-LABEL: @loadext_4i32_to_4i64(
; AVX2-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
; AVX2-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2
; AVX2-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3
; AVX2-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <4 x i32>*
; AVX2-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
; AVX2-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64>
; AVX2-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
; AVX2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; AVX2-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
; AVX2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX2-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
; AVX2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
; AVX2-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
; AVX2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
; AVX2-NEXT: ret <4 x i64> [[V3]]
;
; AVX512-LABEL: @loadext_4i32_to_4i64(
; AVX512-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
; AVX512-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2
; AVX512-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3
; AVX512-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <4 x i32>*
; AVX512-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
; AVX512-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64>
; AVX512-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
; AVX512-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; AVX512-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
; AVX512-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX512-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
; AVX512-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
; AVX512-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
; AVX512-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
; AVX512-NEXT: ret <4 x i64> [[V3]]
;
  %p1 = getelementptr inbounds i32, i32* %p0, i64 1
  %p2 = getelementptr inbounds i32, i32* %p0, i64 2
  %p3 = getelementptr inbounds i32, i32* %p0, i64 3
  %i0 = load i32, i32* %p0, align 1
  %i1 = load i32, i32* %p1, align 1
  %i2 = load i32, i32* %p2, align 1
  %i3 = load i32, i32* %p3, align 1
  %x0 = sext i32 %i0 to i64
  %x1 = sext i32 %i1 to i64
  %x2 = sext i32 %i2 to i64
  %x3 = sext i32 %i3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <4 x i64> %v0, i64 %x1, i32 1
  %v2 = insertelement <4 x i64> %v1, i64 %x2, i32 2
  %v3 = insertelement <4 x i64> %v2, i64 %x3, i32 3
  ret <4 x i64> %v3
}