; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX1
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX1
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256DQ
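; Checks SLP vectorization of scalar uitofp chains from i8/i16/i32/i64 sources
; into f32/f64 destinations across the SSE/AVX/AVX2/AVX512 configurations
; exercised by the RUN lines above.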
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

@src64 = common global [8 x i64] zeroinitializer, align 64
@src32 = common global [16 x i32] zeroinitializer, align 64
@src16 = common global [32 x i16] zeroinitializer, align 64
@src8 = common global [64 x i8] zeroinitializer, align 64

@dst64 = common global [8 x double] zeroinitializer, align 64
@dst32 = common global [16 x float] zeroinitializer, align 64

;
; UITOFP to vXf64
;
define void @uitofp_2i64_2f64() #0 {
; CHECK-LABEL: @uitofp_2i64_2f64(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x double>
; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; CHECK-NEXT:    ret void
;
  %ld0 = load i64, ptr @src64, align 64
  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
  %cvt0 = uitofp i64 %ld0 to double
  %cvt1 = uitofp i64 %ld1 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  ret void
}
define void @uitofp_4i64_4f64() #0 {
; SSE-LABEL: @uitofp_4i64_4f64(
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i64> [[TMP3]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    ret void
;
; AVX-LABEL: @uitofp_4i64_4f64(
; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
; AVX-NEXT:    [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x double>
; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i64, ptr @src64, align 64
  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
  %ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
  %ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
  %cvt0 = uitofp i64 %ld0 to double
  %cvt1 = uitofp i64 %ld1 to double
  %cvt2 = uitofp i64 %ld2 to double
  %cvt3 = uitofp i64 %ld3 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
  ret void
}
define void @uitofp_8i64_8f64() #0 {
; SSE-LABEL: @uitofp_8i64_8f64(
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i64> [[TMP3]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
; SSE-NEXT:    [[TMP6:%.*]] = uitofp <2 x i64> [[TMP5]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 6), align 16
; SSE-NEXT:    [[TMP8:%.*]] = uitofp <2 x i64> [[TMP7]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
; SSE-NEXT:    ret void
;
; AVX256-LABEL: @uitofp_8i64_8f64(
; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
; AVX256-NEXT:    [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x double>
; AVX256-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <4 x i64> [[TMP3]] to <4 x double>
; AVX256-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_8i64_8f64(
; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @src64, align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <8 x i64> [[TMP1]] to <8 x double>
; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 64
; AVX512-NEXT:    ret void
;
  %ld0 = load i64, ptr @src64, align 64
  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
  %ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
  %ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
  %ld4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
  %ld5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 5), align 8
  %ld6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 6), align 16
  %ld7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 7), align 8
  %cvt0 = uitofp i64 %ld0 to double
  %cvt1 = uitofp i64 %ld1 to double
  %cvt2 = uitofp i64 %ld2 to double
  %cvt3 = uitofp i64 %ld3 to double
  %cvt4 = uitofp i64 %ld4 to double
  %cvt5 = uitofp i64 %ld5 to double
  %cvt6 = uitofp i64 %ld6 to double
  %cvt7 = uitofp i64 %ld7 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
  store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
  store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
  store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
  store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
  ret void
}
define void @uitofp_2i32_2f64() #0 {
; SSE-LABEL: @uitofp_2i32_2f64(
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT:    ret void
;
; AVX1-LABEL: @uitofp_2i32_2f64(
; AVX1-NEXT:    [[LD0:%.*]] = load i32, ptr @src32, align 64
; AVX1-NEXT:    [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
; AVX1-NEXT:    [[CVT0:%.*]] = uitofp i32 [[LD0]] to double
; AVX1-NEXT:    [[CVT1:%.*]] = uitofp i32 [[LD1]] to double
; AVX1-NEXT:    store double [[CVT0]], ptr @dst64, align 64
; AVX1-NEXT:    store double [[CVT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
; AVX1-NEXT:    ret void
;
; AVX2-LABEL: @uitofp_2i32_2f64(
; AVX2-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
; AVX2-NEXT:    [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
; AVX2-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; AVX2-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_2i32_2f64(
; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
; AVX512-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; AVX512-NEXT:    ret void
;
; AVX256DQ-LABEL: @uitofp_2i32_2f64(
; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
; AVX256DQ-NEXT:    [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
; AVX256DQ-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; AVX256DQ-NEXT:    ret void
;
  %ld0 = load i32, ptr @src32, align 64
  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
  %cvt0 = uitofp i32 %ld0 to double
  %cvt1 = uitofp i32 %ld1 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  ret void
}
define void @uitofp_4i32_4f64() #0 {
; SSE-LABEL: @uitofp_4i32_4f64(
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i32> [[TMP3]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    ret void
;
; AVX-LABEL: @uitofp_4i32_4f64(
; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
; AVX-NEXT:    [[TMP2:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x double>
; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i32, ptr @src32, align 64
  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
  %ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
  %ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
  %cvt0 = uitofp i32 %ld0 to double
  %cvt1 = uitofp i32 %ld1 to double
  %cvt2 = uitofp i32 %ld2 to double
  %cvt3 = uitofp i32 %ld3 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
  ret void
}
define void @uitofp_8i32_8f64() #0 {
; SSE-LABEL: @uitofp_8i32_8f64(
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i32> [[TMP3]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
; SSE-NEXT:    [[TMP6:%.*]] = uitofp <2 x i32> [[TMP5]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6), align 8
; SSE-NEXT:    [[TMP8:%.*]] = uitofp <2 x i32> [[TMP7]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
; SSE-NEXT:    ret void
;
; AVX256-LABEL: @uitofp_8i32_8f64(
; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
; AVX256-NEXT:    [[TMP2:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x double>
; AVX256-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <4 x i32> [[TMP3]] to <4 x double>
; AVX256-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_8i32_8f64(
; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <8 x i32> [[TMP1]] to <8 x double>
; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 64
; AVX512-NEXT:    ret void
;
  %ld0 = load i32, ptr @src32, align 64
  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
  %ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
  %ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
  %ld4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
  %ld5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 5), align 4
  %ld6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6), align 8
  %ld7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 7), align 4
  %cvt0 = uitofp i32 %ld0 to double
  %cvt1 = uitofp i32 %ld1 to double
  %cvt2 = uitofp i32 %ld2 to double
  %cvt3 = uitofp i32 %ld3 to double
  %cvt4 = uitofp i32 %ld4 to double
  %cvt5 = uitofp i32 %ld5 to double
  %cvt6 = uitofp i32 %ld6 to double
  %cvt7 = uitofp i32 %ld7 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
  store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
  store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
  store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
  store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
  ret void
}
define void @uitofp_2i16_2f64() #0 {
; CHECK-LABEL: @uitofp_2i16_2f64(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr @src16, align 64
; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <2 x i16> [[TMP1]] to <2 x double>
; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; CHECK-NEXT:    ret void
;
  %ld0 = load i16, ptr @src16, align 64
  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
  %cvt0 = uitofp i16 %ld0 to double
  %cvt1 = uitofp i16 %ld1 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  ret void
}
define void @uitofp_4i16_4f64() #0 {
; SSE-LABEL: @uitofp_4i16_4f64(
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr @src16, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i16> [[TMP1]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i16> [[TMP3]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    ret void
;
; AVX-LABEL: @uitofp_4i16_4f64(
; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
; AVX-NEXT:    [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x double>
; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i16, ptr @src16, align 64
  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
  %ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
  %ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
  %cvt0 = uitofp i16 %ld0 to double
  %cvt1 = uitofp i16 %ld1 to double
  %cvt2 = uitofp i16 %ld2 to double
  %cvt3 = uitofp i16 %ld3 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
  ret void
}
define void @uitofp_8i16_8f64() #0 {
; SSE-LABEL: @uitofp_8i16_8f64(
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr @src16, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i16> [[TMP1]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i16> [[TMP3]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
; SSE-NEXT:    [[TMP6:%.*]] = uitofp <2 x i16> [[TMP5]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6), align 4
; SSE-NEXT:    [[TMP8:%.*]] = uitofp <2 x i16> [[TMP7]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
; SSE-NEXT:    ret void
;
; AVX256-LABEL: @uitofp_8i16_8f64(
; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
; AVX256-NEXT:    [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x double>
; AVX256-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <4 x i16> [[TMP3]] to <4 x double>
; AVX256-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_8i16_8f64(
; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <8 x i16> [[TMP1]] to <8 x double>
; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 64
; AVX512-NEXT:    ret void
;
  %ld0 = load i16, ptr @src16, align 64
  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
  %ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
  %ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
  %ld4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
  %ld5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 5), align 2
  %ld6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6), align 4
  %ld7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 7), align 2
  %cvt0 = uitofp i16 %ld0 to double
  %cvt1 = uitofp i16 %ld1 to double
  %cvt2 = uitofp i16 %ld2 to double
  %cvt3 = uitofp i16 %ld3 to double
  %cvt4 = uitofp i16 %ld4 to double
  %cvt5 = uitofp i16 %ld5 to double
  %cvt6 = uitofp i16 %ld6 to double
  %cvt7 = uitofp i16 %ld7 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
  store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
  store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
  store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
  store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
  ret void
}
define void @uitofp_2i8_2f64() #0 {
; CHECK-LABEL: @uitofp_2i8_2f64(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr @src8, align 64
; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <2 x i8> [[TMP1]] to <2 x double>
; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; CHECK-NEXT:    ret void
;
  %ld0 = load i8, ptr @src8, align 64
  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
  %cvt0 = uitofp i8 %ld0 to double
  %cvt1 = uitofp i8 %ld1 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  ret void
}
define void @uitofp_4i8_4f64() #0 {
; SSE-LABEL: @uitofp_4i8_4f64(
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr @src8, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i8> [[TMP1]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i8> [[TMP3]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    ret void
;
; AVX-LABEL: @uitofp_4i8_4f64(
; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
; AVX-NEXT:    [[TMP2:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x double>
; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i8, ptr @src8, align 64
  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
  %ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
  %ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
  %cvt0 = uitofp i8 %ld0 to double
  %cvt1 = uitofp i8 %ld1 to double
  %cvt2 = uitofp i8 %ld2 to double
  %cvt3 = uitofp i8 %ld3 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
  ret void
}
define void @uitofp_8i8_8f64() #0 {
; SSE-LABEL: @uitofp_8i8_8f64(
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr @src8, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i8> [[TMP1]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i8> [[TMP3]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
; SSE-NEXT:    [[TMP6:%.*]] = uitofp <2 x i8> [[TMP5]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6), align 2
; SSE-NEXT:    [[TMP8:%.*]] = uitofp <2 x i8> [[TMP7]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
; SSE-NEXT:    ret void
;
; AVX256-LABEL: @uitofp_8i8_8f64(
; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
; AVX256-NEXT:    [[TMP2:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x double>
; AVX256-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <4 x i8> [[TMP3]] to <4 x double>
; AVX256-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_8i8_8f64(
; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @src8, align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <8 x i8> [[TMP1]] to <8 x double>
; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 64
; AVX512-NEXT:    ret void
;
  %ld0 = load i8, ptr @src8, align 64
  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
  %ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
  %ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
  %ld4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
  %ld5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 5), align 1
  %ld6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6), align 2
  %ld7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 7), align 1
  %cvt0 = uitofp i8 %ld0 to double
  %cvt1 = uitofp i8 %ld1 to double
  %cvt2 = uitofp i8 %ld2 to double
  %cvt3 = uitofp i8 %ld3 to double
  %cvt4 = uitofp i8 %ld4 to double
  %cvt5 = uitofp i8 %ld5 to double
  %cvt6 = uitofp i8 %ld6 to double
  %cvt7 = uitofp i8 %ld7 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
  store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
  store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
  store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
  store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
  ret void
}

;
; UITOFP to vXf32
;
define void @uitofp_2i64_2f32() #0 {
; SSE-LABEL: @uitofp_2i64_2f32(
; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
; SSE-NEXT:    [[CVT0:%.*]] = uitofp i64 [[LD0]] to float
; SSE-NEXT:    [[CVT1:%.*]] = uitofp i64 [[LD1]] to float
; SSE-NEXT:    store float [[CVT0]], ptr @dst32, align 64
; SSE-NEXT:    store float [[CVT1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
; SSE-NEXT:    ret void
;
; AVX1-LABEL: @uitofp_2i64_2f32(
; AVX1-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
; AVX1-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
; AVX1-NEXT:    [[CVT0:%.*]] = uitofp i64 [[LD0]] to float
; AVX1-NEXT:    [[CVT1:%.*]] = uitofp i64 [[LD1]] to float
; AVX1-NEXT:    store float [[CVT0]], ptr @dst32, align 64
; AVX1-NEXT:    store float [[CVT1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
; AVX1-NEXT:    ret void
;
; AVX2-LABEL: @uitofp_2i64_2f32(
; AVX2-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
; AVX2-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
; AVX2-NEXT:    [[CVT0:%.*]] = uitofp i64 [[LD0]] to float
; AVX2-NEXT:    [[CVT1:%.*]] = uitofp i64 [[LD1]] to float
; AVX2-NEXT:    store float [[CVT0]], ptr @dst32, align 64
; AVX2-NEXT:    store float [[CVT1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
; AVX2-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_2i64_2f32(
; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x float>
; AVX512-NEXT:    store <2 x float> [[TMP2]], ptr @dst32, align 64
; AVX512-NEXT:    ret void
;
; AVX256DQ-LABEL: @uitofp_2i64_2f32(
; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
; AVX256DQ-NEXT:    [[TMP2:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x float>
; AVX256DQ-NEXT:    store <2 x float> [[TMP2]], ptr @dst32, align 64
; AVX256DQ-NEXT:    ret void
;
  %ld0 = load i64, ptr @src64, align 64
  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
  %cvt0 = uitofp i64 %ld0 to float
  %cvt1 = uitofp i64 %ld1 to float
  store float %cvt0, ptr @dst32, align 64
  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
  ret void
}
define void @uitofp_4i64_4f32() #0 {
; CHECK-LABEL: @uitofp_4i64_4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x float>
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; CHECK-NEXT:    ret void
;
  %ld0 = load i64, ptr @src64, align 64
  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
  %ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
  %ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
  %cvt0 = uitofp i64 %ld0 to float
  %cvt1 = uitofp i64 %ld1 to float
  %cvt2 = uitofp i64 %ld2 to float
  %cvt3 = uitofp i64 %ld3 to float
  store float %cvt0, ptr @dst32, align 64
  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
  ret void
}
define void @uitofp_8i64_8f32() #0 {
; SSE-LABEL: @uitofp_8i64_8f32(
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <4 x i64> [[TMP3]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT:    ret void
;
; AVX-LABEL: @uitofp_8i64_8f32(
; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @src64, align 64
; AVX-NEXT:    [[TMP2:%.*]] = uitofp <8 x i64> [[TMP1]] to <8 x float>
; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i64, ptr @src64, align 64
  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
  %ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
  %ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
  %ld4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
  %ld5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 5), align 8
  %ld6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 6), align 16
  %ld7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 7), align 8
  %cvt0 = uitofp i64 %ld0 to float
  %cvt1 = uitofp i64 %ld1 to float
  %cvt2 = uitofp i64 %ld2 to float
  %cvt3 = uitofp i64 %ld3 to float
  %cvt4 = uitofp i64 %ld4 to float
  %cvt5 = uitofp i64 %ld5 to float
  %cvt6 = uitofp i64 %ld6 to float
  %cvt7 = uitofp i64 %ld7 to float
  store float %cvt0, ptr @dst32, align 64
  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
  store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
  store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
  store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
  store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
  ret void
}
define void @uitofp_4i32_4f32() #0 {
; CHECK-LABEL: @uitofp_4i32_4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float>
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; CHECK-NEXT:    ret void
;
  %ld0 = load i32, ptr @src32, align 64
  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
  %ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
  %ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
  %cvt0 = uitofp i32 %ld0 to float
  %cvt1 = uitofp i32 %ld1 to float
  %cvt2 = uitofp i32 %ld2 to float
  %cvt3 = uitofp i32 %ld3 to float
  store float %cvt0, ptr @dst32, align 64
  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
  ret void
}
define void @uitofp_8i32_8f32() #0 {
; SSE-LABEL: @uitofp_8i32_8f32(
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <4 x i32> [[TMP3]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT:    ret void
;
; AVX-LABEL: @uitofp_8i32_8f32(
; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 64
; AVX-NEXT:    [[TMP2:%.*]] = uitofp <8 x i32> [[TMP1]] to <8 x float>
; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i32, ptr @src32, align 64
  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
  %ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
  %ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
  %ld4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
  %ld5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 5), align 4
  %ld6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6), align 8
  %ld7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 7), align 4
  %cvt0 = uitofp i32 %ld0 to float
  %cvt1 = uitofp i32 %ld1 to float
  %cvt2 = uitofp i32 %ld2 to float
  %cvt3 = uitofp i32 %ld3 to float
  %cvt4 = uitofp i32 %ld4 to float
  %cvt5 = uitofp i32 %ld5 to float
  %cvt6 = uitofp i32 %ld6 to float
  %cvt7 = uitofp i32 %ld7 to float
  store float %cvt0, ptr @dst32, align 64
  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
  store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
  store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
  store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
  store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
  ret void
}
define void @uitofp_16i32_16f32() #0 {
; SSE-LABEL: @uitofp_16i32_16f32(
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <4 x i32> [[TMP3]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 8), align 32
; SSE-NEXT:    [[TMP6:%.*]] = uitofp <4 x i32> [[TMP5]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 12), align 16
; SSE-NEXT:    [[TMP8:%.*]] = uitofp <4 x i32> [[TMP7]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
; SSE-NEXT:    ret void
;
; AVX256-LABEL: @uitofp_16i32_16f32(
; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 64
; AVX256-NEXT:    [[TMP2:%.*]] = uitofp <8 x i32> [[TMP1]] to <8 x float>
; AVX256-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 8), align 32
; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <8 x i32> [[TMP3]] to <8 x float>
; AVX256-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_16i32_16f32(
; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @src32, align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <16 x i32> [[TMP1]] to <16 x float>
; AVX512-NEXT:    store <16 x float> [[TMP2]], ptr @dst32, align 64
; AVX512-NEXT:    ret void
;
  %ld0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 0 ), align 64
  %ld1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1 ), align 4
  %ld2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2 ), align 8
  %ld3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3 ), align 4
  %ld4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4 ), align 16
  %ld5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 5 ), align 4
  %ld6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6 ), align 8
  %ld7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 7 ), align 4
  %ld8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 8 ), align 32
  %ld9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 9 ), align 4
  %ld10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 10), align 8
  %ld11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 11), align 4
  %ld12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 12), align 16
  %ld13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 13), align 4
  %ld14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 14), align 8
  %ld15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 15), align 4
  %cvt0  = uitofp i32 %ld0 to float
  %cvt1  = uitofp i32 %ld1 to float
  %cvt2  = uitofp i32 %ld2 to float
  %cvt3  = uitofp i32 %ld3 to float
  %cvt4  = uitofp i32 %ld4 to float
  %cvt5  = uitofp i32 %ld5 to float
  %cvt6  = uitofp i32 %ld6 to float
  %cvt7  = uitofp i32 %ld7 to float
  %cvt8  = uitofp i32 %ld8 to float
  %cvt9  = uitofp i32 %ld9 to float
  %cvt10 = uitofp i32 %ld10 to float
  %cvt11 = uitofp i32 %ld11 to float
  %cvt12 = uitofp i32 %ld12 to float
  %cvt13 = uitofp i32 %ld13 to float
  %cvt14 = uitofp i32 %ld14 to float
  %cvt15 = uitofp i32 %ld15 to float
  store float %cvt0 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 0 ), align 64
  store float %cvt1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1 ), align 4
  store float %cvt2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2 ), align 8
  store float %cvt3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3 ), align 4
  store float %cvt4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4 ), align 16
  store float %cvt5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5 ), align 4
  store float %cvt6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6 ), align 8
  store float %cvt7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7 ), align 4
  store float %cvt8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8 ), align 32
  store float %cvt9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9 ), align 4
  store float %cvt10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 8
  store float %cvt11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
  store float %cvt12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
  store float %cvt13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
  store float %cvt14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 8
  store float %cvt15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
  ret void
}
define void @uitofp_4i16_4f32() #0 {
; CHECK-LABEL: @uitofp_4i16_4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x float>
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; CHECK-NEXT:    ret void
;
  %ld0 = load i16, ptr @src16, align 64
  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
  %ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
  %ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
  %cvt0 = uitofp i16 %ld0 to float
  %cvt1 = uitofp i16 %ld1 to float
  %cvt2 = uitofp i16 %ld2 to float
  %cvt3 = uitofp i16 %ld3 to float
  store float %cvt0, ptr @dst32, align 64
  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
  ret void
}
define void @uitofp_8i16_8f32() #0 {
; SSE-LABEL: @uitofp_8i16_8f32(
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <4 x i16> [[TMP3]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT:    ret void
;
; AVX-LABEL: @uitofp_8i16_8f32(
; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 64
; AVX-NEXT:    [[TMP2:%.*]] = uitofp <8 x i16> [[TMP1]] to <8 x float>
; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i16, ptr @src16, align 64
  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
  %ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
  %ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
  %ld4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
  %ld5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 5), align 2
  %ld6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6), align 4
  %ld7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 7), align 2
  %cvt0 = uitofp i16 %ld0 to float
  %cvt1 = uitofp i16 %ld1 to float
  %cvt2 = uitofp i16 %ld2 to float
  %cvt3 = uitofp i16 %ld3 to float
  %cvt4 = uitofp i16 %ld4 to float
  %cvt5 = uitofp i16 %ld5 to float
  %cvt6 = uitofp i16 %ld6 to float
  %cvt7 = uitofp i16 %ld7 to float
  store float %cvt0, ptr @dst32, align 64
  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
  store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
  store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
  store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
  store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
  ret void
}
define void @uitofp_16i16_16f32() #0 {
; SSE-LABEL: @uitofp_16i16_16f32(
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <4 x i16> [[TMP3]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 8), align 16
; SSE-NEXT:    [[TMP6:%.*]] = uitofp <4 x i16> [[TMP5]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 12), align 8
; SSE-NEXT:    [[TMP8:%.*]] = uitofp <4 x i16> [[TMP7]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
; SSE-NEXT:    ret void
;
; AVX256-LABEL: @uitofp_16i16_16f32(
; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 64
; AVX256-NEXT:    [[TMP2:%.*]] = uitofp <8 x i16> [[TMP1]] to <8 x float>
; AVX256-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 8), align 16
; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <8 x i16> [[TMP3]] to <8 x float>
; AVX256-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_16i16_16f32(
; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @src16, align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <16 x i16> [[TMP1]] to <16 x float>
; AVX512-NEXT:    store <16 x float> [[TMP2]], ptr @dst32, align 64
; AVX512-NEXT:    ret void
;
  %ld0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 0 ), align 64
  %ld1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1 ), align 2
  %ld2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2 ), align 4
  %ld3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3 ), align 2
  %ld4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4 ), align 8
  %ld5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 5 ), align 2
  %ld6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6 ), align 4
  %ld7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 7 ), align 2
  %ld8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 8 ), align 16
  %ld9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 9 ), align 2
  %ld10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 10), align 4
  %ld11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 11), align 2
  %ld12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 12), align 8
  %ld13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 13), align 2
  %ld14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 14), align 4
  %ld15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 15), align 2
  %cvt0  = uitofp i16 %ld0 to float
  %cvt1  = uitofp i16 %ld1 to float
  %cvt2  = uitofp i16 %ld2 to float
  %cvt3  = uitofp i16 %ld3 to float
  %cvt4  = uitofp i16 %ld4 to float
  %cvt5  = uitofp i16 %ld5 to float
  %cvt6  = uitofp i16 %ld6 to float
  %cvt7  = uitofp i16 %ld7 to float
  %cvt8  = uitofp i16 %ld8 to float
  %cvt9  = uitofp i16 %ld9 to float
  %cvt10 = uitofp i16 %ld10 to float
  %cvt11 = uitofp i16 %ld11 to float
  %cvt12 = uitofp i16 %ld12 to float
  %cvt13 = uitofp i16 %ld13 to float
  %cvt14 = uitofp i16 %ld14 to float
  %cvt15 = uitofp i16 %ld15 to float
  store float %cvt0 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 0 ), align 64
  store float %cvt1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1 ), align 4
  store float %cvt2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2 ), align 8
  store float %cvt3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3 ), align 4
  store float %cvt4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4 ), align 16
  store float %cvt5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5 ), align 4
  store float %cvt6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6 ), align 8
  store float %cvt7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7 ), align 4
  store float %cvt8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8 ), align 32
  store float %cvt9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9 ), align 4
  store float %cvt10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 8
  store float %cvt11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
  store float %cvt12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
  store float %cvt13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
  store float %cvt14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 8
  store float %cvt15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
  ret void
}
define void @uitofp_4i8_4f32() #0 {
; CHECK-LABEL: @uitofp_4i8_4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x float>
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; CHECK-NEXT:    ret void
;
  %ld0 = load i8, ptr @src8, align 64
  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
  %ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
  %ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
  %cvt0 = uitofp i8 %ld0 to float
  %cvt1 = uitofp i8 %ld1 to float
  %cvt2 = uitofp i8 %ld2 to float
  %cvt3 = uitofp i8 %ld3 to float
  store float %cvt0, ptr @dst32, align 64
  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
  ret void
}
define void @uitofp_8i8_8f32() #0 {
; SSE-LABEL: @uitofp_8i8_8f32(
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <4 x i8> [[TMP3]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT:    ret void
;
; AVX-LABEL: @uitofp_8i8_8f32(
; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @src8, align 64
; AVX-NEXT:    [[TMP2:%.*]] = uitofp <8 x i8> [[TMP1]] to <8 x float>
; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i8, ptr @src8, align 64
  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
  %ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
  %ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
  %ld4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
  %ld5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 5), align 1
  %ld6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6), align 2
  %ld7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 7), align 1
  %cvt0 = uitofp i8 %ld0 to float
  %cvt1 = uitofp i8 %ld1 to float
  %cvt2 = uitofp i8 %ld2 to float
  %cvt3 = uitofp i8 %ld3 to float
  %cvt4 = uitofp i8 %ld4 to float
  %cvt5 = uitofp i8 %ld5 to float
  %cvt6 = uitofp i8 %ld6 to float
  %cvt7 = uitofp i8 %ld7 to float
  store float %cvt0, ptr @dst32, align 64
  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
  store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
  store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
  store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
  store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
  ret void
}
define void @uitofp_16i8_16f32() #0 {
; SSE-LABEL: @uitofp_16i8_16f32(
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <4 x i8> [[TMP3]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 8), align 8
; SSE-NEXT:    [[TMP6:%.*]] = uitofp <4 x i8> [[TMP5]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 12), align 4
; SSE-NEXT:    [[TMP8:%.*]] = uitofp <4 x i8> [[TMP7]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
; SSE-NEXT:    ret void
;
; AVX256-LABEL: @uitofp_16i8_16f32(
; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @src8, align 64
; AVX256-NEXT:    [[TMP2:%.*]] = uitofp <8 x i8> [[TMP1]] to <8 x float>
; AVX256-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 8), align 8
; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <8 x i8> [[TMP3]] to <8 x float>
; AVX256-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_16i8_16f32(
; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <16 x i8> [[TMP1]] to <16 x float>
; AVX512-NEXT:    store <16 x float> [[TMP2]], ptr @dst32, align 64
; AVX512-NEXT:    ret void
;
  %ld0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 0 ), align 64
  %ld1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1 ), align 1
  %ld2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2 ), align 2
  %ld3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3 ), align 1
  %ld4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4 ), align 4
  %ld5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 5 ), align 1
  %ld6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6 ), align 2
  %ld7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 7 ), align 1
  %ld8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 8 ), align 8
  %ld9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 9 ), align 1
  %ld10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 10), align 2
  %ld11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 11), align 1
  %ld12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 12), align 4
  %ld13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 13), align 1
  %ld14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 14), align 2
  %ld15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 15), align 1
  %cvt0  = uitofp i8 %ld0 to float
  %cvt1  = uitofp i8 %ld1 to float
  %cvt2  = uitofp i8 %ld2 to float
  %cvt3  = uitofp i8 %ld3 to float
  %cvt4  = uitofp i8 %ld4 to float
  %cvt5  = uitofp i8 %ld5 to float
  %cvt6  = uitofp i8 %ld6 to float
  %cvt7  = uitofp i8 %ld7 to float
  %cvt8  = uitofp i8 %ld8 to float
  %cvt9  = uitofp i8 %ld9 to float
  %cvt10 = uitofp i8 %ld10 to float
  %cvt11 = uitofp i8 %ld11 to float
  %cvt12 = uitofp i8 %ld12 to float
  %cvt13 = uitofp i8 %ld13 to float
  %cvt14 = uitofp i8 %ld14 to float
  %cvt15 = uitofp i8 %ld15 to float
  store float %cvt0 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 0 ), align 64
  store float %cvt1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1 ), align 4
  store float %cvt2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2 ), align 8
  store float %cvt3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3 ), align 4
  store float %cvt4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4 ), align 16
  store float %cvt5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5 ), align 4
  store float %cvt6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6 ), align 8
  store float %cvt7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7 ), align 4
  store float %cvt8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8 ), align 32
  store float %cvt9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9 ), align 4
  store float %cvt10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 8
  store float %cvt11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
  store float %cvt12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
  store float %cvt13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
  store float %cvt14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 8
  store float %cvt15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
  ret void
}
attributes #0 = { nounwind }