; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256NODQ
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256NODQ
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256NODQ
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=-prefer-256-bit -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=+prefer-256-bit -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256DQ

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
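; The RUN lines exercise a ladder of x86 feature levels: bare x86-64 (SSE),
; corei7-avx / bdver1 / core-avx2 (256-bit AVX without the AVX512DQ
; conversions), and skylake-avx512 both with 512-bit vectors allowed (AVX512)
; and restricted to 256-bit vectors (AVX256DQ). Each FileCheck prefix pins
; down how wide the SLP vectorizer should go for the corresponding uitofp chain.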
@src64 = common global [8 x i64] zeroinitializer, align 64
@src32 = common global [16 x i32] zeroinitializer, align 64
@src16 = common global [32 x i16] zeroinitializer, align 64
@src8 = common global [64 x i8] zeroinitializer, align 64

@dst64 = common global [8 x double] zeroinitializer, align 64
@dst32 = common global [16 x float] zeroinitializer, align 64
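; Sources and destinations are all 64-byte aligned, so every vector access the
; checks below expect can use the natural alignment of its byte offset: align
; 64 at element 0, then align 8/16/32 as the offset dictates.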
;
; UITOFP to vXf64
;

define void @uitofp_2i64_2f64() #0 {
; CHECK-LABEL: @uitofp_2i64_2f64(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @src64 to <2 x i64>*), align 64
; CHECK-NEXT: [[TMP2:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x double>
; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
; CHECK-NEXT: ret void
;
  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
  %cvt0 = uitofp i64 %ld0 to double
  %cvt1 = uitofp i64 %ld1 to double
  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
  ret void
}
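; The function above is the template for the whole file: a run of adjacent
; scalar loads, scalar uitofp conversions, and adjacent scalar stores.
; Whenever the target's cost model rates the vector uitofp as profitable, the
; SLP vectorizer should collapse the chain into the single vector
; load/uitofp/store sequence the checks describe; otherwise the scalar form
; must survive intact.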
define void @uitofp_4i64_4f64() #0 {
; SSE-LABEL: @uitofp_4i64_4f64(
; SSE-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @src64 to <2 x i64>*), align 64
; SSE-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2) to <2 x i64>*), align 16
; SSE-NEXT: [[TMP3:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x double>
; SSE-NEXT: [[TMP4:%.*]] = uitofp <2 x i64> [[TMP2]] to <2 x double>
; SSE-NEXT: store <2 x double> [[TMP3]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
; SSE-NEXT: store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
; SSE-NEXT: ret void
;
; AVX-LABEL: @uitofp_4i64_4f64(
; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
; AVX-NEXT: [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x double>
; AVX-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
; AVX-NEXT: ret void
;
  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
  %ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
  %ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
  %cvt0 = uitofp i64 %ld0 to double
  %cvt1 = uitofp i64 %ld1 to double
  %cvt2 = uitofp i64 %ld2 to double
  %cvt3 = uitofp i64 %ld3 to double
  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
  ret void
}
define void @uitofp_8i64_8f64() #0 {
; SSE-LABEL: @uitofp_8i64_8f64(
; SSE-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @src64 to <2 x i64>*), align 64
; SSE-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2) to <2 x i64>*), align 16
; SSE-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4) to <2 x i64>*), align 32
; SSE-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6) to <2 x i64>*), align 16
; SSE-NEXT: [[TMP5:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x double>
; SSE-NEXT: [[TMP6:%.*]] = uitofp <2 x i64> [[TMP2]] to <2 x double>
; SSE-NEXT: [[TMP7:%.*]] = uitofp <2 x i64> [[TMP3]] to <2 x double>
; SSE-NEXT: [[TMP8:%.*]] = uitofp <2 x i64> [[TMP4]] to <2 x double>
; SSE-NEXT: store <2 x double> [[TMP5]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
; SSE-NEXT: store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
; SSE-NEXT: store <2 x double> [[TMP7]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 32
; SSE-NEXT: store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 16
; SSE-NEXT: ret void
;
; AVX256-LABEL: @uitofp_8i64_8f64(
; AVX256-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
; AVX256-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4) to <4 x i64>*), align 32
; AVX256-NEXT: [[TMP3:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x double>
; AVX256-NEXT: [[TMP4:%.*]] = uitofp <4 x i64> [[TMP2]] to <4 x double>
; AVX256-NEXT: store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
; AVX256-NEXT: store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
; AVX256-NEXT: ret void
;
; AVX512-LABEL: @uitofp_8i64_8f64(
; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
; AVX512-NEXT: [[TMP2:%.*]] = uitofp <8 x i64> [[TMP1]] to <8 x double>
; AVX512-NEXT: store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 64
; AVX512-NEXT: ret void
;
  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
  %ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
  %ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
  %ld4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
  %ld5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
  %ld6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
  %ld7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
  %cvt0 = uitofp i64 %ld0 to double
  %cvt1 = uitofp i64 %ld1 to double
  %cvt2 = uitofp i64 %ld2 to double
  %cvt3 = uitofp i64 %ld3 to double
  %cvt4 = uitofp i64 %ld4 to double
  %cvt5 = uitofp i64 %ld5 to double
  %cvt6 = uitofp i64 %ld6 to double
  %cvt7 = uitofp i64 %ld7 to double
  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
  store double %cvt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
  store double %cvt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
  store double %cvt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
  store double %cvt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
  ret void
}
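; Note how the same eight-element i64 chain splits by target: SSE gets four
; <2 x i64> pieces, AVX256 two <4 x i64> halves, and AVX512 one <8 x i64>
; operation, i.e. the widest profitable vector width for each feature level.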
define void @uitofp_2i32_2f64() #0 {
; SSE-LABEL: @uitofp_2i32_2f64(
; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
; SSE-NEXT: [[CVT0:%.*]] = uitofp i32 [[LD0]] to double
; SSE-NEXT: [[CVT1:%.*]] = uitofp i32 [[LD1]] to double
; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
; SSE-NEXT: ret void
;
; AVX256NODQ-LABEL: @uitofp_2i32_2f64(
; AVX256NODQ-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
; AVX256NODQ-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
; AVX256NODQ-NEXT: [[CVT0:%.*]] = uitofp i32 [[LD0]] to double
; AVX256NODQ-NEXT: [[CVT1:%.*]] = uitofp i32 [[LD1]] to double
; AVX256NODQ-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
; AVX256NODQ-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT: ret void
;
; AVX512-LABEL: @uitofp_2i32_2f64(
; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
; AVX512-NEXT: [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
; AVX512-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
; AVX512-NEXT: ret void
;
; AVX256DQ-LABEL: @uitofp_2i32_2f64(
; AVX256DQ-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
; AVX256DQ-NEXT: [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
; AVX256DQ-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
; AVX256DQ-NEXT: ret void
;
  %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
  %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
  %cvt0 = uitofp i32 %ld0 to double
  %cvt1 = uitofp i32 %ld1 to double
  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
  ret void
}
define void @uitofp_4i32_4f64() #0 {
; SSE-LABEL: @uitofp_4i32_4f64(
; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
; SSE-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
; SSE-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
; SSE-NEXT: [[CVT0:%.*]] = uitofp i32 [[LD0]] to double
; SSE-NEXT: [[CVT1:%.*]] = uitofp i32 [[LD1]] to double
; SSE-NEXT: [[CVT2:%.*]] = uitofp i32 [[LD2]] to double
; SSE-NEXT: [[CVT3:%.*]] = uitofp i32 [[LD3]] to double
; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
; SSE-NEXT: ret void
;
; AVX-LABEL: @uitofp_4i32_4f64(
; AVX-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
; AVX-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x double>
; AVX-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
; AVX-NEXT: ret void
;
  %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
  %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
  %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
  %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
  %cvt0 = uitofp i32 %ld0 to double
  %cvt1 = uitofp i32 %ld1 to double
  %cvt2 = uitofp i32 %ld2 to double
  %cvt3 = uitofp i32 %ld3 to double
  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
  ret void
}
define void @uitofp_8i32_8f64() #0 {
; SSE-LABEL: @uitofp_8i32_8f64(
; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
; SSE-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
; SSE-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
; SSE-NEXT: [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4), align 16
; SSE-NEXT: [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 5), align 4
; SSE-NEXT: [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6), align 8
; SSE-NEXT: [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 7), align 4
; SSE-NEXT: [[CVT0:%.*]] = uitofp i32 [[LD0]] to double
; SSE-NEXT: [[CVT1:%.*]] = uitofp i32 [[LD1]] to double
; SSE-NEXT: [[CVT2:%.*]] = uitofp i32 [[LD2]] to double
; SSE-NEXT: [[CVT3:%.*]] = uitofp i32 [[LD3]] to double
; SSE-NEXT: [[CVT4:%.*]] = uitofp i32 [[LD4]] to double
; SSE-NEXT: [[CVT5:%.*]] = uitofp i32 [[LD5]] to double
; SSE-NEXT: [[CVT6:%.*]] = uitofp i32 [[LD6]] to double
; SSE-NEXT: [[CVT7:%.*]] = uitofp i32 [[LD7]] to double
; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
; SSE-NEXT: store double [[CVT4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
; SSE-NEXT: store double [[CVT5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
; SSE-NEXT: store double [[CVT6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
; SSE-NEXT: store double [[CVT7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
; SSE-NEXT: ret void
;
; AVX256-LABEL: @uitofp_8i32_8f64(
; AVX256-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
; AVX256-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 16
; AVX256-NEXT: [[TMP3:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x double>
; AVX256-NEXT: [[TMP4:%.*]] = uitofp <4 x i32> [[TMP2]] to <4 x double>
; AVX256-NEXT: store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
; AVX256-NEXT: store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
; AVX256-NEXT: ret void
;
; AVX512-LABEL: @uitofp_8i32_8f64(
; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @src32 to <8 x i32>*), align 64
; AVX512-NEXT: [[TMP2:%.*]] = uitofp <8 x i32> [[TMP1]] to <8 x double>
; AVX512-NEXT: store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 64
; AVX512-NEXT: ret void
;
  %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
  %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
  %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
  %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
  %ld4 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4), align 16
  %ld5 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 5), align 4
  %ld6 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6), align 8
  %ld7 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 7), align 4
  %cvt0 = uitofp i32 %ld0 to double
  %cvt1 = uitofp i32 %ld1 to double
  %cvt2 = uitofp i32 %ld2 to double
  %cvt3 = uitofp i32 %ld3 to double
  %cvt4 = uitofp i32 %ld4 to double
  %cvt5 = uitofp i32 %ld5 to double
  %cvt6 = uitofp i32 %ld6 to double
  %cvt7 = uitofp i32 %ld7 to double
  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
  store double %cvt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
  store double %cvt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
  store double %cvt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
  store double %cvt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
  ret void
}
define void @uitofp_2i16_2f64() #0 {
; CHECK-LABEL: @uitofp_2i16_2f64(
; CHECK-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
; CHECK-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
; CHECK-NEXT: [[CVT0:%.*]] = uitofp i16 [[LD0]] to double
; CHECK-NEXT: [[CVT1:%.*]] = uitofp i16 [[LD1]] to double
; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
; CHECK-NEXT: ret void
;
  %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
  %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
  %cvt0 = uitofp i16 %ld0 to double
  %cvt1 = uitofp i16 %ld1 to double
  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
  ret void
}
define void @uitofp_4i16_4f64() #0 {
; SSE-LABEL: @uitofp_4i16_4f64(
; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
; SSE-NEXT: [[CVT0:%.*]] = uitofp i16 [[LD0]] to double
; SSE-NEXT: [[CVT1:%.*]] = uitofp i16 [[LD1]] to double
; SSE-NEXT: [[CVT2:%.*]] = uitofp i16 [[LD2]] to double
; SSE-NEXT: [[CVT3:%.*]] = uitofp i16 [[LD3]] to double
; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
; SSE-NEXT: ret void
;
; AVX-LABEL: @uitofp_4i16_4f64(
; AVX-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
; AVX-NEXT: [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x double>
; AVX-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
; AVX-NEXT: ret void
;
  %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
  %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
  %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
  %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
  %cvt0 = uitofp i16 %ld0 to double
  %cvt1 = uitofp i16 %ld1 to double
  %cvt2 = uitofp i16 %ld2 to double
  %cvt3 = uitofp i16 %ld3 to double
  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
  ret void
}
define void @uitofp_8i16_8f64() #0 {
; SSE-LABEL: @uitofp_8i16_8f64(
; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
; SSE-NEXT: [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8
; SSE-NEXT: [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2
; SSE-NEXT: [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4
; SSE-NEXT: [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2
; SSE-NEXT: [[CVT0:%.*]] = uitofp i16 [[LD0]] to double
; SSE-NEXT: [[CVT1:%.*]] = uitofp i16 [[LD1]] to double
; SSE-NEXT: [[CVT2:%.*]] = uitofp i16 [[LD2]] to double
; SSE-NEXT: [[CVT3:%.*]] = uitofp i16 [[LD3]] to double
; SSE-NEXT: [[CVT4:%.*]] = uitofp i16 [[LD4]] to double
; SSE-NEXT: [[CVT5:%.*]] = uitofp i16 [[LD5]] to double
; SSE-NEXT: [[CVT6:%.*]] = uitofp i16 [[LD6]] to double
; SSE-NEXT: [[CVT7:%.*]] = uitofp i16 [[LD7]] to double
; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
; SSE-NEXT: store double [[CVT4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
; SSE-NEXT: store double [[CVT5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
; SSE-NEXT: store double [[CVT6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
; SSE-NEXT: store double [[CVT7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
; SSE-NEXT: ret void
;
; AVX256-LABEL: @uitofp_8i16_8f64(
; AVX256-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
; AVX256-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 8
; AVX256-NEXT: [[TMP3:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x double>
; AVX256-NEXT: [[TMP4:%.*]] = uitofp <4 x i16> [[TMP2]] to <4 x double>
; AVX256-NEXT: store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
; AVX256-NEXT: store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
; AVX256-NEXT: ret void
;
; AVX512-LABEL: @uitofp_8i16_8f64(
; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @src16 to <8 x i16>*), align 64
; AVX512-NEXT: [[TMP2:%.*]] = uitofp <8 x i16> [[TMP1]] to <8 x double>
; AVX512-NEXT: store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 64
; AVX512-NEXT: ret void
;
  %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
  %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
  %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
  %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
  %ld4 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8
  %ld5 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2
  %ld6 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4
  %ld7 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2
  %cvt0 = uitofp i16 %ld0 to double
  %cvt1 = uitofp i16 %ld1 to double
  %cvt2 = uitofp i16 %ld2 to double
  %cvt3 = uitofp i16 %ld3 to double
  %cvt4 = uitofp i16 %ld4 to double
  %cvt5 = uitofp i16 %ld5 to double
  %cvt6 = uitofp i16 %ld6 to double
  %cvt7 = uitofp i16 %ld7 to double
  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
  store double %cvt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
  store double %cvt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
  store double %cvt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
  store double %cvt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
  ret void
}
define void @uitofp_2i8_2f64() #0 {
; SSE-LABEL: @uitofp_2i8_2f64(
; SSE-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
; SSE-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
; SSE-NEXT: [[CVT0:%.*]] = uitofp i8 [[LD0]] to double
; SSE-NEXT: [[CVT1:%.*]] = uitofp i8 [[LD1]] to double
; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
; SSE-NEXT: ret void
;
; AVX256NODQ-LABEL: @uitofp_2i8_2f64(
; AVX256NODQ-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
; AVX256NODQ-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
; AVX256NODQ-NEXT: [[CVT0:%.*]] = uitofp i8 [[LD0]] to double
; AVX256NODQ-NEXT: [[CVT1:%.*]] = uitofp i8 [[LD1]] to double
; AVX256NODQ-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
; AVX256NODQ-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT: ret void
;
; AVX512-LABEL: @uitofp_2i8_2f64(
; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64
; AVX512-NEXT: [[TMP2:%.*]] = uitofp <2 x i8> [[TMP1]] to <2 x double>
; AVX512-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
; AVX512-NEXT: ret void
;
; AVX256DQ-LABEL: @uitofp_2i8_2f64(
; AVX256DQ-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64
; AVX256DQ-NEXT: [[TMP2:%.*]] = uitofp <2 x i8> [[TMP1]] to <2 x double>
; AVX256DQ-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
; AVX256DQ-NEXT: ret void
;
  %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
  %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
  %cvt0 = uitofp i8 %ld0 to double
  %cvt1 = uitofp i8 %ld1 to double
  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
  ret void
}
define void @uitofp_4i8_4f64() #0 {
; SSE-LABEL: @uitofp_4i8_4f64(
; SSE-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
; SSE-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
; SSE-NEXT: [[LD2:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
; SSE-NEXT: [[LD3:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
; SSE-NEXT: [[CVT0:%.*]] = uitofp i8 [[LD0]] to double
; SSE-NEXT: [[CVT1:%.*]] = uitofp i8 [[LD1]] to double
; SSE-NEXT: [[CVT2:%.*]] = uitofp i8 [[LD2]] to double
; SSE-NEXT: [[CVT3:%.*]] = uitofp i8 [[LD3]] to double
; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
; SSE-NEXT: ret void
;
; AVX-LABEL: @uitofp_4i8_4f64(
; AVX-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
; AVX-NEXT: [[TMP2:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x double>
; AVX-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
; AVX-NEXT: ret void
;
  %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
  %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
  %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
  %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
  %cvt0 = uitofp i8 %ld0 to double
  %cvt1 = uitofp i8 %ld1 to double
  %cvt2 = uitofp i8 %ld2 to double
  %cvt3 = uitofp i8 %ld3 to double
  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
  ret void
}
define void @uitofp_8i8_8f64() #0 {
; SSE-LABEL: @uitofp_8i8_8f64(
; SSE-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
; SSE-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
; SSE-NEXT: [[LD2:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
; SSE-NEXT: [[LD3:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
; SSE-NEXT: [[LD4:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4), align 4
; SSE-NEXT: [[LD5:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 5), align 1
; SSE-NEXT: [[LD6:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6), align 2
; SSE-NEXT: [[LD7:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 7), align 1
; SSE-NEXT: [[CVT0:%.*]] = uitofp i8 [[LD0]] to double
; SSE-NEXT: [[CVT1:%.*]] = uitofp i8 [[LD1]] to double
; SSE-NEXT: [[CVT2:%.*]] = uitofp i8 [[LD2]] to double
; SSE-NEXT: [[CVT3:%.*]] = uitofp i8 [[LD3]] to double
; SSE-NEXT: [[CVT4:%.*]] = uitofp i8 [[LD4]] to double
; SSE-NEXT: [[CVT5:%.*]] = uitofp i8 [[LD5]] to double
; SSE-NEXT: [[CVT6:%.*]] = uitofp i8 [[LD6]] to double
; SSE-NEXT: [[CVT7:%.*]] = uitofp i8 [[LD7]] to double
; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
; SSE-NEXT: store double [[CVT4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
; SSE-NEXT: store double [[CVT5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
; SSE-NEXT: store double [[CVT6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
; SSE-NEXT: store double [[CVT7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
; SSE-NEXT: ret void
;
; AVX256-LABEL: @uitofp_8i8_8f64(
; AVX256-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
; AVX256-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4) to <4 x i8>*), align 4
; AVX256-NEXT: [[TMP3:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x double>
; AVX256-NEXT: [[TMP4:%.*]] = uitofp <4 x i8> [[TMP2]] to <4 x double>
; AVX256-NEXT: store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
; AVX256-NEXT: store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
; AVX256-NEXT: ret void
;
; AVX512-LABEL: @uitofp_8i8_8f64(
; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* bitcast ([64 x i8]* @src8 to <8 x i8>*), align 64
; AVX512-NEXT: [[TMP2:%.*]] = uitofp <8 x i8> [[TMP1]] to <8 x double>
; AVX512-NEXT: store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 64
; AVX512-NEXT: ret void
;
  %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
  %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
  %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
  %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
  %ld4 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4), align 4
  %ld5 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 5), align 1
  %ld6 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6), align 2
  %ld7 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 7), align 1
  %cvt0 = uitofp i8 %ld0 to double
  %cvt1 = uitofp i8 %ld1 to double
  %cvt2 = uitofp i8 %ld2 to double
  %cvt3 = uitofp i8 %ld3 to double
  %cvt4 = uitofp i8 %ld4 to double
  %cvt5 = uitofp i8 %ld5 to double
  %cvt6 = uitofp i8 %ld6 to double
  %cvt7 = uitofp i8 %ld7 to double
  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
  store double %cvt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
  store double %cvt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
  store double %cvt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
  store double %cvt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
  ret void
}

;
; UITOFP to vXf32
;
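; In this section the DQ and NODQ prefixes diverge for the i64 -> f32 chains;
; presumably because AVX512DQ adds direct unsigned i64-to-float conversions
; (vcvtuqq2ps and friends), the DQ and AVX512 runs vectorize while the
; AVX256NODQ runs are expected to stay scalar.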
define void @uitofp_2i64_2f32() #0 {
; CHECK-LABEL: @uitofp_2i64_2f32(
; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
; CHECK-NEXT: [[CVT0:%.*]] = uitofp i64 [[LD0]] to float
; CHECK-NEXT: [[CVT1:%.*]] = uitofp i64 [[LD1]] to float
; CHECK-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
; CHECK-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
; CHECK-NEXT: ret void
;
  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
  %cvt0 = uitofp i64 %ld0 to float
  %cvt1 = uitofp i64 %ld1 to float
  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
  ret void
}
define void @uitofp_4i64_4f32() #0 {
; SSE-LABEL: @uitofp_4i64_4f32(
; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
; SSE-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
; SSE-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
; SSE-NEXT: [[CVT0:%.*]] = uitofp i64 [[LD0]] to float
; SSE-NEXT: [[CVT1:%.*]] = uitofp i64 [[LD1]] to float
; SSE-NEXT: [[CVT2:%.*]] = uitofp i64 [[LD2]] to float
; SSE-NEXT: [[CVT3:%.*]] = uitofp i64 [[LD3]] to float
; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
; SSE-NEXT: ret void
;
; AVX256NODQ-LABEL: @uitofp_4i64_4f32(
; AVX256NODQ-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
; AVX256NODQ-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
; AVX256NODQ-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
; AVX256NODQ-NEXT: [[CVT0:%.*]] = uitofp i64 [[LD0]] to float
; AVX256NODQ-NEXT: [[CVT1:%.*]] = uitofp i64 [[LD1]] to float
; AVX256NODQ-NEXT: [[CVT2:%.*]] = uitofp i64 [[LD2]] to float
; AVX256NODQ-NEXT: [[CVT3:%.*]] = uitofp i64 [[LD3]] to float
; AVX256NODQ-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
; AVX256NODQ-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
; AVX256NODQ-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
; AVX256NODQ-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
; AVX256NODQ-NEXT: ret void
;
; AVX512-LABEL: @uitofp_4i64_4f32(
; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
; AVX512-NEXT: [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x float>
; AVX512-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
; AVX512-NEXT: ret void
;
; AVX256DQ-LABEL: @uitofp_4i64_4f32(
; AVX256DQ-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
; AVX256DQ-NEXT: [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x float>
; AVX256DQ-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
; AVX256DQ-NEXT: ret void
;
  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
  %ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
  %ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
  %cvt0 = uitofp i64 %ld0 to float
  %cvt1 = uitofp i64 %ld1 to float
  %cvt2 = uitofp i64 %ld2 to float
  %cvt3 = uitofp i64 %ld3 to float
  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
  ret void
}
define void @uitofp_8i64_8f32() #0 {
; SSE-LABEL: @uitofp_8i64_8f32(
; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
; SSE-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
; SSE-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
; SSE-NEXT: [[LD4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
; SSE-NEXT: [[LD5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
; SSE-NEXT: [[LD6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
; SSE-NEXT: [[LD7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
; SSE-NEXT: [[CVT0:%.*]] = uitofp i64 [[LD0]] to float
; SSE-NEXT: [[CVT1:%.*]] = uitofp i64 [[LD1]] to float
; SSE-NEXT: [[CVT2:%.*]] = uitofp i64 [[LD2]] to float
; SSE-NEXT: [[CVT3:%.*]] = uitofp i64 [[LD3]] to float
; SSE-NEXT: [[CVT4:%.*]] = uitofp i64 [[LD4]] to float
; SSE-NEXT: [[CVT5:%.*]] = uitofp i64 [[LD5]] to float
; SSE-NEXT: [[CVT6:%.*]] = uitofp i64 [[LD6]] to float
; SSE-NEXT: [[CVT7:%.*]] = uitofp i64 [[LD7]] to float
; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
; SSE-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
; SSE-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
; SSE-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
; SSE-NEXT: ret void
;
; AVX256NODQ-LABEL: @uitofp_8i64_8f32(
; AVX256NODQ-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
; AVX256NODQ-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
; AVX256NODQ-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
; AVX256NODQ-NEXT: [[LD4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
; AVX256NODQ-NEXT: [[LD5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
; AVX256NODQ-NEXT: [[LD6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
; AVX256NODQ-NEXT: [[LD7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
; AVX256NODQ-NEXT: [[CVT0:%.*]] = uitofp i64 [[LD0]] to float
; AVX256NODQ-NEXT: [[CVT1:%.*]] = uitofp i64 [[LD1]] to float
; AVX256NODQ-NEXT: [[CVT2:%.*]] = uitofp i64 [[LD2]] to float
; AVX256NODQ-NEXT: [[CVT3:%.*]] = uitofp i64 [[LD3]] to float
; AVX256NODQ-NEXT: [[CVT4:%.*]] = uitofp i64 [[LD4]] to float
; AVX256NODQ-NEXT: [[CVT5:%.*]] = uitofp i64 [[LD5]] to float
; AVX256NODQ-NEXT: [[CVT6:%.*]] = uitofp i64 [[LD6]] to float
; AVX256NODQ-NEXT: [[CVT7:%.*]] = uitofp i64 [[LD7]] to float
; AVX256NODQ-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
; AVX256NODQ-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
; AVX256NODQ-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
; AVX256NODQ-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
; AVX256NODQ-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
; AVX256NODQ-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
; AVX256NODQ-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
; AVX256NODQ-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
; AVX256NODQ-NEXT: ret void
;
; AVX512-LABEL: @uitofp_8i64_8f32(
; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
; AVX512-NEXT: [[TMP2:%.*]] = uitofp <8 x i64> [[TMP1]] to <8 x float>
; AVX512-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
; AVX512-NEXT: ret void
;
; AVX256DQ-LABEL: @uitofp_8i64_8f32(
; AVX256DQ-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
; AVX256DQ-NEXT: [[TMP2:%.*]] = uitofp <8 x i64> [[TMP1]] to <8 x float>
; AVX256DQ-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
; AVX256DQ-NEXT: ret void
;
  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
  %ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
  %ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
  %ld4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
  %ld5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
  %ld6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
  %ld7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
  %cvt0 = uitofp i64 %ld0 to float
  %cvt1 = uitofp i64 %ld1 to float
  %cvt2 = uitofp i64 %ld2 to float
  %cvt3 = uitofp i64 %ld3 to float
  %cvt4 = uitofp i64 %ld4 to float
  %cvt5 = uitofp i64 %ld5 to float
  %cvt6 = uitofp i64 %ld6 to float
  %cvt7 = uitofp i64 %ld7 to float
  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
  store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
  store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
  store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
  store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
  ret void
}
define void @uitofp_4i32_4f32() #0 {
; CHECK-LABEL: @uitofp_4i32_4f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
; CHECK-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float>
; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
; CHECK-NEXT: ret void
;
  %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
  %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
  %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
  %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
  %cvt0 = uitofp i32 %ld0 to float
  %cvt1 = uitofp i32 %ld1 to float
  %cvt2 = uitofp i32 %ld2 to float
  %cvt3 = uitofp i32 %ld3 to float
  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
  ret void
}
define void @uitofp_8i32_8f32() #0 {
; SSE-LABEL: @uitofp_8i32_8f32(
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 16
; SSE-NEXT: [[TMP3:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float>
; SSE-NEXT: [[TMP4:%.*]] = uitofp <4 x i32> [[TMP2]] to <4 x float>
; SSE-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
; SSE-NEXT: ret void
;
; AVX-LABEL: @uitofp_8i32_8f32(
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @src32 to <8 x i32>*), align 64
; AVX-NEXT: [[TMP2:%.*]] = uitofp <8 x i32> [[TMP1]] to <8 x float>
; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
; AVX-NEXT: ret void
;
  %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
  %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
  %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
  %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
  %ld4 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4), align 16
  %ld5 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 5), align 4
  %ld6 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6), align 8
  %ld7 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 7), align 4
  %cvt0 = uitofp i32 %ld0 to float
  %cvt1 = uitofp i32 %ld1 to float
  %cvt2 = uitofp i32 %ld2 to float
  %cvt3 = uitofp i32 %ld3 to float
  %cvt4 = uitofp i32 %ld4 to float
  %cvt5 = uitofp i32 %ld5 to float
  %cvt6 = uitofp i32 %ld6 to float
  %cvt7 = uitofp i32 %ld7 to float
  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
  store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
  store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
  store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
  store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
  ret void
}

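; 16 x i32 -> 16 x float: four <4 x i32> parts on SSE, two <8 x i32> halves on AVX256, one <16 x i32> conversion on AVX512.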
define void @uitofp_16i32_16f32() #0 {
; SSE-LABEL: @uitofp_16i32_16f32(
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 16
; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 8) to <4 x i32>*), align 32
; SSE-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 12) to <4 x i32>*), align 16
; SSE-NEXT: [[TMP5:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float>
; SSE-NEXT: [[TMP6:%.*]] = uitofp <4 x i32> [[TMP2]] to <4 x float>
; SSE-NEXT: [[TMP7:%.*]] = uitofp <4 x i32> [[TMP3]] to <4 x float>
; SSE-NEXT: [[TMP8:%.*]] = uitofp <4 x i32> [[TMP4]] to <4 x float>
; SSE-NEXT: store <4 x float> [[TMP5]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
; SSE-NEXT: store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
; SSE-NEXT: store <4 x float> [[TMP7]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 32
; SSE-NEXT: store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 16
; SSE-NEXT: ret void
;
; AVX256-LABEL: @uitofp_16i32_16f32(
; AVX256-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @src32 to <8 x i32>*), align 64
; AVX256-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 8) to <8 x i32>*), align 32
; AVX256-NEXT: [[TMP3:%.*]] = uitofp <8 x i32> [[TMP1]] to <8 x float>
; AVX256-NEXT: [[TMP4:%.*]] = uitofp <8 x i32> [[TMP2]] to <8 x float>
; AVX256-NEXT: store <8 x float> [[TMP3]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
; AVX256-NEXT: store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 32
; AVX256-NEXT: ret void
;
; AVX512-LABEL: @uitofp_16i32_16f32(
; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @src32 to <16 x i32>*), align 64
; AVX512-NEXT: [[TMP2:%.*]] = uitofp <16 x i32> [[TMP1]] to <16 x float>
; AVX512-NEXT: store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 64
; AVX512-NEXT: ret void
;
  %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0 ), align 64
  %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1 ), align 4
  %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2 ), align 8
  %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3 ), align 4
  %ld4 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4 ), align 16
  %ld5 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 5 ), align 4
  %ld6 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6 ), align 8
  %ld7 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 7 ), align 4
  %ld8 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 8 ), align 32
  %ld9 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 9 ), align 4
  %ld10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 10), align 8
  %ld11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 11), align 4
  %ld12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 12), align 16
  %ld13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 13), align 4
  %ld14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 14), align 8
  %ld15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 15), align 4
  %cvt0 = uitofp i32 %ld0 to float
  %cvt1 = uitofp i32 %ld1 to float
  %cvt2 = uitofp i32 %ld2 to float
  %cvt3 = uitofp i32 %ld3 to float
  %cvt4 = uitofp i32 %ld4 to float
  %cvt5 = uitofp i32 %ld5 to float
  %cvt6 = uitofp i32 %ld6 to float
  %cvt7 = uitofp i32 %ld7 to float
  %cvt8 = uitofp i32 %ld8 to float
  %cvt9 = uitofp i32 %ld9 to float
  %cvt10 = uitofp i32 %ld10 to float
  %cvt11 = uitofp i32 %ld11 to float
  %cvt12 = uitofp i32 %ld12 to float
  %cvt13 = uitofp i32 %ld13 to float
  %cvt14 = uitofp i32 %ld14 to float
  %cvt15 = uitofp i32 %ld15 to float
  store float %cvt0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 64
  store float %cvt1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
  store float %cvt2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 8
  store float %cvt3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
  store float %cvt4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 16
  store float %cvt5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
  store float %cvt6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 8
  store float %cvt7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
  store float %cvt8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 32
  store float %cvt9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
  store float %cvt10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8
  store float %cvt11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
  store float %cvt12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16
  store float %cvt13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
  store float %cvt14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8
  store float %cvt15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
  ret void
}

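; 4 x i16 -> 4 x float is expected to stay scalar on SSE but become a single <4 x i16> conversion on AVX.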
define void @uitofp_4i16_4f32() #0 {
; SSE-LABEL: @uitofp_4i16_4f32(
; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
; SSE-NEXT: [[CVT0:%.*]] = uitofp i16 [[LD0]] to float
; SSE-NEXT: [[CVT1:%.*]] = uitofp i16 [[LD1]] to float
; SSE-NEXT: [[CVT2:%.*]] = uitofp i16 [[LD2]] to float
; SSE-NEXT: [[CVT3:%.*]] = uitofp i16 [[LD3]] to float
; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
; SSE-NEXT: ret void
;
; AVX-LABEL: @uitofp_4i16_4f32(
; AVX-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
; AVX-NEXT: [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x float>
; AVX-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
; AVX-NEXT: ret void
;
  %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
  %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
  %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
  %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
  %cvt0 = uitofp i16 %ld0 to float
  %cvt1 = uitofp i16 %ld1 to float
  %cvt2 = uitofp i16 %ld2 to float
  %cvt3 = uitofp i16 %ld3 to float
  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
  ret void
}

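; 8 x i16 -> 8 x float likewise stays scalar on SSE and becomes one <8 x i16> conversion on AVX.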
define void @uitofp_8i16_8f32() #0 {
; SSE-LABEL: @uitofp_8i16_8f32(
; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
; SSE-NEXT: [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8
; SSE-NEXT: [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2
; SSE-NEXT: [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4
; SSE-NEXT: [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2
; SSE-NEXT: [[CVT0:%.*]] = uitofp i16 [[LD0]] to float
; SSE-NEXT: [[CVT1:%.*]] = uitofp i16 [[LD1]] to float
; SSE-NEXT: [[CVT2:%.*]] = uitofp i16 [[LD2]] to float
; SSE-NEXT: [[CVT3:%.*]] = uitofp i16 [[LD3]] to float
; SSE-NEXT: [[CVT4:%.*]] = uitofp i16 [[LD4]] to float
; SSE-NEXT: [[CVT5:%.*]] = uitofp i16 [[LD5]] to float
; SSE-NEXT: [[CVT6:%.*]] = uitofp i16 [[LD6]] to float
; SSE-NEXT: [[CVT7:%.*]] = uitofp i16 [[LD7]] to float
; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
; SSE-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
; SSE-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
; SSE-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
; SSE-NEXT: ret void
;
; AVX-LABEL: @uitofp_8i16_8f32(
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @src16 to <8 x i16>*), align 64
; AVX-NEXT: [[TMP2:%.*]] = uitofp <8 x i16> [[TMP1]] to <8 x float>
; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
; AVX-NEXT: ret void
;
  %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
  %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
  %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
  %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
  %ld4 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8
  %ld5 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2
  %ld6 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4
  %ld7 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2
  %cvt0 = uitofp i16 %ld0 to float
  %cvt1 = uitofp i16 %ld1 to float
  %cvt2 = uitofp i16 %ld2 to float
  %cvt3 = uitofp i16 %ld3 to float
  %cvt4 = uitofp i16 %ld4 to float
  %cvt5 = uitofp i16 %ld5 to float
  %cvt6 = uitofp i16 %ld6 to float
  %cvt7 = uitofp i16 %ld7 to float
  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
  store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
  store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
  store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
  store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
  ret void
}

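; 16 x i16 -> 16 x float: scalar on SSE, two <8 x i16> halves on AVX256, one <16 x i16> conversion on AVX512.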
define void @uitofp_16i16_16f32() #0 {
; SSE-LABEL: @uitofp_16i16_16f32(
; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
; SSE-NEXT: [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8
; SSE-NEXT: [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2
; SSE-NEXT: [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4
; SSE-NEXT: [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2
; SSE-NEXT: [[LD8:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8), align 16
; SSE-NEXT: [[LD9:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 9), align 2
; SSE-NEXT: [[LD10:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 10), align 4
; SSE-NEXT: [[LD11:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 11), align 2
; SSE-NEXT: [[LD12:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 12), align 8
; SSE-NEXT: [[LD13:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 13), align 2
; SSE-NEXT: [[LD14:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 14), align 4
; SSE-NEXT: [[LD15:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 15), align 2
; SSE-NEXT: [[CVT0:%.*]] = uitofp i16 [[LD0]] to float
; SSE-NEXT: [[CVT1:%.*]] = uitofp i16 [[LD1]] to float
; SSE-NEXT: [[CVT2:%.*]] = uitofp i16 [[LD2]] to float
; SSE-NEXT: [[CVT3:%.*]] = uitofp i16 [[LD3]] to float
; SSE-NEXT: [[CVT4:%.*]] = uitofp i16 [[LD4]] to float
; SSE-NEXT: [[CVT5:%.*]] = uitofp i16 [[LD5]] to float
; SSE-NEXT: [[CVT6:%.*]] = uitofp i16 [[LD6]] to float
; SSE-NEXT: [[CVT7:%.*]] = uitofp i16 [[LD7]] to float
; SSE-NEXT: [[CVT8:%.*]] = uitofp i16 [[LD8]] to float
; SSE-NEXT: [[CVT9:%.*]] = uitofp i16 [[LD9]] to float
; SSE-NEXT: [[CVT10:%.*]] = uitofp i16 [[LD10]] to float
; SSE-NEXT: [[CVT11:%.*]] = uitofp i16 [[LD11]] to float
; SSE-NEXT: [[CVT12:%.*]] = uitofp i16 [[LD12]] to float
; SSE-NEXT: [[CVT13:%.*]] = uitofp i16 [[LD13]] to float
; SSE-NEXT: [[CVT14:%.*]] = uitofp i16 [[LD14]] to float
; SSE-NEXT: [[CVT15:%.*]] = uitofp i16 [[LD15]] to float
; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
; SSE-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
; SSE-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
; SSE-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
; SSE-NEXT: store float [[CVT8]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8), align 32
; SSE-NEXT: store float [[CVT9]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9), align 4
; SSE-NEXT: store float [[CVT10]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8
; SSE-NEXT: store float [[CVT11]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
; SSE-NEXT: store float [[CVT12]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16
; SSE-NEXT: store float [[CVT13]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
; SSE-NEXT: store float [[CVT14]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8
; SSE-NEXT: store float [[CVT15]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
; SSE-NEXT: ret void
;
; AVX256-LABEL: @uitofp_16i16_16f32(
; AVX256-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @src16 to <8 x i16>*), align 64
; AVX256-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8) to <8 x i16>*), align 16
; AVX256-NEXT: [[TMP3:%.*]] = uitofp <8 x i16> [[TMP1]] to <8 x float>
; AVX256-NEXT: [[TMP4:%.*]] = uitofp <8 x i16> [[TMP2]] to <8 x float>
; AVX256-NEXT: store <8 x float> [[TMP3]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
; AVX256-NEXT: store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 32
; AVX256-NEXT: ret void
;
; AVX512-LABEL: @uitofp_16i16_16f32(
; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @src16 to <16 x i16>*), align 64
; AVX512-NEXT: [[TMP2:%.*]] = uitofp <16 x i16> [[TMP1]] to <16 x float>
; AVX512-NEXT: store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 64
; AVX512-NEXT: ret void
;
  %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0 ), align 64
  %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1 ), align 2
  %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2 ), align 4
  %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3 ), align 2
  %ld4 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4 ), align 8
  %ld5 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5 ), align 2
  %ld6 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6 ), align 4
  %ld7 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7 ), align 2
  %ld8 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8 ), align 16
  %ld9 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 9 ), align 2
  %ld10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 10), align 4
  %ld11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 11), align 2
  %ld12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 12), align 8
  %ld13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 13), align 2
  %ld14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 14), align 4
  %ld15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 15), align 2
  %cvt0 = uitofp i16 %ld0 to float
  %cvt1 = uitofp i16 %ld1 to float
  %cvt2 = uitofp i16 %ld2 to float
  %cvt3 = uitofp i16 %ld3 to float
  %cvt4 = uitofp i16 %ld4 to float
  %cvt5 = uitofp i16 %ld5 to float
  %cvt6 = uitofp i16 %ld6 to float
  %cvt7 = uitofp i16 %ld7 to float
  %cvt8 = uitofp i16 %ld8 to float
  %cvt9 = uitofp i16 %ld9 to float
  %cvt10 = uitofp i16 %ld10 to float
  %cvt11 = uitofp i16 %ld11 to float
  %cvt12 = uitofp i16 %ld12 to float
  %cvt13 = uitofp i16 %ld13 to float
  %cvt14 = uitofp i16 %ld14 to float
  %cvt15 = uitofp i16 %ld15 to float
  store float %cvt0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 64
  store float %cvt1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
  store float %cvt2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 8
  store float %cvt3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
  store float %cvt4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 16
  store float %cvt5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
  store float %cvt6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 8
  store float %cvt7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
  store float %cvt8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 32
  store float %cvt9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
  store float %cvt10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8
  store float %cvt11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
  store float %cvt12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16
  store float %cvt13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
  store float %cvt14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8
  store float %cvt15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
  ret void
}

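; 4 x i8 -> 4 x float vectorizes identically on every target, so a single common CHECK block suffices.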
define void @uitofp_4i8_4f32() #0 {
; CHECK-LABEL: @uitofp_4i8_4f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
; CHECK-NEXT: [[TMP2:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x float>
; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
; CHECK-NEXT: ret void
;
  %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
  %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
  %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
  %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
  %cvt0 = uitofp i8 %ld0 to float
  %cvt1 = uitofp i8 %ld1 to float
  %cvt2 = uitofp i8 %ld2 to float
  %cvt3 = uitofp i8 %ld3 to float
  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
  ret void
}

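; 8 x i8 -> 8 x float: two <4 x i8> halves on SSE, a single <8 x i8> conversion on AVX.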
define void @uitofp_8i8_8f32() #0 {
; SSE-LABEL: @uitofp_8i8_8f32(
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
; SSE-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4) to <4 x i8>*), align 4
; SSE-NEXT: [[TMP3:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x float>
; SSE-NEXT: [[TMP4:%.*]] = uitofp <4 x i8> [[TMP2]] to <4 x float>
; SSE-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
; SSE-NEXT: ret void
;
; AVX-LABEL: @uitofp_8i8_8f32(
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* bitcast ([64 x i8]* @src8 to <8 x i8>*), align 64
; AVX-NEXT: [[TMP2:%.*]] = uitofp <8 x i8> [[TMP1]] to <8 x float>
; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
; AVX-NEXT: ret void
;
  %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
  %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
  %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
  %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
  %ld4 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4), align 4
  %ld5 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 5), align 1
  %ld6 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6), align 2
  %ld7 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 7), align 1
  %cvt0 = uitofp i8 %ld0 to float
  %cvt1 = uitofp i8 %ld1 to float
  %cvt2 = uitofp i8 %ld2 to float
  %cvt3 = uitofp i8 %ld3 to float
  %cvt4 = uitofp i8 %ld4 to float
  %cvt5 = uitofp i8 %ld5 to float
  %cvt6 = uitofp i8 %ld6 to float
  %cvt7 = uitofp i8 %ld7 to float
  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
  store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
  store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
  store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
  store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
  ret void
}

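; 16 x i8 -> 16 x float: four <4 x i8> parts on SSE, two <8 x i8> halves on AVX256, one <16 x i8> conversion on AVX512.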
define void @uitofp_16i8_16f32() #0 {
; SSE-LABEL: @uitofp_16i8_16f32(
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
; SSE-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4) to <4 x i8>*), align 4
; SSE-NEXT: [[TMP3:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 8) to <4 x i8>*), align 8
; SSE-NEXT: [[TMP4:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 12) to <4 x i8>*), align 4
; SSE-NEXT: [[TMP5:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x float>
; SSE-NEXT: [[TMP6:%.*]] = uitofp <4 x i8> [[TMP2]] to <4 x float>
; SSE-NEXT: [[TMP7:%.*]] = uitofp <4 x i8> [[TMP3]] to <4 x float>
; SSE-NEXT: [[TMP8:%.*]] = uitofp <4 x i8> [[TMP4]] to <4 x float>
; SSE-NEXT: store <4 x float> [[TMP5]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
; SSE-NEXT: store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
; SSE-NEXT: store <4 x float> [[TMP7]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 32
; SSE-NEXT: store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 16
; SSE-NEXT: ret void
;
; AVX256-LABEL: @uitofp_16i8_16f32(
; AVX256-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* bitcast ([64 x i8]* @src8 to <8 x i8>*), align 64
; AVX256-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 8) to <8 x i8>*), align 8
; AVX256-NEXT: [[TMP3:%.*]] = uitofp <8 x i8> [[TMP1]] to <8 x float>
; AVX256-NEXT: [[TMP4:%.*]] = uitofp <8 x i8> [[TMP2]] to <8 x float>
; AVX256-NEXT: store <8 x float> [[TMP3]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
; AVX256-NEXT: store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 32
; AVX256-NEXT: ret void
;
; AVX512-LABEL: @uitofp_16i8_16f32(
; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @src8 to <16 x i8>*), align 64
; AVX512-NEXT: [[TMP2:%.*]] = uitofp <16 x i8> [[TMP1]] to <16 x float>
; AVX512-NEXT: store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 64
; AVX512-NEXT: ret void
;
  %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0 ), align 64
  %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1 ), align 1
  %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2 ), align 2
  %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3 ), align 1
  %ld4 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4 ), align 4
  %ld5 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 5 ), align 1
  %ld6 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6 ), align 2
  %ld7 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 7 ), align 1
  %ld8 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 8 ), align 8
  %ld9 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 9 ), align 1
  %ld10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 10), align 2
  %ld11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 11), align 1
  %ld12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 12), align 4
  %ld13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 13), align 1
  %ld14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 14), align 2
  %ld15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 15), align 1
  %cvt0 = uitofp i8 %ld0 to float
  %cvt1 = uitofp i8 %ld1 to float
  %cvt2 = uitofp i8 %ld2 to float
  %cvt3 = uitofp i8 %ld3 to float
  %cvt4 = uitofp i8 %ld4 to float
  %cvt5 = uitofp i8 %ld5 to float
  %cvt6 = uitofp i8 %ld6 to float
  %cvt7 = uitofp i8 %ld7 to float
  %cvt8 = uitofp i8 %ld8 to float
  %cvt9 = uitofp i8 %ld9 to float
  %cvt10 = uitofp i8 %ld10 to float
  %cvt11 = uitofp i8 %ld11 to float
  %cvt12 = uitofp i8 %ld12 to float
  %cvt13 = uitofp i8 %ld13 to float
  %cvt14 = uitofp i8 %ld14 to float
  %cvt15 = uitofp i8 %ld15 to float
  store float %cvt0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 64
  store float %cvt1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
  store float %cvt2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 8
  store float %cvt3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
  store float %cvt4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 16
  store float %cvt5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
  store float %cvt6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 8
  store float %cvt7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
  store float %cvt8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 32
  store float %cvt9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
  store float %cvt10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8
  store float %cvt11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
  store float %cvt12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16
  store float %cvt13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
  store float %cvt14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8
  store float %cvt15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
  ret void
}

attributes #0 = { nounwind }