1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
3 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256NODQ
4 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256NODQ
5 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256NODQ
6 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=-prefer-256-bit -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512
7 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=+prefer-256-bit -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256DQ
9 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
11 @src64 = common global [8 x i64] zeroinitializer, align 64
12 @src32 = common global [16 x i32] zeroinitializer, align 64
13 @src16 = common global [32 x i16] zeroinitializer, align 64
14 @src8 = common global [64 x i8] zeroinitializer, align 64
16 @dst64 = common global [8 x double] zeroinitializer, align 64
17 @dst32 = common global [16 x float] zeroinitializer, align 64
23 define void @sitofp_2i64_2f64() #0 {
24 ; SSE-LABEL: @sitofp_2i64_2f64(
25 ; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
26 ; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
27 ; SSE-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
28 ; SSE-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
29 ; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
30 ; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
33 ; AVX256NODQ-LABEL: @sitofp_2i64_2f64(
34 ; AVX256NODQ-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
35 ; AVX256NODQ-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
36 ; AVX256NODQ-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
37 ; AVX256NODQ-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
38 ; AVX256NODQ-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
39 ; AVX256NODQ-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
40 ; AVX256NODQ-NEXT: ret void
42 ; AVX512-LABEL: @sitofp_2i64_2f64(
43 ; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @src64 to <2 x i64>*), align 64
44 ; AVX512-NEXT: [[TMP2:%.*]] = sitofp <2 x i64> [[TMP1]] to <2 x double>
45 ; AVX512-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
46 ; AVX512-NEXT: ret void
48 ; AVX256DQ-LABEL: @sitofp_2i64_2f64(
49 ; AVX256DQ-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @src64 to <2 x i64>*), align 64
50 ; AVX256DQ-NEXT: [[TMP2:%.*]] = sitofp <2 x i64> [[TMP1]] to <2 x double>
51 ; AVX256DQ-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
52 ; AVX256DQ-NEXT: ret void
54 %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
55 %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
56 %cvt0 = sitofp i64 %ld0 to double
57 %cvt1 = sitofp i64 %ld1 to double
58 store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
59 store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
63 define void @sitofp_4i64_4f64() #0 {
64 ; SSE-LABEL: @sitofp_4i64_4f64(
65 ; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
66 ; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
67 ; SSE-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
68 ; SSE-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
69 ; SSE-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
70 ; SSE-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
71 ; SSE-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to double
72 ; SSE-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to double
73 ; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
74 ; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
75 ; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
76 ; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
79 ; AVX256NODQ-LABEL: @sitofp_4i64_4f64(
80 ; AVX256NODQ-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
81 ; AVX256NODQ-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
82 ; AVX256NODQ-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
83 ; AVX256NODQ-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
84 ; AVX256NODQ-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
85 ; AVX256NODQ-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
86 ; AVX256NODQ-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to double
87 ; AVX256NODQ-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to double
88 ; AVX256NODQ-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
89 ; AVX256NODQ-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
90 ; AVX256NODQ-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
91 ; AVX256NODQ-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
92 ; AVX256NODQ-NEXT: ret void
94 ; AVX512-LABEL: @sitofp_4i64_4f64(
95 ; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
96 ; AVX512-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x double>
97 ; AVX512-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
98 ; AVX512-NEXT: ret void
100 ; AVX256DQ-LABEL: @sitofp_4i64_4f64(
101 ; AVX256DQ-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
102 ; AVX256DQ-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x double>
103 ; AVX256DQ-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
104 ; AVX256DQ-NEXT: ret void
106 %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
107 %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
108 %ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
109 %ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
110 %cvt0 = sitofp i64 %ld0 to double
111 %cvt1 = sitofp i64 %ld1 to double
112 %cvt2 = sitofp i64 %ld2 to double
113 %cvt3 = sitofp i64 %ld3 to double
114 store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
115 store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
116 store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
117 store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
121 define void @sitofp_8i64_8f64() #0 {
122 ; SSE-LABEL: @sitofp_8i64_8f64(
123 ; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
124 ; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
125 ; SSE-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
126 ; SSE-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
127 ; SSE-NEXT: [[LD4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
128 ; SSE-NEXT: [[LD5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
129 ; SSE-NEXT: [[LD6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
130 ; SSE-NEXT: [[LD7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
131 ; SSE-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
132 ; SSE-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
133 ; SSE-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to double
134 ; SSE-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to double
135 ; SSE-NEXT: [[CVT4:%.*]] = sitofp i64 [[LD4]] to double
136 ; SSE-NEXT: [[CVT5:%.*]] = sitofp i64 [[LD5]] to double
137 ; SSE-NEXT: [[CVT6:%.*]] = sitofp i64 [[LD6]] to double
138 ; SSE-NEXT: [[CVT7:%.*]] = sitofp i64 [[LD7]] to double
139 ; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
140 ; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
141 ; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
142 ; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
143 ; SSE-NEXT: store double [[CVT4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
144 ; SSE-NEXT: store double [[CVT5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
145 ; SSE-NEXT: store double [[CVT6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
146 ; SSE-NEXT: store double [[CVT7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
149 ; AVX256NODQ-LABEL: @sitofp_8i64_8f64(
150 ; AVX256NODQ-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
151 ; AVX256NODQ-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
152 ; AVX256NODQ-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
153 ; AVX256NODQ-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
154 ; AVX256NODQ-NEXT: [[LD4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
155 ; AVX256NODQ-NEXT: [[LD5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
156 ; AVX256NODQ-NEXT: [[LD6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
157 ; AVX256NODQ-NEXT: [[LD7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
158 ; AVX256NODQ-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
159 ; AVX256NODQ-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
160 ; AVX256NODQ-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to double
161 ; AVX256NODQ-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to double
162 ; AVX256NODQ-NEXT: [[CVT4:%.*]] = sitofp i64 [[LD4]] to double
163 ; AVX256NODQ-NEXT: [[CVT5:%.*]] = sitofp i64 [[LD5]] to double
164 ; AVX256NODQ-NEXT: [[CVT6:%.*]] = sitofp i64 [[LD6]] to double
165 ; AVX256NODQ-NEXT: [[CVT7:%.*]] = sitofp i64 [[LD7]] to double
166 ; AVX256NODQ-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
167 ; AVX256NODQ-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
168 ; AVX256NODQ-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
169 ; AVX256NODQ-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
170 ; AVX256NODQ-NEXT: store double [[CVT4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
171 ; AVX256NODQ-NEXT: store double [[CVT5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
172 ; AVX256NODQ-NEXT: store double [[CVT6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
173 ; AVX256NODQ-NEXT: store double [[CVT7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
174 ; AVX256NODQ-NEXT: ret void
176 ; AVX512-LABEL: @sitofp_8i64_8f64(
177 ; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
178 ; AVX512-NEXT: [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x double>
179 ; AVX512-NEXT: store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 64
180 ; AVX512-NEXT: ret void
182 ; AVX256DQ-LABEL: @sitofp_8i64_8f64(
183 ; AVX256DQ-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
184 ; AVX256DQ-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4) to <4 x i64>*), align 32
185 ; AVX256DQ-NEXT: [[TMP3:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x double>
186 ; AVX256DQ-NEXT: [[TMP4:%.*]] = sitofp <4 x i64> [[TMP2]] to <4 x double>
187 ; AVX256DQ-NEXT: store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
188 ; AVX256DQ-NEXT: store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
189 ; AVX256DQ-NEXT: ret void
191 %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
192 %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
193 %ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
194 %ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
195 %ld4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
196 %ld5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
197 %ld6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
198 %ld7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
199 %cvt0 = sitofp i64 %ld0 to double
200 %cvt1 = sitofp i64 %ld1 to double
201 %cvt2 = sitofp i64 %ld2 to double
202 %cvt3 = sitofp i64 %ld3 to double
203 %cvt4 = sitofp i64 %ld4 to double
204 %cvt5 = sitofp i64 %ld5 to double
205 %cvt6 = sitofp i64 %ld6 to double
206 %cvt7 = sitofp i64 %ld7 to double
207 store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
208 store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
209 store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
210 store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
211 store double %cvt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
212 store double %cvt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
213 store double %cvt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
214 store double %cvt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
218 define void @sitofp_2i32_2f64() #0 {
219 ; CHECK-LABEL: @sitofp_2i32_2f64(
220 ; CHECK-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
221 ; CHECK-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
222 ; CHECK-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double
223 ; CHECK-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double
224 ; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
225 ; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
226 ; CHECK-NEXT: ret void
228 %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
229 %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
230 %cvt0 = sitofp i32 %ld0 to double
231 %cvt1 = sitofp i32 %ld1 to double
232 store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
233 store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
237 define void @sitofp_4i32_4f64() #0 {
238 ; SSE-LABEL: @sitofp_4i32_4f64(
239 ; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
240 ; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
241 ; SSE-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
242 ; SSE-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
243 ; SSE-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double
244 ; SSE-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double
245 ; SSE-NEXT: [[CVT2:%.*]] = sitofp i32 [[LD2]] to double
246 ; SSE-NEXT: [[CVT3:%.*]] = sitofp i32 [[LD3]] to double
247 ; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
248 ; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
249 ; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
250 ; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
253 ; AVX-LABEL: @sitofp_4i32_4f64(
254 ; AVX-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
255 ; AVX-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x double>
256 ; AVX-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
259 %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
260 %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
261 %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
262 %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
263 %cvt0 = sitofp i32 %ld0 to double
264 %cvt1 = sitofp i32 %ld1 to double
265 %cvt2 = sitofp i32 %ld2 to double
266 %cvt3 = sitofp i32 %ld3 to double
267 store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
268 store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
269 store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
270 store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
274 define void @sitofp_8i32_8f64() #0 {
275 ; SSE-LABEL: @sitofp_8i32_8f64(
276 ; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
277 ; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
278 ; SSE-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
279 ; SSE-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
280 ; SSE-NEXT: [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4), align 16
281 ; SSE-NEXT: [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 5), align 4
282 ; SSE-NEXT: [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6), align 8
283 ; SSE-NEXT: [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 7), align 4
284 ; SSE-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double
285 ; SSE-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double
286 ; SSE-NEXT: [[CVT2:%.*]] = sitofp i32 [[LD2]] to double
287 ; SSE-NEXT: [[CVT3:%.*]] = sitofp i32 [[LD3]] to double
288 ; SSE-NEXT: [[CVT4:%.*]] = sitofp i32 [[LD4]] to double
289 ; SSE-NEXT: [[CVT5:%.*]] = sitofp i32 [[LD5]] to double
290 ; SSE-NEXT: [[CVT6:%.*]] = sitofp i32 [[LD6]] to double
291 ; SSE-NEXT: [[CVT7:%.*]] = sitofp i32 [[LD7]] to double
292 ; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
293 ; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
294 ; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
295 ; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
296 ; SSE-NEXT: store double [[CVT4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
297 ; SSE-NEXT: store double [[CVT5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
298 ; SSE-NEXT: store double [[CVT6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
299 ; SSE-NEXT: store double [[CVT7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
302 ; AVX256-LABEL: @sitofp_8i32_8f64(
303 ; AVX256-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
304 ; AVX256-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 16
305 ; AVX256-NEXT: [[TMP3:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x double>
306 ; AVX256-NEXT: [[TMP4:%.*]] = sitofp <4 x i32> [[TMP2]] to <4 x double>
307 ; AVX256-NEXT: store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
308 ; AVX256-NEXT: store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
309 ; AVX256-NEXT: ret void
311 ; AVX512-LABEL: @sitofp_8i32_8f64(
312 ; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @src32 to <8 x i32>*), align 64
313 ; AVX512-NEXT: [[TMP2:%.*]] = sitofp <8 x i32> [[TMP1]] to <8 x double>
314 ; AVX512-NEXT: store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 64
315 ; AVX512-NEXT: ret void
317 %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
318 %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
319 %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
320 %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
321 %ld4 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4), align 16
322 %ld5 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 5), align 4
323 %ld6 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6), align 8
324 %ld7 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 7), align 4
325 %cvt0 = sitofp i32 %ld0 to double
326 %cvt1 = sitofp i32 %ld1 to double
327 %cvt2 = sitofp i32 %ld2 to double
328 %cvt3 = sitofp i32 %ld3 to double
329 %cvt4 = sitofp i32 %ld4 to double
330 %cvt5 = sitofp i32 %ld5 to double
331 %cvt6 = sitofp i32 %ld6 to double
332 %cvt7 = sitofp i32 %ld7 to double
333 store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
334 store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
335 store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
336 store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
337 store double %cvt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
338 store double %cvt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
339 store double %cvt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
340 store double %cvt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
344 define void @sitofp_2i16_2f64() #0 {
345 ; CHECK-LABEL: @sitofp_2i16_2f64(
346 ; CHECK-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
347 ; CHECK-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
348 ; CHECK-NEXT: [[CVT0:%.*]] = sitofp i16 [[LD0]] to double
349 ; CHECK-NEXT: [[CVT1:%.*]] = sitofp i16 [[LD1]] to double
350 ; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
351 ; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
352 ; CHECK-NEXT: ret void
354 %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
355 %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
356 %cvt0 = sitofp i16 %ld0 to double
357 %cvt1 = sitofp i16 %ld1 to double
358 store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
359 store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
363 define void @sitofp_4i16_4f64() #0 {
364 ; SSE-LABEL: @sitofp_4i16_4f64(
365 ; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
366 ; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
367 ; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
368 ; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
369 ; SSE-NEXT: [[CVT0:%.*]] = sitofp i16 [[LD0]] to double
370 ; SSE-NEXT: [[CVT1:%.*]] = sitofp i16 [[LD1]] to double
371 ; SSE-NEXT: [[CVT2:%.*]] = sitofp i16 [[LD2]] to double
372 ; SSE-NEXT: [[CVT3:%.*]] = sitofp i16 [[LD3]] to double
373 ; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
374 ; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
375 ; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
376 ; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
379 ; AVX-LABEL: @sitofp_4i16_4f64(
380 ; AVX-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
381 ; AVX-NEXT: [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x double>
382 ; AVX-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
385 %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
386 %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
387 %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
388 %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
389 %cvt0 = sitofp i16 %ld0 to double
390 %cvt1 = sitofp i16 %ld1 to double
391 %cvt2 = sitofp i16 %ld2 to double
392 %cvt3 = sitofp i16 %ld3 to double
393 store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
394 store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
395 store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
396 store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
400 define void @sitofp_8i16_8f64() #0 {
401 ; SSE-LABEL: @sitofp_8i16_8f64(
402 ; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
403 ; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
404 ; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
405 ; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
406 ; SSE-NEXT: [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8
407 ; SSE-NEXT: [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2
408 ; SSE-NEXT: [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4
409 ; SSE-NEXT: [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2
410 ; SSE-NEXT: [[CVT0:%.*]] = sitofp i16 [[LD0]] to double
411 ; SSE-NEXT: [[CVT1:%.*]] = sitofp i16 [[LD1]] to double
412 ; SSE-NEXT: [[CVT2:%.*]] = sitofp i16 [[LD2]] to double
413 ; SSE-NEXT: [[CVT3:%.*]] = sitofp i16 [[LD3]] to double
414 ; SSE-NEXT: [[CVT4:%.*]] = sitofp i16 [[LD4]] to double
415 ; SSE-NEXT: [[CVT5:%.*]] = sitofp i16 [[LD5]] to double
416 ; SSE-NEXT: [[CVT6:%.*]] = sitofp i16 [[LD6]] to double
417 ; SSE-NEXT: [[CVT7:%.*]] = sitofp i16 [[LD7]] to double
418 ; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
419 ; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
420 ; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
421 ; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
422 ; SSE-NEXT: store double [[CVT4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
423 ; SSE-NEXT: store double [[CVT5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
424 ; SSE-NEXT: store double [[CVT6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
425 ; SSE-NEXT: store double [[CVT7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
428 ; AVX256-LABEL: @sitofp_8i16_8f64(
429 ; AVX256-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
430 ; AVX256-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 8
431 ; AVX256-NEXT: [[TMP3:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x double>
432 ; AVX256-NEXT: [[TMP4:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x double>
433 ; AVX256-NEXT: store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
434 ; AVX256-NEXT: store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
435 ; AVX256-NEXT: ret void
437 ; AVX512-LABEL: @sitofp_8i16_8f64(
438 ; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @src16 to <8 x i16>*), align 64
439 ; AVX512-NEXT: [[TMP2:%.*]] = sitofp <8 x i16> [[TMP1]] to <8 x double>
440 ; AVX512-NEXT: store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 64
441 ; AVX512-NEXT: ret void
443 %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
444 %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
445 %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
446 %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
447 %ld4 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8
448 %ld5 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2
449 %ld6 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4
450 %ld7 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2
451 %cvt0 = sitofp i16 %ld0 to double
452 %cvt1 = sitofp i16 %ld1 to double
453 %cvt2 = sitofp i16 %ld2 to double
454 %cvt3 = sitofp i16 %ld3 to double
455 %cvt4 = sitofp i16 %ld4 to double
456 %cvt5 = sitofp i16 %ld5 to double
457 %cvt6 = sitofp i16 %ld6 to double
458 %cvt7 = sitofp i16 %ld7 to double
459 store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
460 store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
461 store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
462 store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
463 store double %cvt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
464 store double %cvt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
465 store double %cvt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
466 store double %cvt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
470 define void @sitofp_2i8_2f64() #0 {
471 ; CHECK-LABEL: @sitofp_2i8_2f64(
472 ; CHECK-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
473 ; CHECK-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
474 ; CHECK-NEXT: [[CVT0:%.*]] = sitofp i8 [[LD0]] to double
475 ; CHECK-NEXT: [[CVT1:%.*]] = sitofp i8 [[LD1]] to double
476 ; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
477 ; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
478 ; CHECK-NEXT: ret void
480 %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
481 %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
482 %cvt0 = sitofp i8 %ld0 to double
483 %cvt1 = sitofp i8 %ld1 to double
484 store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
485 store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
489 define void @sitofp_4i8_4f64() #0 {
490 ; SSE-LABEL: @sitofp_4i8_4f64(
491 ; SSE-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
492 ; SSE-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
493 ; SSE-NEXT: [[LD2:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
494 ; SSE-NEXT: [[LD3:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
495 ; SSE-NEXT: [[CVT0:%.*]] = sitofp i8 [[LD0]] to double
496 ; SSE-NEXT: [[CVT1:%.*]] = sitofp i8 [[LD1]] to double
497 ; SSE-NEXT: [[CVT2:%.*]] = sitofp i8 [[LD2]] to double
498 ; SSE-NEXT: [[CVT3:%.*]] = sitofp i8 [[LD3]] to double
499 ; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
500 ; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
501 ; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
502 ; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
505 ; AVX-LABEL: @sitofp_4i8_4f64(
506 ; AVX-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
507 ; AVX-NEXT: [[TMP2:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x double>
508 ; AVX-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
511 %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
512 %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
513 %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
514 %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
515 %cvt0 = sitofp i8 %ld0 to double
516 %cvt1 = sitofp i8 %ld1 to double
517 %cvt2 = sitofp i8 %ld2 to double
518 %cvt3 = sitofp i8 %ld3 to double
519 store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
520 store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
521 store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
522 store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
526 define void @sitofp_8i8_8f64() #0 {
527 ; SSE-LABEL: @sitofp_8i8_8f64(
528 ; SSE-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
529 ; SSE-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
530 ; SSE-NEXT: [[LD2:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
531 ; SSE-NEXT: [[LD3:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
532 ; SSE-NEXT: [[LD4:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4), align 4
533 ; SSE-NEXT: [[LD5:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 5), align 1
534 ; SSE-NEXT: [[LD6:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6), align 2
535 ; SSE-NEXT: [[LD7:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 7), align 1
536 ; SSE-NEXT: [[CVT0:%.*]] = sitofp i8 [[LD0]] to double
537 ; SSE-NEXT: [[CVT1:%.*]] = sitofp i8 [[LD1]] to double
538 ; SSE-NEXT: [[CVT2:%.*]] = sitofp i8 [[LD2]] to double
539 ; SSE-NEXT: [[CVT3:%.*]] = sitofp i8 [[LD3]] to double
540 ; SSE-NEXT: [[CVT4:%.*]] = sitofp i8 [[LD4]] to double
541 ; SSE-NEXT: [[CVT5:%.*]] = sitofp i8 [[LD5]] to double
542 ; SSE-NEXT: [[CVT6:%.*]] = sitofp i8 [[LD6]] to double
543 ; SSE-NEXT: [[CVT7:%.*]] = sitofp i8 [[LD7]] to double
544 ; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
545 ; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
546 ; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
547 ; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
548 ; SSE-NEXT: store double [[CVT4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
549 ; SSE-NEXT: store double [[CVT5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
550 ; SSE-NEXT: store double [[CVT6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
551 ; SSE-NEXT: store double [[CVT7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
554 ; AVX256-LABEL: @sitofp_8i8_8f64(
555 ; AVX256-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
556 ; AVX256-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4) to <4 x i8>*), align 4
557 ; AVX256-NEXT: [[TMP3:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x double>
558 ; AVX256-NEXT: [[TMP4:%.*]] = sitofp <4 x i8> [[TMP2]] to <4 x double>
559 ; AVX256-NEXT: store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
560 ; AVX256-NEXT: store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
561 ; AVX256-NEXT: ret void
563 ; AVX512-LABEL: @sitofp_8i8_8f64(
564 ; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* bitcast ([64 x i8]* @src8 to <8 x i8>*), align 64
565 ; AVX512-NEXT: [[TMP2:%.*]] = sitofp <8 x i8> [[TMP1]] to <8 x double>
566 ; AVX512-NEXT: store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 64
567 ; AVX512-NEXT: ret void
569 %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
570 %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
571 %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
572 %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
573 %ld4 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4), align 4
574 %ld5 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 5), align 1
575 %ld6 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6), align 2
576 %ld7 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 7), align 1
577 %cvt0 = sitofp i8 %ld0 to double
578 %cvt1 = sitofp i8 %ld1 to double
579 %cvt2 = sitofp i8 %ld2 to double
580 %cvt3 = sitofp i8 %ld3 to double
581 %cvt4 = sitofp i8 %ld4 to double
582 %cvt5 = sitofp i8 %ld5 to double
583 %cvt6 = sitofp i8 %ld6 to double
584 %cvt7 = sitofp i8 %ld7 to double
585 store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
586 store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
587 store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
588 store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
589 store double %cvt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
590 store double %cvt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
591 store double %cvt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
592 store double %cvt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
600 define void @sitofp_2i64_2f32() #0 {
601 ; CHECK-LABEL: @sitofp_2i64_2f32(
602 ; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
603 ; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
604 ; CHECK-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
605 ; CHECK-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
606 ; CHECK-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
607 ; CHECK-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
608 ; CHECK-NEXT: ret void
610 %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
611 %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
612 %cvt0 = sitofp i64 %ld0 to float
613 %cvt1 = sitofp i64 %ld1 to float
614 store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
615 store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
619 define void @sitofp_4i64_4f32() #0 {
620 ; SSE-LABEL: @sitofp_4i64_4f32(
621 ; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
622 ; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
623 ; SSE-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
624 ; SSE-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
625 ; SSE-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
626 ; SSE-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
627 ; SSE-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to float
628 ; SSE-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to float
629 ; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
630 ; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
631 ; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
632 ; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
635 ; AVX256NODQ-LABEL: @sitofp_4i64_4f32(
636 ; AVX256NODQ-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
637 ; AVX256NODQ-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
638 ; AVX256NODQ-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
639 ; AVX256NODQ-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
640 ; AVX256NODQ-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
641 ; AVX256NODQ-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
642 ; AVX256NODQ-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to float
643 ; AVX256NODQ-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to float
644 ; AVX256NODQ-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
645 ; AVX256NODQ-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
646 ; AVX256NODQ-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
647 ; AVX256NODQ-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
648 ; AVX256NODQ-NEXT: ret void
650 ; AVX512-LABEL: @sitofp_4i64_4f32(
651 ; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
652 ; AVX512-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
653 ; AVX512-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
654 ; AVX512-NEXT: ret void
656 ; AVX256DQ-LABEL: @sitofp_4i64_4f32(
657 ; AVX256DQ-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
658 ; AVX256DQ-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
659 ; AVX256DQ-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
660 ; AVX256DQ-NEXT: ret void
662 %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
663 %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
664 %ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
665 %ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
666 %cvt0 = sitofp i64 %ld0 to float
667 %cvt1 = sitofp i64 %ld1 to float
668 %cvt2 = sitofp i64 %ld2 to float
669 %cvt3 = sitofp i64 %ld3 to float
670 store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
671 store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
672 store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
673 store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
677 define void @sitofp_8i64_8f32() #0 {
678 ; SSE-LABEL: @sitofp_8i64_8f32(
679 ; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
680 ; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
681 ; SSE-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
682 ; SSE-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
683 ; SSE-NEXT: [[LD4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
684 ; SSE-NEXT: [[LD5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
685 ; SSE-NEXT: [[LD6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
686 ; SSE-NEXT: [[LD7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
687 ; SSE-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
688 ; SSE-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
689 ; SSE-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to float
690 ; SSE-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to float
691 ; SSE-NEXT: [[CVT4:%.*]] = sitofp i64 [[LD4]] to float
692 ; SSE-NEXT: [[CVT5:%.*]] = sitofp i64 [[LD5]] to float
693 ; SSE-NEXT: [[CVT6:%.*]] = sitofp i64 [[LD6]] to float
694 ; SSE-NEXT: [[CVT7:%.*]] = sitofp i64 [[LD7]] to float
695 ; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
696 ; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
697 ; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
698 ; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
699 ; SSE-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
700 ; SSE-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
701 ; SSE-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
702 ; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
705 ; AVX256NODQ-LABEL: @sitofp_8i64_8f32(
706 ; AVX256NODQ-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
707 ; AVX256NODQ-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
708 ; AVX256NODQ-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
709 ; AVX256NODQ-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
710 ; AVX256NODQ-NEXT: [[LD4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
711 ; AVX256NODQ-NEXT: [[LD5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
712 ; AVX256NODQ-NEXT: [[LD6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
713 ; AVX256NODQ-NEXT: [[LD7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
714 ; AVX256NODQ-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
715 ; AVX256NODQ-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
716 ; AVX256NODQ-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to float
717 ; AVX256NODQ-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to float
718 ; AVX256NODQ-NEXT: [[CVT4:%.*]] = sitofp i64 [[LD4]] to float
719 ; AVX256NODQ-NEXT: [[CVT5:%.*]] = sitofp i64 [[LD5]] to float
720 ; AVX256NODQ-NEXT: [[CVT6:%.*]] = sitofp i64 [[LD6]] to float
721 ; AVX256NODQ-NEXT: [[CVT7:%.*]] = sitofp i64 [[LD7]] to float
722 ; AVX256NODQ-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
723 ; AVX256NODQ-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
724 ; AVX256NODQ-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
725 ; AVX256NODQ-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
726 ; AVX256NODQ-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
727 ; AVX256NODQ-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
728 ; AVX256NODQ-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
729 ; AVX256NODQ-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
730 ; AVX256NODQ-NEXT: ret void
732 ; AVX512-LABEL: @sitofp_8i64_8f32(
733 ; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
734 ; AVX512-NEXT: [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float>
735 ; AVX512-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
736 ; AVX512-NEXT: ret void
738 ; AVX256DQ-LABEL: @sitofp_8i64_8f32(
739 ; AVX256DQ-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
740 ; AVX256DQ-NEXT: [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float>
741 ; AVX256DQ-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
742 ; AVX256DQ-NEXT: ret void
744 %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
745 %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
746 %ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
747 %ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
748 %ld4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
749 %ld5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
750 %ld6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
751 %ld7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
752 %cvt0 = sitofp i64 %ld0 to float
753 %cvt1 = sitofp i64 %ld1 to float
754 %cvt2 = sitofp i64 %ld2 to float
755 %cvt3 = sitofp i64 %ld3 to float
756 %cvt4 = sitofp i64 %ld4 to float
757 %cvt5 = sitofp i64 %ld5 to float
758 %cvt6 = sitofp i64 %ld6 to float
759 %cvt7 = sitofp i64 %ld7 to float
760 store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
761 store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
762 store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
763 store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
764 store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
765 store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
766 store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
767 store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
771 define void @sitofp_4i32_4f32() #0 {
772 ; CHECK-LABEL: @sitofp_4i32_4f32(
773 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
774 ; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
775 ; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
776 ; CHECK-NEXT: ret void
778 %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
779 %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
780 %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
781 %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
782 %cvt0 = sitofp i32 %ld0 to float
783 %cvt1 = sitofp i32 %ld1 to float
784 %cvt2 = sitofp i32 %ld2 to float
785 %cvt3 = sitofp i32 %ld3 to float
786 store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
787 store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
788 store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
789 store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
793 define void @sitofp_8i32_8f32() #0 {
794 ; SSE-LABEL: @sitofp_8i32_8f32(
795 ; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
796 ; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 16
797 ; SSE-NEXT: [[TMP3:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
798 ; SSE-NEXT: [[TMP4:%.*]] = sitofp <4 x i32> [[TMP2]] to <4 x float>
799 ; SSE-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
800 ; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
803 ; AVX-LABEL: @sitofp_8i32_8f32(
804 ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @src32 to <8 x i32>*), align 64
805 ; AVX-NEXT: [[TMP2:%.*]] = sitofp <8 x i32> [[TMP1]] to <8 x float>
806 ; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
809 %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
810 %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
811 %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
812 %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
813 %ld4 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4), align 16
814 %ld5 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 5), align 4
815 %ld6 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6), align 8
816 %ld7 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 7), align 4
817 %cvt0 = sitofp i32 %ld0 to float
818 %cvt1 = sitofp i32 %ld1 to float
819 %cvt2 = sitofp i32 %ld2 to float
820 %cvt3 = sitofp i32 %ld3 to float
821 %cvt4 = sitofp i32 %ld4 to float
822 %cvt5 = sitofp i32 %ld5 to float
823 %cvt6 = sitofp i32 %ld6 to float
824 %cvt7 = sitofp i32 %ld7 to float
825 store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
826 store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
827 store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
828 store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
829 store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
830 store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
831 store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
832 store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
836 define void @sitofp_16i32_16f32() #0 {
837 ; SSE-LABEL: @sitofp_16i32_16f32(
838 ; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
839 ; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 16
840 ; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 8) to <4 x i32>*), align 32
841 ; SSE-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 12) to <4 x i32>*), align 16
842 ; SSE-NEXT: [[TMP5:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
843 ; SSE-NEXT: [[TMP6:%.*]] = sitofp <4 x i32> [[TMP2]] to <4 x float>
844 ; SSE-NEXT: [[TMP7:%.*]] = sitofp <4 x i32> [[TMP3]] to <4 x float>
845 ; SSE-NEXT: [[TMP8:%.*]] = sitofp <4 x i32> [[TMP4]] to <4 x float>
846 ; SSE-NEXT: store <4 x float> [[TMP5]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
847 ; SSE-NEXT: store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
848 ; SSE-NEXT: store <4 x float> [[TMP7]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 32
849 ; SSE-NEXT: store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 16
852 ; AVX256-LABEL: @sitofp_16i32_16f32(
853 ; AVX256-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @src32 to <8 x i32>*), align 64
854 ; AVX256-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 8) to <8 x i32>*), align 32
855 ; AVX256-NEXT: [[TMP3:%.*]] = sitofp <8 x i32> [[TMP1]] to <8 x float>
856 ; AVX256-NEXT: [[TMP4:%.*]] = sitofp <8 x i32> [[TMP2]] to <8 x float>
857 ; AVX256-NEXT: store <8 x float> [[TMP3]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
858 ; AVX256-NEXT: store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 32
859 ; AVX256-NEXT: ret void
861 ; AVX512-LABEL: @sitofp_16i32_16f32(
862 ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @src32 to <16 x i32>*), align 64
863 ; AVX512-NEXT: [[TMP2:%.*]] = sitofp <16 x i32> [[TMP1]] to <16 x float>
864 ; AVX512-NEXT: store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 64
865 ; AVX512-NEXT: ret void
867 %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0 ), align 64
868 %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1 ), align 4
869 %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2 ), align 8
870 %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3 ), align 4
871 %ld4 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4 ), align 16
872 %ld5 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 5 ), align 4
873 %ld6 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6 ), align 8
874 %ld7 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 7 ), align 4
875 %ld8 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 8 ), align 32
876 %ld9 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 9 ), align 4
877 %ld10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 10), align 8
878 %ld11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 11), align 4
879 %ld12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 12), align 16
880 %ld13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 13), align 4
881 %ld14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 14), align 8
882 %ld15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 15), align 4
883 %cvt0 = sitofp i32 %ld0 to float
884 %cvt1 = sitofp i32 %ld1 to float
885 %cvt2 = sitofp i32 %ld2 to float
886 %cvt3 = sitofp i32 %ld3 to float
887 %cvt4 = sitofp i32 %ld4 to float
888 %cvt5 = sitofp i32 %ld5 to float
889 %cvt6 = sitofp i32 %ld6 to float
890 %cvt7 = sitofp i32 %ld7 to float
891 %cvt8 = sitofp i32 %ld8 to float
892 %cvt9 = sitofp i32 %ld9 to float
893 %cvt10 = sitofp i32 %ld10 to float
894 %cvt11 = sitofp i32 %ld11 to float
895 %cvt12 = sitofp i32 %ld12 to float
896 %cvt13 = sitofp i32 %ld13 to float
897 %cvt14 = sitofp i32 %ld14 to float
898 %cvt15 = sitofp i32 %ld15 to float
899 store float %cvt0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 64
900 store float %cvt1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
901 store float %cvt2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 8
902 store float %cvt3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
903 store float %cvt4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 16
904 store float %cvt5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
905 store float %cvt6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 8
906 store float %cvt7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
907 store float %cvt8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 32
908 store float %cvt9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
909 store float %cvt10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8
910 store float %cvt11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
911 store float %cvt12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16
912 store float %cvt13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
913 store float %cvt14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8
914 store float %cvt15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
918 define void @sitofp_4i16_4f32() #0 {
919 ; SSE-LABEL: @sitofp_4i16_4f32(
920 ; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
921 ; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
922 ; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
923 ; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
924 ; SSE-NEXT: [[CVT0:%.*]] = sitofp i16 [[LD0]] to float
925 ; SSE-NEXT: [[CVT1:%.*]] = sitofp i16 [[LD1]] to float
926 ; SSE-NEXT: [[CVT2:%.*]] = sitofp i16 [[LD2]] to float
927 ; SSE-NEXT: [[CVT3:%.*]] = sitofp i16 [[LD3]] to float
928 ; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
929 ; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
930 ; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
931 ; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
934 ; AVX-LABEL: @sitofp_4i16_4f32(
935 ; AVX-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
936 ; AVX-NEXT: [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float>
937 ; AVX-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
940 %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
941 %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
942 %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
943 %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
944 %cvt0 = sitofp i16 %ld0 to float
945 %cvt1 = sitofp i16 %ld1 to float
946 %cvt2 = sitofp i16 %ld2 to float
947 %cvt3 = sitofp i16 %ld3 to float
948 store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
949 store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
950 store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
951 store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
955 define void @sitofp_8i16_8f32() #0 {
956 ; SSE-LABEL: @sitofp_8i16_8f32(
957 ; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
958 ; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
959 ; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
960 ; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
961 ; SSE-NEXT: [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8
962 ; SSE-NEXT: [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2
963 ; SSE-NEXT: [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4
964 ; SSE-NEXT: [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2
965 ; SSE-NEXT: [[CVT0:%.*]] = sitofp i16 [[LD0]] to float
966 ; SSE-NEXT: [[CVT1:%.*]] = sitofp i16 [[LD1]] to float
967 ; SSE-NEXT: [[CVT2:%.*]] = sitofp i16 [[LD2]] to float
968 ; SSE-NEXT: [[CVT3:%.*]] = sitofp i16 [[LD3]] to float
969 ; SSE-NEXT: [[CVT4:%.*]] = sitofp i16 [[LD4]] to float
970 ; SSE-NEXT: [[CVT5:%.*]] = sitofp i16 [[LD5]] to float
971 ; SSE-NEXT: [[CVT6:%.*]] = sitofp i16 [[LD6]] to float
972 ; SSE-NEXT: [[CVT7:%.*]] = sitofp i16 [[LD7]] to float
973 ; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
974 ; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
975 ; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
976 ; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
977 ; SSE-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
978 ; SSE-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
979 ; SSE-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
980 ; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
983 ; AVX-LABEL: @sitofp_8i16_8f32(
984 ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @src16 to <8 x i16>*), align 64
985 ; AVX-NEXT: [[TMP2:%.*]] = sitofp <8 x i16> [[TMP1]] to <8 x float>
986 ; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
989 %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
990 %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
991 %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
992 %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
993 %ld4 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8
994 %ld5 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2
995 %ld6 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4
996 %ld7 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2
997 %cvt0 = sitofp i16 %ld0 to float
998 %cvt1 = sitofp i16 %ld1 to float
999 %cvt2 = sitofp i16 %ld2 to float
1000 %cvt3 = sitofp i16 %ld3 to float
1001 %cvt4 = sitofp i16 %ld4 to float
1002 %cvt5 = sitofp i16 %ld5 to float
1003 %cvt6 = sitofp i16 %ld6 to float
1004 %cvt7 = sitofp i16 %ld7 to float
1005 store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
1006 store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
1007 store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
1008 store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
1009 store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
1010 store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
1011 store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
1012 store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
1016 define void @sitofp_16i16_16f32() #0 {
1017 ; SSE-LABEL: @sitofp_16i16_16f32(
1018 ; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
1019 ; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
1020 ; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
1021 ; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
1022 ; SSE-NEXT: [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8
1023 ; SSE-NEXT: [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2
1024 ; SSE-NEXT: [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4
1025 ; SSE-NEXT: [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2
1026 ; SSE-NEXT: [[LD8:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8), align 16
1027 ; SSE-NEXT: [[LD9:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 9), align 2
1028 ; SSE-NEXT: [[LD10:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 10), align 4
1029 ; SSE-NEXT: [[LD11:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 11), align 2
1030 ; SSE-NEXT: [[LD12:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 12), align 8
1031 ; SSE-NEXT: [[LD13:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 13), align 2
1032 ; SSE-NEXT: [[LD14:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 14), align 4
1033 ; SSE-NEXT: [[LD15:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 15), align 2
1034 ; SSE-NEXT: [[CVT0:%.*]] = sitofp i16 [[LD0]] to float
1035 ; SSE-NEXT: [[CVT1:%.*]] = sitofp i16 [[LD1]] to float
1036 ; SSE-NEXT: [[CVT2:%.*]] = sitofp i16 [[LD2]] to float
1037 ; SSE-NEXT: [[CVT3:%.*]] = sitofp i16 [[LD3]] to float
1038 ; SSE-NEXT: [[CVT4:%.*]] = sitofp i16 [[LD4]] to float
1039 ; SSE-NEXT: [[CVT5:%.*]] = sitofp i16 [[LD5]] to float
1040 ; SSE-NEXT: [[CVT6:%.*]] = sitofp i16 [[LD6]] to float
1041 ; SSE-NEXT: [[CVT7:%.*]] = sitofp i16 [[LD7]] to float
1042 ; SSE-NEXT: [[CVT8:%.*]] = sitofp i16 [[LD8]] to float
1043 ; SSE-NEXT: [[CVT9:%.*]] = sitofp i16 [[LD9]] to float
1044 ; SSE-NEXT: [[CVT10:%.*]] = sitofp i16 [[LD10]] to float
1045 ; SSE-NEXT: [[CVT11:%.*]] = sitofp i16 [[LD11]] to float
1046 ; SSE-NEXT: [[CVT12:%.*]] = sitofp i16 [[LD12]] to float
1047 ; SSE-NEXT: [[CVT13:%.*]] = sitofp i16 [[LD13]] to float
1048 ; SSE-NEXT: [[CVT14:%.*]] = sitofp i16 [[LD14]] to float
1049 ; SSE-NEXT: [[CVT15:%.*]] = sitofp i16 [[LD15]] to float
1050 ; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
1051 ; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
1052 ; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
1053 ; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
1054 ; SSE-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
1055 ; SSE-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
1056 ; SSE-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
1057 ; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
1058 ; SSE-NEXT: store float [[CVT8]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8), align 32
1059 ; SSE-NEXT: store float [[CVT9]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9), align 4
1060 ; SSE-NEXT: store float [[CVT10]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8
1061 ; SSE-NEXT: store float [[CVT11]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
1062 ; SSE-NEXT: store float [[CVT12]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16
1063 ; SSE-NEXT: store float [[CVT13]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
1064 ; SSE-NEXT: store float [[CVT14]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8
1065 ; SSE-NEXT: store float [[CVT15]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
1066 ; SSE-NEXT: ret void
1068 ; AVX256-LABEL: @sitofp_16i16_16f32(
1069 ; AVX256-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @src16 to <8 x i16>*), align 64
1070 ; AVX256-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8) to <8 x i16>*), align 16
1071 ; AVX256-NEXT: [[TMP3:%.*]] = sitofp <8 x i16> [[TMP1]] to <8 x float>
1072 ; AVX256-NEXT: [[TMP4:%.*]] = sitofp <8 x i16> [[TMP2]] to <8 x float>
1073 ; AVX256-NEXT: store <8 x float> [[TMP3]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
1074 ; AVX256-NEXT: store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 32
1075 ; AVX256-NEXT: ret void
1077 ; AVX512-LABEL: @sitofp_16i16_16f32(
1078 ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @src16 to <16 x i16>*), align 64
1079 ; AVX512-NEXT: [[TMP2:%.*]] = sitofp <16 x i16> [[TMP1]] to <16 x float>
1080 ; AVX512-NEXT: store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 64
1081 ; AVX512-NEXT: ret void
1083 %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0 ), align 64
1084 %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1 ), align 2
1085 %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2 ), align 4
1086 %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3 ), align 2
1087 %ld4 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4 ), align 8
1088 %ld5 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5 ), align 2
1089 %ld6 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6 ), align 4
1090 %ld7 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7 ), align 2
1091 %ld8 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8 ), align 16
1092 %ld9 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 9 ), align 2
1093 %ld10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 10), align 4
1094 %ld11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 11), align 2
1095 %ld12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 12), align 8
1096 %ld13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 13), align 2
1097 %ld14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 14), align 4
1098 %ld15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 15), align 2
1099 %cvt0 = sitofp i16 %ld0 to float
1100 %cvt1 = sitofp i16 %ld1 to float
1101 %cvt2 = sitofp i16 %ld2 to float
1102 %cvt3 = sitofp i16 %ld3 to float
1103 %cvt4 = sitofp i16 %ld4 to float
1104 %cvt5 = sitofp i16 %ld5 to float
1105 %cvt6 = sitofp i16 %ld6 to float
1106 %cvt7 = sitofp i16 %ld7 to float
1107 %cvt8 = sitofp i16 %ld8 to float
1108 %cvt9 = sitofp i16 %ld9 to float
1109 %cvt10 = sitofp i16 %ld10 to float
1110 %cvt11 = sitofp i16 %ld11 to float
1111 %cvt12 = sitofp i16 %ld12 to float
1112 %cvt13 = sitofp i16 %ld13 to float
1113 %cvt14 = sitofp i16 %ld14 to float
1114 %cvt15 = sitofp i16 %ld15 to float
1115 store float %cvt0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 64
1116 store float %cvt1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
1117 store float %cvt2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 8
1118 store float %cvt3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
1119 store float %cvt4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 16
1120 store float %cvt5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
1121 store float %cvt6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 8
1122 store float %cvt7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
1123 store float %cvt8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 32
1124 store float %cvt9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
1125 store float %cvt10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8
1126 store float %cvt11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
1127 store float %cvt12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16
1128 store float %cvt13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
1129 store float %cvt14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8
1130 store float %cvt15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
1134 define void @sitofp_4i8_4f32() #0 {
1135 ; CHECK-LABEL: @sitofp_4i8_4f32(
1136 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
1137 ; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x float>
1138 ; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
1139 ; CHECK-NEXT: ret void
1141 %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
1142 %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
1143 %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
1144 %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
1145 %cvt0 = sitofp i8 %ld0 to float
1146 %cvt1 = sitofp i8 %ld1 to float
1147 %cvt2 = sitofp i8 %ld2 to float
1148 %cvt3 = sitofp i8 %ld3 to float
1149 store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
1150 store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
1151 store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
1152 store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
1156 define void @sitofp_8i8_8f32() #0 {
1157 ; SSE-LABEL: @sitofp_8i8_8f32(
1158 ; SSE-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
1159 ; SSE-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4) to <4 x i8>*), align 4
1160 ; SSE-NEXT: [[TMP3:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x float>
1161 ; SSE-NEXT: [[TMP4:%.*]] = sitofp <4 x i8> [[TMP2]] to <4 x float>
1162 ; SSE-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
1163 ; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
1164 ; SSE-NEXT: ret void
1166 ; AVX-LABEL: @sitofp_8i8_8f32(
1167 ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* bitcast ([64 x i8]* @src8 to <8 x i8>*), align 64
1168 ; AVX-NEXT: [[TMP2:%.*]] = sitofp <8 x i8> [[TMP1]] to <8 x float>
1169 ; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
1170 ; AVX-NEXT: ret void
1172 %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
1173 %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
1174 %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
1175 %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
1176 %ld4 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4), align 4
1177 %ld5 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 5), align 1
1178 %ld6 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6), align 2
1179 %ld7 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 7), align 1
1180 %cvt0 = sitofp i8 %ld0 to float
1181 %cvt1 = sitofp i8 %ld1 to float
1182 %cvt2 = sitofp i8 %ld2 to float
1183 %cvt3 = sitofp i8 %ld3 to float
1184 %cvt4 = sitofp i8 %ld4 to float
1185 %cvt5 = sitofp i8 %ld5 to float
1186 %cvt6 = sitofp i8 %ld6 to float
1187 %cvt7 = sitofp i8 %ld7 to float
1188 store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
1189 store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
1190 store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
1191 store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
1192 store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
1193 store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
1194 store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
1195 store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
1199 define void @sitofp_16i8_16f32() #0 {
1200 ; SSE-LABEL: @sitofp_16i8_16f32(
1201 ; SSE-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
1202 ; SSE-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4) to <4 x i8>*), align 4
1203 ; SSE-NEXT: [[TMP3:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 8) to <4 x i8>*), align 8
1204 ; SSE-NEXT: [[TMP4:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 12) to <4 x i8>*), align 4
1205 ; SSE-NEXT: [[TMP5:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x float>
1206 ; SSE-NEXT: [[TMP6:%.*]] = sitofp <4 x i8> [[TMP2]] to <4 x float>
1207 ; SSE-NEXT: [[TMP7:%.*]] = sitofp <4 x i8> [[TMP3]] to <4 x float>
1208 ; SSE-NEXT: [[TMP8:%.*]] = sitofp <4 x i8> [[TMP4]] to <4 x float>
1209 ; SSE-NEXT: store <4 x float> [[TMP5]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
1210 ; SSE-NEXT: store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
1211 ; SSE-NEXT: store <4 x float> [[TMP7]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 32
1212 ; SSE-NEXT: store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 16
1213 ; SSE-NEXT: ret void
1215 ; AVX256-LABEL: @sitofp_16i8_16f32(
1216 ; AVX256-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* bitcast ([64 x i8]* @src8 to <8 x i8>*), align 64
1217 ; AVX256-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 8) to <8 x i8>*), align 8
1218 ; AVX256-NEXT: [[TMP3:%.*]] = sitofp <8 x i8> [[TMP1]] to <8 x float>
1219 ; AVX256-NEXT: [[TMP4:%.*]] = sitofp <8 x i8> [[TMP2]] to <8 x float>
1220 ; AVX256-NEXT: store <8 x float> [[TMP3]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
1221 ; AVX256-NEXT: store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 32
1222 ; AVX256-NEXT: ret void
1224 ; AVX512-LABEL: @sitofp_16i8_16f32(
1225 ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @src8 to <16 x i8>*), align 64
1226 ; AVX512-NEXT: [[TMP2:%.*]] = sitofp <16 x i8> [[TMP1]] to <16 x float>
1227 ; AVX512-NEXT: store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 64
1228 ; AVX512-NEXT: ret void
1230 %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0 ), align 64
1231 %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1 ), align 1
1232 %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2 ), align 2
1233 %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3 ), align 1
1234 %ld4 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4 ), align 4
1235 %ld5 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 5 ), align 1
1236 %ld6 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6 ), align 2
1237 %ld7 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 7 ), align 1
1238 %ld8 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 8 ), align 8
1239 %ld9 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 9 ), align 1
1240 %ld10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 10), align 2
1241 %ld11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 11), align 1
1242 %ld12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 12), align 4
1243 %ld13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 13), align 1
1244 %ld14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 14), align 2
1245 %ld15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 15), align 1
1246 %cvt0 = sitofp i8 %ld0 to float
1247 %cvt1 = sitofp i8 %ld1 to float
1248 %cvt2 = sitofp i8 %ld2 to float
1249 %cvt3 = sitofp i8 %ld3 to float
1250 %cvt4 = sitofp i8 %ld4 to float
1251 %cvt5 = sitofp i8 %ld5 to float
1252 %cvt6 = sitofp i8 %ld6 to float
1253 %cvt7 = sitofp i8 %ld7 to float
1254 %cvt8 = sitofp i8 %ld8 to float
1255 %cvt9 = sitofp i8 %ld9 to float
1256 %cvt10 = sitofp i8 %ld10 to float
1257 %cvt11 = sitofp i8 %ld11 to float
1258 %cvt12 = sitofp i8 %ld12 to float
1259 %cvt13 = sitofp i8 %ld13 to float
1260 %cvt14 = sitofp i8 %ld14 to float
1261 %cvt15 = sitofp i8 %ld15 to float
1262 store float %cvt0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 64
1263 store float %cvt1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
1264 store float %cvt2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 8
1265 store float %cvt3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
1266 store float %cvt4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 16
1267 store float %cvt5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
1268 store float %cvt6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 8
1269 store float %cvt7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
1270 store float %cvt8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 32
1271 store float %cvt9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
1272 store float %cvt10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8
1273 store float %cvt11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
1274 store float %cvt12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16
1275 store float %cvt13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
1276 store float %cvt14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8
1277 store float %cvt15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
1282 ; SITOFP BUILDVECTOR
1285 define <4 x double> @sitofp_4xi32_4f64(i32 %a0, i32 %a1, i32 %a2, i32 %a3) #0 {
1286 ; CHECK-LABEL: @sitofp_4xi32_4f64(
1287 ; CHECK-NEXT: [[CVT0:%.*]] = sitofp i32 [[A0:%.*]] to double
1288 ; CHECK-NEXT: [[CVT1:%.*]] = sitofp i32 [[A1:%.*]] to double
1289 ; CHECK-NEXT: [[CVT2:%.*]] = sitofp i32 [[A2:%.*]] to double
1290 ; CHECK-NEXT: [[CVT3:%.*]] = sitofp i32 [[A3:%.*]] to double
1291 ; CHECK-NEXT: [[RES0:%.*]] = insertelement <4 x double> undef, double [[CVT0]], i32 0
1292 ; CHECK-NEXT: [[RES1:%.*]] = insertelement <4 x double> [[RES0]], double [[CVT1]], i32 1
1293 ; CHECK-NEXT: [[RES2:%.*]] = insertelement <4 x double> [[RES1]], double [[CVT2]], i32 2
1294 ; CHECK-NEXT: [[RES3:%.*]] = insertelement <4 x double> [[RES2]], double [[CVT3]], i32 3
1295 ; CHECK-NEXT: ret <4 x double> [[RES3]]
1297 %cvt0 = sitofp i32 %a0 to double
1298 %cvt1 = sitofp i32 %a1 to double
1299 %cvt2 = sitofp i32 %a2 to double
1300 %cvt3 = sitofp i32 %a3 to double
1301 %res0 = insertelement <4 x double> undef, double %cvt0, i32 0
1302 %res1 = insertelement <4 x double> %res0, double %cvt1, i32 1
1303 %res2 = insertelement <4 x double> %res1, double %cvt2, i32 2
1304 %res3 = insertelement <4 x double> %res2, double %cvt3, i32 3
1305 ret <4 x double> %res3
1308 define <4 x float> @sitofp_4xi32_4f32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) #0 {
1309 ; CHECK-LABEL: @sitofp_4xi32_4f32(
1310 ; CHECK-NEXT: [[CVT0:%.*]] = sitofp i32 [[A0:%.*]] to float
1311 ; CHECK-NEXT: [[CVT1:%.*]] = sitofp i32 [[A1:%.*]] to float
1312 ; CHECK-NEXT: [[CVT2:%.*]] = sitofp i32 [[A2:%.*]] to float
1313 ; CHECK-NEXT: [[CVT3:%.*]] = sitofp i32 [[A3:%.*]] to float
1314 ; CHECK-NEXT: [[RES0:%.*]] = insertelement <4 x float> undef, float [[CVT0]], i32 0
1315 ; CHECK-NEXT: [[RES1:%.*]] = insertelement <4 x float> [[RES0]], float [[CVT1]], i32 1
1316 ; CHECK-NEXT: [[RES2:%.*]] = insertelement <4 x float> [[RES1]], float [[CVT2]], i32 2
1317 ; CHECK-NEXT: [[RES3:%.*]] = insertelement <4 x float> [[RES2]], float [[CVT3]], i32 3
1318 ; CHECK-NEXT: ret <4 x float> [[RES3]]
1320 %cvt0 = sitofp i32 %a0 to float
1321 %cvt1 = sitofp i32 %a1 to float
1322 %cvt2 = sitofp i32 %a2 to float
1323 %cvt3 = sitofp i32 %a3 to float
1324 %res0 = insertelement <4 x float> undef, float %cvt0, i32 0
1325 %res1 = insertelement <4 x float> %res0, float %cvt1, i32 1
1326 %res2 = insertelement <4 x float> %res1, float %cvt2, i32 2
1327 %res3 = insertelement <4 x float> %res2, float %cvt3, i32 3
1328 ret <4 x float> %res3
1331 attributes #0 = { nounwind }