1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
3 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256NODQ
4 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256NODQ
5 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256NODQ
6 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=-prefer-256-bit -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512
7 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=+prefer-256-bit -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256DQ
9 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
11 @src64 = common global [8 x double] zeroinitializer, align 64
12 @src32 = common global [16 x float] zeroinitializer, align 64
13 @dst64 = common global [8 x i64] zeroinitializer, align 64
14 @dst32 = common global [16 x i32] zeroinitializer, align 64
15 @dst16 = common global [32 x i16] zeroinitializer, align 64
16 @dst8 = common global [64 x i8] zeroinitializer, align 64
22 define void @fptoui_8f64_8i64() #0 {
23 ; SSE-LABEL: @fptoui_8f64_8i64(
24 ; SSE-NEXT: [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
25 ; SSE-NEXT: [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
26 ; SSE-NEXT: [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
27 ; SSE-NEXT: [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
28 ; SSE-NEXT: [[A4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
29 ; SSE-NEXT: [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
30 ; SSE-NEXT: [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
31 ; SSE-NEXT: [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
32 ; SSE-NEXT: [[CVT0:%.*]] = fptoui double [[A0]] to i64
33 ; SSE-NEXT: [[CVT1:%.*]] = fptoui double [[A1]] to i64
34 ; SSE-NEXT: [[CVT2:%.*]] = fptoui double [[A2]] to i64
35 ; SSE-NEXT: [[CVT3:%.*]] = fptoui double [[A3]] to i64
36 ; SSE-NEXT: [[CVT4:%.*]] = fptoui double [[A4]] to i64
37 ; SSE-NEXT: [[CVT5:%.*]] = fptoui double [[A5]] to i64
38 ; SSE-NEXT: [[CVT6:%.*]] = fptoui double [[A6]] to i64
39 ; SSE-NEXT: [[CVT7:%.*]] = fptoui double [[A7]] to i64
40 ; SSE-NEXT: store i64 [[CVT0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
41 ; SSE-NEXT: store i64 [[CVT1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
42 ; SSE-NEXT: store i64 [[CVT2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
43 ; SSE-NEXT: store i64 [[CVT3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
44 ; SSE-NEXT: store i64 [[CVT4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
45 ; SSE-NEXT: store i64 [[CVT5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
46 ; SSE-NEXT: store i64 [[CVT6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
47 ; SSE-NEXT: store i64 [[CVT7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
50 ; AVX256NODQ-LABEL: @fptoui_8f64_8i64(
51 ; AVX256NODQ-NEXT: [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
52 ; AVX256NODQ-NEXT: [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
53 ; AVX256NODQ-NEXT: [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
54 ; AVX256NODQ-NEXT: [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
55 ; AVX256NODQ-NEXT: [[A4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
56 ; AVX256NODQ-NEXT: [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
57 ; AVX256NODQ-NEXT: [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
58 ; AVX256NODQ-NEXT: [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
59 ; AVX256NODQ-NEXT: [[CVT0:%.*]] = fptoui double [[A0]] to i64
60 ; AVX256NODQ-NEXT: [[CVT1:%.*]] = fptoui double [[A1]] to i64
61 ; AVX256NODQ-NEXT: [[CVT2:%.*]] = fptoui double [[A2]] to i64
62 ; AVX256NODQ-NEXT: [[CVT3:%.*]] = fptoui double [[A3]] to i64
63 ; AVX256NODQ-NEXT: [[CVT4:%.*]] = fptoui double [[A4]] to i64
64 ; AVX256NODQ-NEXT: [[CVT5:%.*]] = fptoui double [[A5]] to i64
65 ; AVX256NODQ-NEXT: [[CVT6:%.*]] = fptoui double [[A6]] to i64
66 ; AVX256NODQ-NEXT: [[CVT7:%.*]] = fptoui double [[A7]] to i64
67 ; AVX256NODQ-NEXT: store i64 [[CVT0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
68 ; AVX256NODQ-NEXT: store i64 [[CVT1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
69 ; AVX256NODQ-NEXT: store i64 [[CVT2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
70 ; AVX256NODQ-NEXT: store i64 [[CVT3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
71 ; AVX256NODQ-NEXT: store i64 [[CVT4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
72 ; AVX256NODQ-NEXT: store i64 [[CVT5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
73 ; AVX256NODQ-NEXT: store i64 [[CVT6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
74 ; AVX256NODQ-NEXT: store i64 [[CVT7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
75 ; AVX256NODQ-NEXT: ret void
77 ; AVX512-LABEL: @fptoui_8f64_8i64(
78 ; AVX512-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
79 ; AVX512-NEXT: [[TMP2:%.*]] = fptoui <8 x double> [[TMP1]] to <8 x i64>
80 ; AVX512-NEXT: store <8 x i64> [[TMP2]], <8 x i64>* bitcast ([8 x i64]* @dst64 to <8 x i64>*), align 8
81 ; AVX512-NEXT: ret void
83 ; AVX256DQ-LABEL: @fptoui_8f64_8i64(
84 ; AVX256DQ-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
85 ; AVX256DQ-NEXT: [[TMP2:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
86 ; AVX256DQ-NEXT: [[TMP3:%.*]] = fptoui <4 x double> [[TMP1]] to <4 x i64>
87 ; AVX256DQ-NEXT: [[TMP4:%.*]] = fptoui <4 x double> [[TMP2]] to <4 x i64>
88 ; AVX256DQ-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* bitcast ([8 x i64]* @dst64 to <4 x i64>*), align 8
89 ; AVX256DQ-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4) to <4 x i64>*), align 8
90 ; AVX256DQ-NEXT: ret void
92 %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
93 %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
94 %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
95 %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
96 %a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
97 %a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
98 %a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
99 %a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
100 %cvt0 = fptoui double %a0 to i64
101 %cvt1 = fptoui double %a1 to i64
102 %cvt2 = fptoui double %a2 to i64
103 %cvt3 = fptoui double %a3 to i64
104 %cvt4 = fptoui double %a4 to i64
105 %cvt5 = fptoui double %a5 to i64
106 %cvt6 = fptoui double %a6 to i64
107 %cvt7 = fptoui double %a7 to i64
108 store i64 %cvt0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
109 store i64 %cvt1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
110 store i64 %cvt2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
111 store i64 %cvt3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
112 store i64 %cvt4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
113 store i64 %cvt5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
114 store i64 %cvt6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
115 store i64 %cvt7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
119 define void @fptoui_8f64_8i32() #0 {
120 ; SSE-LABEL: @fptoui_8f64_8i32(
121 ; SSE-NEXT: [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
122 ; SSE-NEXT: [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
123 ; SSE-NEXT: [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
124 ; SSE-NEXT: [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
125 ; SSE-NEXT: [[A4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
126 ; SSE-NEXT: [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
127 ; SSE-NEXT: [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
128 ; SSE-NEXT: [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
129 ; SSE-NEXT: [[CVT0:%.*]] = fptoui double [[A0]] to i32
130 ; SSE-NEXT: [[CVT1:%.*]] = fptoui double [[A1]] to i32
131 ; SSE-NEXT: [[CVT2:%.*]] = fptoui double [[A2]] to i32
132 ; SSE-NEXT: [[CVT3:%.*]] = fptoui double [[A3]] to i32
133 ; SSE-NEXT: [[CVT4:%.*]] = fptoui double [[A4]] to i32
134 ; SSE-NEXT: [[CVT5:%.*]] = fptoui double [[A5]] to i32
135 ; SSE-NEXT: [[CVT6:%.*]] = fptoui double [[A6]] to i32
136 ; SSE-NEXT: [[CVT7:%.*]] = fptoui double [[A7]] to i32
137 ; SSE-NEXT: store i32 [[CVT0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 0), align 4
138 ; SSE-NEXT: store i32 [[CVT1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 1), align 4
139 ; SSE-NEXT: store i32 [[CVT2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 2), align 4
140 ; SSE-NEXT: store i32 [[CVT3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 3), align 4
141 ; SSE-NEXT: store i32 [[CVT4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4), align 4
142 ; SSE-NEXT: store i32 [[CVT5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 5), align 4
143 ; SSE-NEXT: store i32 [[CVT6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 6), align 4
144 ; SSE-NEXT: store i32 [[CVT7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 7), align 4
147 ; AVX256NODQ-LABEL: @fptoui_8f64_8i32(
148 ; AVX256NODQ-NEXT: [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
149 ; AVX256NODQ-NEXT: [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
150 ; AVX256NODQ-NEXT: [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
151 ; AVX256NODQ-NEXT: [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
152 ; AVX256NODQ-NEXT: [[A4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
153 ; AVX256NODQ-NEXT: [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
154 ; AVX256NODQ-NEXT: [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
155 ; AVX256NODQ-NEXT: [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
156 ; AVX256NODQ-NEXT: [[CVT0:%.*]] = fptoui double [[A0]] to i32
157 ; AVX256NODQ-NEXT: [[CVT1:%.*]] = fptoui double [[A1]] to i32
158 ; AVX256NODQ-NEXT: [[CVT2:%.*]] = fptoui double [[A2]] to i32
159 ; AVX256NODQ-NEXT: [[CVT3:%.*]] = fptoui double [[A3]] to i32
160 ; AVX256NODQ-NEXT: [[CVT4:%.*]] = fptoui double [[A4]] to i32
161 ; AVX256NODQ-NEXT: [[CVT5:%.*]] = fptoui double [[A5]] to i32
162 ; AVX256NODQ-NEXT: [[CVT6:%.*]] = fptoui double [[A6]] to i32
163 ; AVX256NODQ-NEXT: [[CVT7:%.*]] = fptoui double [[A7]] to i32
164 ; AVX256NODQ-NEXT: store i32 [[CVT0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 0), align 4
165 ; AVX256NODQ-NEXT: store i32 [[CVT1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 1), align 4
166 ; AVX256NODQ-NEXT: store i32 [[CVT2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 2), align 4
167 ; AVX256NODQ-NEXT: store i32 [[CVT3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 3), align 4
168 ; AVX256NODQ-NEXT: store i32 [[CVT4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4), align 4
169 ; AVX256NODQ-NEXT: store i32 [[CVT5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 5), align 4
170 ; AVX256NODQ-NEXT: store i32 [[CVT6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 6), align 4
171 ; AVX256NODQ-NEXT: store i32 [[CVT7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 7), align 4
172 ; AVX256NODQ-NEXT: ret void
174 ; AVX512-LABEL: @fptoui_8f64_8i32(
175 ; AVX512-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
176 ; AVX512-NEXT: [[TMP2:%.*]] = fptoui <8 x double> [[TMP1]] to <8 x i32>
177 ; AVX512-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([16 x i32]* @dst32 to <8 x i32>*), align 4
178 ; AVX512-NEXT: ret void
180 ; AVX256DQ-LABEL: @fptoui_8f64_8i32(
181 ; AVX256DQ-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
182 ; AVX256DQ-NEXT: [[TMP2:%.*]] = fptoui <8 x double> [[TMP1]] to <8 x i32>
183 ; AVX256DQ-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([16 x i32]* @dst32 to <8 x i32>*), align 4
184 ; AVX256DQ-NEXT: ret void
186 %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
187 %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
188 %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
189 %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
190 %a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
191 %a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
192 %a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
193 %a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
194 %cvt0 = fptoui double %a0 to i32
195 %cvt1 = fptoui double %a1 to i32
196 %cvt2 = fptoui double %a2 to i32
197 %cvt3 = fptoui double %a3 to i32
198 %cvt4 = fptoui double %a4 to i32
199 %cvt5 = fptoui double %a5 to i32
200 %cvt6 = fptoui double %a6 to i32
201 %cvt7 = fptoui double %a7 to i32
202 store i32 %cvt0, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 0), align 4
203 store i32 %cvt1, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 1), align 4
204 store i32 %cvt2, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 2), align 4
205 store i32 %cvt3, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 3), align 4
206 store i32 %cvt4, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4), align 4
207 store i32 %cvt5, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 5), align 4
208 store i32 %cvt6, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 6), align 4
209 store i32 %cvt7, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 7), align 4
213 define void @fptoui_8f64_8i16() #0 {
214 ; SSE-LABEL: @fptoui_8f64_8i16(
215 ; SSE-NEXT: [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
216 ; SSE-NEXT: [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
217 ; SSE-NEXT: [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
218 ; SSE-NEXT: [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
219 ; SSE-NEXT: [[A4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
220 ; SSE-NEXT: [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
221 ; SSE-NEXT: [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
222 ; SSE-NEXT: [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
223 ; SSE-NEXT: [[CVT0:%.*]] = fptoui double [[A0]] to i16
224 ; SSE-NEXT: [[CVT1:%.*]] = fptoui double [[A1]] to i16
225 ; SSE-NEXT: [[CVT2:%.*]] = fptoui double [[A2]] to i16
226 ; SSE-NEXT: [[CVT3:%.*]] = fptoui double [[A3]] to i16
227 ; SSE-NEXT: [[CVT4:%.*]] = fptoui double [[A4]] to i16
228 ; SSE-NEXT: [[CVT5:%.*]] = fptoui double [[A5]] to i16
229 ; SSE-NEXT: [[CVT6:%.*]] = fptoui double [[A6]] to i16
230 ; SSE-NEXT: [[CVT7:%.*]] = fptoui double [[A7]] to i16
231 ; SSE-NEXT: store i16 [[CVT0]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 0), align 2
232 ; SSE-NEXT: store i16 [[CVT1]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 1), align 2
233 ; SSE-NEXT: store i16 [[CVT2]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 2), align 2
234 ; SSE-NEXT: store i16 [[CVT3]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 3), align 2
235 ; SSE-NEXT: store i16 [[CVT4]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 4), align 2
236 ; SSE-NEXT: store i16 [[CVT5]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 5), align 2
237 ; SSE-NEXT: store i16 [[CVT6]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 6), align 2
238 ; SSE-NEXT: store i16 [[CVT7]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 7), align 2
241 ; AVX256NODQ-LABEL: @fptoui_8f64_8i16(
242 ; AVX256NODQ-NEXT: [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
243 ; AVX256NODQ-NEXT: [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
244 ; AVX256NODQ-NEXT: [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
245 ; AVX256NODQ-NEXT: [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
246 ; AVX256NODQ-NEXT: [[A4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
247 ; AVX256NODQ-NEXT: [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
248 ; AVX256NODQ-NEXT: [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
249 ; AVX256NODQ-NEXT: [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
250 ; AVX256NODQ-NEXT: [[CVT0:%.*]] = fptoui double [[A0]] to i16
251 ; AVX256NODQ-NEXT: [[CVT1:%.*]] = fptoui double [[A1]] to i16
252 ; AVX256NODQ-NEXT: [[CVT2:%.*]] = fptoui double [[A2]] to i16
253 ; AVX256NODQ-NEXT: [[CVT3:%.*]] = fptoui double [[A3]] to i16
254 ; AVX256NODQ-NEXT: [[CVT4:%.*]] = fptoui double [[A4]] to i16
255 ; AVX256NODQ-NEXT: [[CVT5:%.*]] = fptoui double [[A5]] to i16
256 ; AVX256NODQ-NEXT: [[CVT6:%.*]] = fptoui double [[A6]] to i16
257 ; AVX256NODQ-NEXT: [[CVT7:%.*]] = fptoui double [[A7]] to i16
258 ; AVX256NODQ-NEXT: store i16 [[CVT0]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 0), align 2
259 ; AVX256NODQ-NEXT: store i16 [[CVT1]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 1), align 2
260 ; AVX256NODQ-NEXT: store i16 [[CVT2]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 2), align 2
261 ; AVX256NODQ-NEXT: store i16 [[CVT3]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 3), align 2
262 ; AVX256NODQ-NEXT: store i16 [[CVT4]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 4), align 2
263 ; AVX256NODQ-NEXT: store i16 [[CVT5]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 5), align 2
264 ; AVX256NODQ-NEXT: store i16 [[CVT6]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 6), align 2
265 ; AVX256NODQ-NEXT: store i16 [[CVT7]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 7), align 2
266 ; AVX256NODQ-NEXT: ret void
268 ; AVX512-LABEL: @fptoui_8f64_8i16(
269 ; AVX512-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
270 ; AVX512-NEXT: [[TMP2:%.*]] = fptoui <8 x double> [[TMP1]] to <8 x i16>
271 ; AVX512-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([32 x i16]* @dst16 to <8 x i16>*), align 2
272 ; AVX512-NEXT: ret void
274 ; AVX256DQ-LABEL: @fptoui_8f64_8i16(
275 ; AVX256DQ-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
276 ; AVX256DQ-NEXT: [[TMP2:%.*]] = fptoui <8 x double> [[TMP1]] to <8 x i16>
277 ; AVX256DQ-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([32 x i16]* @dst16 to <8 x i16>*), align 2
278 ; AVX256DQ-NEXT: ret void
280 %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
281 %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
282 %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
283 %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
284 %a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
285 %a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
286 %a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
287 %a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
288 %cvt0 = fptoui double %a0 to i16
289 %cvt1 = fptoui double %a1 to i16
290 %cvt2 = fptoui double %a2 to i16
291 %cvt3 = fptoui double %a3 to i16
292 %cvt4 = fptoui double %a4 to i16
293 %cvt5 = fptoui double %a5 to i16
294 %cvt6 = fptoui double %a6 to i16
295 %cvt7 = fptoui double %a7 to i16
296 store i16 %cvt0, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 0), align 2
297 store i16 %cvt1, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 1), align 2
298 store i16 %cvt2, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 2), align 2
299 store i16 %cvt3, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 3), align 2
300 store i16 %cvt4, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 4), align 2
301 store i16 %cvt5, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 5), align 2
302 store i16 %cvt6, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 6), align 2
303 store i16 %cvt7, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 7), align 2
307 define void @fptoui_8f64_8i8() #0 {
308 ; SSE-LABEL: @fptoui_8f64_8i8(
309 ; SSE-NEXT: [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
310 ; SSE-NEXT: [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
311 ; SSE-NEXT: [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
312 ; SSE-NEXT: [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
313 ; SSE-NEXT: [[A4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
314 ; SSE-NEXT: [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
315 ; SSE-NEXT: [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
316 ; SSE-NEXT: [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
317 ; SSE-NEXT: [[CVT0:%.*]] = fptoui double [[A0]] to i8
318 ; SSE-NEXT: [[CVT1:%.*]] = fptoui double [[A1]] to i8
319 ; SSE-NEXT: [[CVT2:%.*]] = fptoui double [[A2]] to i8
320 ; SSE-NEXT: [[CVT3:%.*]] = fptoui double [[A3]] to i8
321 ; SSE-NEXT: [[CVT4:%.*]] = fptoui double [[A4]] to i8
322 ; SSE-NEXT: [[CVT5:%.*]] = fptoui double [[A5]] to i8
323 ; SSE-NEXT: [[CVT6:%.*]] = fptoui double [[A6]] to i8
324 ; SSE-NEXT: [[CVT7:%.*]] = fptoui double [[A7]] to i8
325 ; SSE-NEXT: store i8 [[CVT0]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 0), align 1
326 ; SSE-NEXT: store i8 [[CVT1]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 1), align 1
327 ; SSE-NEXT: store i8 [[CVT2]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 2), align 1
328 ; SSE-NEXT: store i8 [[CVT3]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 3), align 1
329 ; SSE-NEXT: store i8 [[CVT4]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 4), align 1
330 ; SSE-NEXT: store i8 [[CVT5]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 5), align 1
331 ; SSE-NEXT: store i8 [[CVT6]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 6), align 1
332 ; SSE-NEXT: store i8 [[CVT7]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 7), align 1
335 ; AVX256NODQ-LABEL: @fptoui_8f64_8i8(
336 ; AVX256NODQ-NEXT: [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
337 ; AVX256NODQ-NEXT: [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
338 ; AVX256NODQ-NEXT: [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
339 ; AVX256NODQ-NEXT: [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
340 ; AVX256NODQ-NEXT: [[A4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
341 ; AVX256NODQ-NEXT: [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
342 ; AVX256NODQ-NEXT: [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
343 ; AVX256NODQ-NEXT: [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
344 ; AVX256NODQ-NEXT: [[CVT0:%.*]] = fptoui double [[A0]] to i8
345 ; AVX256NODQ-NEXT: [[CVT1:%.*]] = fptoui double [[A1]] to i8
346 ; AVX256NODQ-NEXT: [[CVT2:%.*]] = fptoui double [[A2]] to i8
347 ; AVX256NODQ-NEXT: [[CVT3:%.*]] = fptoui double [[A3]] to i8
348 ; AVX256NODQ-NEXT: [[CVT4:%.*]] = fptoui double [[A4]] to i8
349 ; AVX256NODQ-NEXT: [[CVT5:%.*]] = fptoui double [[A5]] to i8
350 ; AVX256NODQ-NEXT: [[CVT6:%.*]] = fptoui double [[A6]] to i8
351 ; AVX256NODQ-NEXT: [[CVT7:%.*]] = fptoui double [[A7]] to i8
352 ; AVX256NODQ-NEXT: store i8 [[CVT0]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 0), align 1
353 ; AVX256NODQ-NEXT: store i8 [[CVT1]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 1), align 1
354 ; AVX256NODQ-NEXT: store i8 [[CVT2]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 2), align 1
355 ; AVX256NODQ-NEXT: store i8 [[CVT3]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 3), align 1
356 ; AVX256NODQ-NEXT: store i8 [[CVT4]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 4), align 1
357 ; AVX256NODQ-NEXT: store i8 [[CVT5]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 5), align 1
358 ; AVX256NODQ-NEXT: store i8 [[CVT6]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 6), align 1
359 ; AVX256NODQ-NEXT: store i8 [[CVT7]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 7), align 1
360 ; AVX256NODQ-NEXT: ret void
362 ; AVX512-LABEL: @fptoui_8f64_8i8(
363 ; AVX512-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
364 ; AVX512-NEXT: [[TMP2:%.*]] = fptoui <8 x double> [[TMP1]] to <8 x i8>
365 ; AVX512-NEXT: store <8 x i8> [[TMP2]], <8 x i8>* bitcast ([64 x i8]* @dst8 to <8 x i8>*), align 1
366 ; AVX512-NEXT: ret void
368 ; AVX256DQ-LABEL: @fptoui_8f64_8i8(
369 ; AVX256DQ-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
370 ; AVX256DQ-NEXT: [[TMP2:%.*]] = fptoui <8 x double> [[TMP1]] to <8 x i8>
371 ; AVX256DQ-NEXT: store <8 x i8> [[TMP2]], <8 x i8>* bitcast ([64 x i8]* @dst8 to <8 x i8>*), align 1
372 ; AVX256DQ-NEXT: ret void
374 %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
375 %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
376 %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
377 %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
378 %a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
379 %a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
380 %a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
381 %a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
382 %cvt0 = fptoui double %a0 to i8
383 %cvt1 = fptoui double %a1 to i8
384 %cvt2 = fptoui double %a2 to i8
385 %cvt3 = fptoui double %a3 to i8
386 %cvt4 = fptoui double %a4 to i8
387 %cvt5 = fptoui double %a5 to i8
388 %cvt6 = fptoui double %a6 to i8
389 %cvt7 = fptoui double %a7 to i8
390 store i8 %cvt0, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 0), align 1
391 store i8 %cvt1, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 1), align 1
392 store i8 %cvt2, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 2), align 1
393 store i8 %cvt3, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 3), align 1
394 store i8 %cvt4, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 4), align 1
395 store i8 %cvt5, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 5), align 1
396 store i8 %cvt6, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 6), align 1
397 store i8 %cvt7, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 7), align 1
405 define void @fptoui_8f32_8i64() #0 {
406 ; SSE-LABEL: @fptoui_8f32_8i64(
407 ; SSE-NEXT: [[A0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
408 ; SSE-NEXT: [[A1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
409 ; SSE-NEXT: [[A2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
410 ; SSE-NEXT: [[A3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
411 ; SSE-NEXT: [[A4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
412 ; SSE-NEXT: [[A5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
413 ; SSE-NEXT: [[A6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
414 ; SSE-NEXT: [[A7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
415 ; SSE-NEXT: [[CVT0:%.*]] = fptoui float [[A0]] to i64
416 ; SSE-NEXT: [[CVT1:%.*]] = fptoui float [[A1]] to i64
417 ; SSE-NEXT: [[CVT2:%.*]] = fptoui float [[A2]] to i64
418 ; SSE-NEXT: [[CVT3:%.*]] = fptoui float [[A3]] to i64
419 ; SSE-NEXT: [[CVT4:%.*]] = fptoui float [[A4]] to i64
420 ; SSE-NEXT: [[CVT5:%.*]] = fptoui float [[A5]] to i64
421 ; SSE-NEXT: [[CVT6:%.*]] = fptoui float [[A6]] to i64
422 ; SSE-NEXT: [[CVT7:%.*]] = fptoui float [[A7]] to i64
423 ; SSE-NEXT: store i64 [[CVT0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
424 ; SSE-NEXT: store i64 [[CVT1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
425 ; SSE-NEXT: store i64 [[CVT2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
426 ; SSE-NEXT: store i64 [[CVT3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
427 ; SSE-NEXT: store i64 [[CVT4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
428 ; SSE-NEXT: store i64 [[CVT5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
429 ; SSE-NEXT: store i64 [[CVT6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
430 ; SSE-NEXT: store i64 [[CVT7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
433 ; AVX256NODQ-LABEL: @fptoui_8f32_8i64(
434 ; AVX256NODQ-NEXT: [[A0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
435 ; AVX256NODQ-NEXT: [[A1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
436 ; AVX256NODQ-NEXT: [[A2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
437 ; AVX256NODQ-NEXT: [[A3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
438 ; AVX256NODQ-NEXT: [[A4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
439 ; AVX256NODQ-NEXT: [[A5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
440 ; AVX256NODQ-NEXT: [[A6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
441 ; AVX256NODQ-NEXT: [[A7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
442 ; AVX256NODQ-NEXT: [[CVT0:%.*]] = fptoui float [[A0]] to i64
443 ; AVX256NODQ-NEXT: [[CVT1:%.*]] = fptoui float [[A1]] to i64
444 ; AVX256NODQ-NEXT: [[CVT2:%.*]] = fptoui float [[A2]] to i64
445 ; AVX256NODQ-NEXT: [[CVT3:%.*]] = fptoui float [[A3]] to i64
446 ; AVX256NODQ-NEXT: [[CVT4:%.*]] = fptoui float [[A4]] to i64
447 ; AVX256NODQ-NEXT: [[CVT5:%.*]] = fptoui float [[A5]] to i64
448 ; AVX256NODQ-NEXT: [[CVT6:%.*]] = fptoui float [[A6]] to i64
449 ; AVX256NODQ-NEXT: [[CVT7:%.*]] = fptoui float [[A7]] to i64
450 ; AVX256NODQ-NEXT: store i64 [[CVT0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
451 ; AVX256NODQ-NEXT: store i64 [[CVT1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
452 ; AVX256NODQ-NEXT: store i64 [[CVT2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
453 ; AVX256NODQ-NEXT: store i64 [[CVT3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
454 ; AVX256NODQ-NEXT: store i64 [[CVT4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
455 ; AVX256NODQ-NEXT: store i64 [[CVT5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
456 ; AVX256NODQ-NEXT: store i64 [[CVT6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
457 ; AVX256NODQ-NEXT: store i64 [[CVT7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
458 ; AVX256NODQ-NEXT: ret void
460 ; AVX512-LABEL: @fptoui_8f32_8i64(
461 ; AVX512-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
462 ; AVX512-NEXT: [[TMP2:%.*]] = fptoui <8 x float> [[TMP1]] to <8 x i64>
463 ; AVX512-NEXT: store <8 x i64> [[TMP2]], <8 x i64>* bitcast ([8 x i64]* @dst64 to <8 x i64>*), align 8
464 ; AVX512-NEXT: ret void
466 ; AVX256DQ-LABEL: @fptoui_8f32_8i64(
467 ; AVX256DQ-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
468 ; AVX256DQ-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
469 ; AVX256DQ-NEXT: [[TMP3:%.*]] = fptoui <4 x float> [[TMP1]] to <4 x i64>
470 ; AVX256DQ-NEXT: [[TMP4:%.*]] = fptoui <4 x float> [[TMP2]] to <4 x i64>
471 ; AVX256DQ-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* bitcast ([8 x i64]* @dst64 to <4 x i64>*), align 8
472 ; AVX256DQ-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4) to <4 x i64>*), align 8
473 ; AVX256DQ-NEXT: ret void
475 %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
476 %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
477 %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
478 %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
479 %a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
480 %a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
481 %a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
482 %a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
483 %cvt0 = fptoui float %a0 to i64
484 %cvt1 = fptoui float %a1 to i64
485 %cvt2 = fptoui float %a2 to i64
486 %cvt3 = fptoui float %a3 to i64
487 %cvt4 = fptoui float %a4 to i64
488 %cvt5 = fptoui float %a5 to i64
489 %cvt6 = fptoui float %a6 to i64
490 %cvt7 = fptoui float %a7 to i64
491 store i64 %cvt0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
492 store i64 %cvt1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
493 store i64 %cvt2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
494 store i64 %cvt3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
495 store i64 %cvt4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
496 store i64 %cvt5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
497 store i64 %cvt6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
498 store i64 %cvt7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
502 define void @fptoui_8f32_8i32() #0 {
503 ; SSE-LABEL: @fptoui_8f32_8i32(
504 ; SSE-NEXT: [[A0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
505 ; SSE-NEXT: [[A1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
506 ; SSE-NEXT: [[A2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
507 ; SSE-NEXT: [[A3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
508 ; SSE-NEXT: [[A4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
509 ; SSE-NEXT: [[A5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
510 ; SSE-NEXT: [[A6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
511 ; SSE-NEXT: [[A7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
512 ; SSE-NEXT: [[CVT0:%.*]] = fptoui float [[A0]] to i32
513 ; SSE-NEXT: [[CVT1:%.*]] = fptoui float [[A1]] to i32
514 ; SSE-NEXT: [[CVT2:%.*]] = fptoui float [[A2]] to i32
515 ; SSE-NEXT: [[CVT3:%.*]] = fptoui float [[A3]] to i32
516 ; SSE-NEXT: [[CVT4:%.*]] = fptoui float [[A4]] to i32
517 ; SSE-NEXT: [[CVT5:%.*]] = fptoui float [[A5]] to i32
518 ; SSE-NEXT: [[CVT6:%.*]] = fptoui float [[A6]] to i32
519 ; SSE-NEXT: [[CVT7:%.*]] = fptoui float [[A7]] to i32
520 ; SSE-NEXT: store i32 [[CVT0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 0), align 4
521 ; SSE-NEXT: store i32 [[CVT1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 1), align 4
522 ; SSE-NEXT: store i32 [[CVT2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 2), align 4
523 ; SSE-NEXT: store i32 [[CVT3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 3), align 4
524 ; SSE-NEXT: store i32 [[CVT4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4), align 4
525 ; SSE-NEXT: store i32 [[CVT5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 5), align 4
526 ; SSE-NEXT: store i32 [[CVT6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 6), align 4
527 ; SSE-NEXT: store i32 [[CVT7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 7), align 4
530 ; AVX256NODQ-LABEL: @fptoui_8f32_8i32(
531 ; AVX256NODQ-NEXT: [[A0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
532 ; AVX256NODQ-NEXT: [[A1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
533 ; AVX256NODQ-NEXT: [[A2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
534 ; AVX256NODQ-NEXT: [[A3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
535 ; AVX256NODQ-NEXT: [[A4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
536 ; AVX256NODQ-NEXT: [[A5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
537 ; AVX256NODQ-NEXT: [[A6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
538 ; AVX256NODQ-NEXT: [[A7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
539 ; AVX256NODQ-NEXT: [[CVT0:%.*]] = fptoui float [[A0]] to i32
540 ; AVX256NODQ-NEXT: [[CVT1:%.*]] = fptoui float [[A1]] to i32
541 ; AVX256NODQ-NEXT: [[CVT2:%.*]] = fptoui float [[A2]] to i32
542 ; AVX256NODQ-NEXT: [[CVT3:%.*]] = fptoui float [[A3]] to i32
543 ; AVX256NODQ-NEXT: [[CVT4:%.*]] = fptoui float [[A4]] to i32
544 ; AVX256NODQ-NEXT: [[CVT5:%.*]] = fptoui float [[A5]] to i32
545 ; AVX256NODQ-NEXT: [[CVT6:%.*]] = fptoui float [[A6]] to i32
546 ; AVX256NODQ-NEXT: [[CVT7:%.*]] = fptoui float [[A7]] to i32
547 ; AVX256NODQ-NEXT: store i32 [[CVT0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 0), align 4
548 ; AVX256NODQ-NEXT: store i32 [[CVT1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 1), align 4
549 ; AVX256NODQ-NEXT: store i32 [[CVT2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 2), align 4
550 ; AVX256NODQ-NEXT: store i32 [[CVT3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 3), align 4
551 ; AVX256NODQ-NEXT: store i32 [[CVT4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4), align 4
552 ; AVX256NODQ-NEXT: store i32 [[CVT5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 5), align 4
553 ; AVX256NODQ-NEXT: store i32 [[CVT6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 6), align 4
554 ; AVX256NODQ-NEXT: store i32 [[CVT7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 7), align 4
555 ; AVX256NODQ-NEXT: ret void
557 ; AVX512-LABEL: @fptoui_8f32_8i32(
558 ; AVX512-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
559 ; AVX512-NEXT: [[TMP2:%.*]] = fptoui <8 x float> [[TMP1]] to <8 x i32>
560 ; AVX512-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([16 x i32]* @dst32 to <8 x i32>*), align 4
561 ; AVX512-NEXT: ret void
563 ; AVX256DQ-LABEL: @fptoui_8f32_8i32(
564 ; AVX256DQ-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
565 ; AVX256DQ-NEXT: [[TMP2:%.*]] = fptoui <8 x float> [[TMP1]] to <8 x i32>
566 ; AVX256DQ-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([16 x i32]* @dst32 to <8 x i32>*), align 4
567 ; AVX256DQ-NEXT: ret void
569 %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
570 %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
571 %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
572 %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
573 %a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
574 %a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
575 %a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
576 %a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
577 %cvt0 = fptoui float %a0 to i32
578 %cvt1 = fptoui float %a1 to i32
579 %cvt2 = fptoui float %a2 to i32
580 %cvt3 = fptoui float %a3 to i32
581 %cvt4 = fptoui float %a4 to i32
582 %cvt5 = fptoui float %a5 to i32
583 %cvt6 = fptoui float %a6 to i32
584 %cvt7 = fptoui float %a7 to i32
585 store i32 %cvt0, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 0), align 4
586 store i32 %cvt1, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 1), align 4
587 store i32 %cvt2, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 2), align 4
588 store i32 %cvt3, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 3), align 4
589 store i32 %cvt4, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4), align 4
590 store i32 %cvt5, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 5), align 4
591 store i32 %cvt6, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 6), align 4
592 store i32 %cvt7, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 7), align 4
596 define void @fptoui_8f32_8i16() #0 {
597 ; SSE-LABEL: @fptoui_8f32_8i16(
598 ; SSE-NEXT: [[A0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
599 ; SSE-NEXT: [[A1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
600 ; SSE-NEXT: [[A2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
601 ; SSE-NEXT: [[A3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
602 ; SSE-NEXT: [[A4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
603 ; SSE-NEXT: [[A5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
604 ; SSE-NEXT: [[A6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
605 ; SSE-NEXT: [[A7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
606 ; SSE-NEXT: [[CVT0:%.*]] = fptoui float [[A0]] to i16
607 ; SSE-NEXT: [[CVT1:%.*]] = fptoui float [[A1]] to i16
608 ; SSE-NEXT: [[CVT2:%.*]] = fptoui float [[A2]] to i16
609 ; SSE-NEXT: [[CVT3:%.*]] = fptoui float [[A3]] to i16
610 ; SSE-NEXT: [[CVT4:%.*]] = fptoui float [[A4]] to i16
611 ; SSE-NEXT: [[CVT5:%.*]] = fptoui float [[A5]] to i16
612 ; SSE-NEXT: [[CVT6:%.*]] = fptoui float [[A6]] to i16
613 ; SSE-NEXT: [[CVT7:%.*]] = fptoui float [[A7]] to i16
614 ; SSE-NEXT: store i16 [[CVT0]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 0), align 2
615 ; SSE-NEXT: store i16 [[CVT1]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 1), align 2
616 ; SSE-NEXT: store i16 [[CVT2]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 2), align 2
617 ; SSE-NEXT: store i16 [[CVT3]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 3), align 2
618 ; SSE-NEXT: store i16 [[CVT4]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 4), align 2
619 ; SSE-NEXT: store i16 [[CVT5]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 5), align 2
620 ; SSE-NEXT: store i16 [[CVT6]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 6), align 2
621 ; SSE-NEXT: store i16 [[CVT7]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 7), align 2
624 ; AVX-LABEL: @fptoui_8f32_8i16(
625 ; AVX-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
626 ; AVX-NEXT: [[TMP2:%.*]] = fptoui <8 x float> [[TMP1]] to <8 x i16>
627 ; AVX-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([32 x i16]* @dst16 to <8 x i16>*), align 2
630 %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
631 %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
632 %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
633 %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
634 %a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
635 %a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
636 %a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
637 %a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
638 %cvt0 = fptoui float %a0 to i16
639 %cvt1 = fptoui float %a1 to i16
640 %cvt2 = fptoui float %a2 to i16
641 %cvt3 = fptoui float %a3 to i16
642 %cvt4 = fptoui float %a4 to i16
643 %cvt5 = fptoui float %a5 to i16
644 %cvt6 = fptoui float %a6 to i16
645 %cvt7 = fptoui float %a7 to i16
646 store i16 %cvt0, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 0), align 2
647 store i16 %cvt1, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 1), align 2
648 store i16 %cvt2, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 2), align 2
649 store i16 %cvt3, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 3), align 2
650 store i16 %cvt4, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 4), align 2
651 store i16 %cvt5, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 5), align 2
652 store i16 %cvt6, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 6), align 2
653 store i16 %cvt7, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 7), align 2
657 define void @fptoui_8f32_8i8() #0 {
658 ; SSE-LABEL: @fptoui_8f32_8i8(
659 ; SSE-NEXT: [[A0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
660 ; SSE-NEXT: [[A1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
661 ; SSE-NEXT: [[A2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
662 ; SSE-NEXT: [[A3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
663 ; SSE-NEXT: [[A4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
664 ; SSE-NEXT: [[A5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
665 ; SSE-NEXT: [[A6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
666 ; SSE-NEXT: [[A7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
667 ; SSE-NEXT: [[CVT0:%.*]] = fptoui float [[A0]] to i8
668 ; SSE-NEXT: [[CVT1:%.*]] = fptoui float [[A1]] to i8
669 ; SSE-NEXT: [[CVT2:%.*]] = fptoui float [[A2]] to i8
670 ; SSE-NEXT: [[CVT3:%.*]] = fptoui float [[A3]] to i8
671 ; SSE-NEXT: [[CVT4:%.*]] = fptoui float [[A4]] to i8
672 ; SSE-NEXT: [[CVT5:%.*]] = fptoui float [[A5]] to i8
673 ; SSE-NEXT: [[CVT6:%.*]] = fptoui float [[A6]] to i8
674 ; SSE-NEXT: [[CVT7:%.*]] = fptoui float [[A7]] to i8
675 ; SSE-NEXT: store i8 [[CVT0]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 0), align 1
676 ; SSE-NEXT: store i8 [[CVT1]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 1), align 1
677 ; SSE-NEXT: store i8 [[CVT2]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 2), align 1
678 ; SSE-NEXT: store i8 [[CVT3]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 3), align 1
679 ; SSE-NEXT: store i8 [[CVT4]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 4), align 1
680 ; SSE-NEXT: store i8 [[CVT5]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 5), align 1
681 ; SSE-NEXT: store i8 [[CVT6]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 6), align 1
682 ; SSE-NEXT: store i8 [[CVT7]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 7), align 1
685 ; AVX-LABEL: @fptoui_8f32_8i8(
686 ; AVX-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
687 ; AVX-NEXT: [[TMP2:%.*]] = fptoui <8 x float> [[TMP1]] to <8 x i8>
688 ; AVX-NEXT: store <8 x i8> [[TMP2]], <8 x i8>* bitcast ([64 x i8]* @dst8 to <8 x i8>*), align 1
691 %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
692 %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
693 %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
694 %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
695 %a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
696 %a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
697 %a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
698 %a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
699 %cvt0 = fptoui float %a0 to i8
700 %cvt1 = fptoui float %a1 to i8
701 %cvt2 = fptoui float %a2 to i8
702 %cvt3 = fptoui float %a3 to i8
703 %cvt4 = fptoui float %a4 to i8
704 %cvt5 = fptoui float %a5 to i8
705 %cvt6 = fptoui float %a6 to i8
706 %cvt7 = fptoui float %a7 to i8
707 store i8 %cvt0, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 0), align 1
708 store i8 %cvt1, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 1), align 1
709 store i8 %cvt2, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 2), align 1
710 store i8 %cvt3, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 3), align 1
711 store i8 %cvt4, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 4), align 1
712 store i8 %cvt5, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 5), align 1
713 store i8 %cvt6, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 6), align 1
714 store i8 %cvt7, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 7), align 1
718 attributes #0 = { nounwind }