1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2 ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
3 ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE41
; zext <4 x i8> -> <4 x i64>, value loaded from %a and stored to an undef pointer.
; The cast is expected to cost 4 in the +sse2 run and 2 in the +sse4.1 run;
; the load, store and ret expectations are identical in both runs.
5 define void @zext_v4i8_to_v4i64(<4 x i8>* %a) {
6 ; SSE2-LABEL: 'zext_v4i8_to_v4i64'
7 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, <4 x i8>* %a
8 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = zext <4 x i8> %1 to <4 x i64>
9 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
10 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
12 ; SSE41-LABEL: 'zext_v4i8_to_v4i64'
13 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, <4 x i8>* %a
14 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = zext <4 x i8> %1 to <4 x i64>
15 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
16 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
18 %1 = load <4 x i8>, <4 x i8>* %a
19 %2 = zext <4 x i8> %1 to <4 x i64>
20 store <4 x i64> %2, <4 x i64>* undef, align 4
; sext <4 x i8> -> <4 x i64>, value loaded from %a and stored to an undef pointer.
; The cast is expected to cost 8 in the +sse2 run and 2 in the +sse4.1 run;
; the load, store and ret expectations are identical in both runs.
24 define void @sext_v4i8_to_v4i64(<4 x i8>* %a) {
25 ; SSE2-LABEL: 'sext_v4i8_to_v4i64'
26 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, <4 x i8>* %a
27 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %2 = sext <4 x i8> %1 to <4 x i64>
28 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
29 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
31 ; SSE41-LABEL: 'sext_v4i8_to_v4i64'
32 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, <4 x i8>* %a
33 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = sext <4 x i8> %1 to <4 x i64>
34 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
35 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
37 %1 = load <4 x i8>, <4 x i8>* %a
38 %2 = sext <4 x i8> %1 to <4 x i64>
39 store <4 x i64> %2, <4 x i64>* undef, align 4
; zext <4 x i16> -> <4 x i64>, value loaded from %a and stored to an undef pointer.
; The cast is expected to cost 3 in the +sse2 run and 2 in the +sse4.1 run;
; the load, store and ret expectations are identical in both runs.
43 define void @zext_v4i16_to_v4i64(<4 x i16>* %a) {
44 ; SSE2-LABEL: 'zext_v4i16_to_v4i64'
45 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, <4 x i16>* %a
46 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = zext <4 x i16> %1 to <4 x i64>
47 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
48 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
50 ; SSE41-LABEL: 'zext_v4i16_to_v4i64'
51 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, <4 x i16>* %a
52 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = zext <4 x i16> %1 to <4 x i64>
53 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
54 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
56 %1 = load <4 x i16>, <4 x i16>* %a
57 %2 = zext <4 x i16> %1 to <4 x i64>
58 store <4 x i64> %2, <4 x i64>* undef, align 4
; sext <4 x i16> -> <4 x i64>, value loaded from %a and stored to an undef pointer.
; The cast is expected to cost 10 in the +sse2 run and 2 in the +sse4.1 run;
; the load, store and ret expectations are identical in both runs.
62 define void @sext_v4i16_to_v4i64(<4 x i16>* %a) {
63 ; SSE2-LABEL: 'sext_v4i16_to_v4i64'
64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, <4 x i16>* %a
65 ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %2 = sext <4 x i16> %1 to <4 x i64>
66 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
67 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
69 ; SSE41-LABEL: 'sext_v4i16_to_v4i64'
70 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, <4 x i16>* %a
71 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = sext <4 x i16> %1 to <4 x i64>
72 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
73 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
75 %1 = load <4 x i16>, <4 x i16>* %a
76 %2 = sext <4 x i16> %1 to <4 x i64>
77 store <4 x i64> %2, <4 x i64>* undef, align 4
; zext <4 x i32> -> <4 x i64>, value loaded from %a and stored to an undef pointer.
; The cast is expected to cost 3 in the +sse2 run and 2 in the +sse4.1 run;
; the load, store and ret expectations are identical in both runs.
82 define void @zext_v4i32_to_v4i64(<4 x i32>* %a) {
83 ; SSE2-LABEL: 'zext_v4i32_to_v4i64'
84 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i32>, <4 x i32>* %a
85 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = zext <4 x i32> %1 to <4 x i64>
86 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
87 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
89 ; SSE41-LABEL: 'zext_v4i32_to_v4i64'
90 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i32>, <4 x i32>* %a
91 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = zext <4 x i32> %1 to <4 x i64>
92 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
93 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
95 %1 = load <4 x i32>, <4 x i32>* %a
96 %2 = zext <4 x i32> %1 to <4 x i64>
97 store <4 x i64> %2, <4 x i64>* undef, align 4
; sext <4 x i32> -> <4 x i64>, value loaded from %a and stored to an undef pointer.
; The cast is expected to cost 5 in the +sse2 run and 2 in the +sse4.1 run;
; the load, store and ret expectations are identical in both runs.
101 define void @sext_v4i32_to_v4i64(<4 x i32>* %a) {
102 ; SSE2-LABEL: 'sext_v4i32_to_v4i64'
103 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i32>, <4 x i32>* %a
104 ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %2 = sext <4 x i32> %1 to <4 x i64>
105 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
106 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
108 ; SSE41-LABEL: 'sext_v4i32_to_v4i64'
109 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i32>, <4 x i32>* %a
110 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = sext <4 x i32> %1 to <4 x i64>
111 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
112 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
114 %1 = load <4 x i32>, <4 x i32>* %a
115 %2 = sext <4 x i32> %1 to <4 x i64>
116 store <4 x i64> %2, <4 x i64>* undef, align 4
; zext <16 x i16> -> <16 x i32>, value loaded from %a and stored to an undef pointer.
; The cast is expected to cost 6 in the +sse2 run and 4 in the +sse4.1 run;
; the load, store and ret expectations are identical in both runs.
120 define void @zext_v16i16_to_v16i32(<16 x i16>* %a) {
121 ; SSE2-LABEL: 'zext_v16i16_to_v16i32'
122 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = load <16 x i16>, <16 x i16>* %a
123 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %2 = zext <16 x i16> %1 to <16 x i32>
124 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> %2, <16 x i32>* undef, align 4
125 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
127 ; SSE41-LABEL: 'zext_v16i16_to_v16i32'
128 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = load <16 x i16>, <16 x i16>* %a
129 ; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = zext <16 x i16> %1 to <16 x i32>
130 ; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> %2, <16 x i32>* undef, align 4
131 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
133 %1 = load <16 x i16>, <16 x i16>* %a
134 %2 = zext <16 x i16> %1 to <16 x i32>
135 store <16 x i32> %2, <16 x i32>* undef, align 4
; sext <16 x i16> -> <16 x i32>, value loaded from %a and stored to an undef pointer.
; The cast is expected to cost 8 in the +sse2 run and 4 in the +sse4.1 run;
; the load, store and ret expectations are identical in both runs.
139 define void @sext_v16i16_to_v16i32(<16 x i16>* %a) {
140 ; SSE2-LABEL: 'sext_v16i16_to_v16i32'
141 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = load <16 x i16>, <16 x i16>* %a
142 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %2 = sext <16 x i16> %1 to <16 x i32>
143 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> %2, <16 x i32>* undef, align 4
144 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
146 ; SSE41-LABEL: 'sext_v16i16_to_v16i32'
147 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = load <16 x i16>, <16 x i16>* %a
148 ; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = sext <16 x i16> %1 to <16 x i32>
149 ; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> %2, <16 x i32>* undef, align 4
150 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
152 %1 = load <16 x i16>, <16 x i16>* %a
153 %2 = sext <16 x i16> %1 to <16 x i32>
154 store <16 x i32> %2, <16 x i32>* undef, align 4
; zext <8 x i16> -> <8 x i32>, value loaded from %a and stored to an undef pointer.
; The cast is expected to cost 3 in the +sse2 run and 2 in the +sse4.1 run;
; the load, store and ret expectations are identical in both runs.
158 define void @zext_v8i16_to_v8i32(<8 x i16>* %a) {
159 ; SSE2-LABEL: 'zext_v8i16_to_v8i32'
160 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i16>, <8 x i16>* %a
161 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = zext <8 x i16> %1 to <8 x i32>
162 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> %2, <8 x i32>* undef, align 4
163 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
165 ; SSE41-LABEL: 'zext_v8i16_to_v8i32'
166 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i16>, <8 x i16>* %a
167 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = zext <8 x i16> %1 to <8 x i32>
168 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> %2, <8 x i32>* undef, align 4
169 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
171 %1 = load <8 x i16>, <8 x i16>* %a
172 %2 = zext <8 x i16> %1 to <8 x i32>
173 store <8 x i32> %2, <8 x i32>* undef, align 4
; sext <8 x i16> -> <8 x i32>, value loaded from %a and stored to an undef pointer.
; The cast is expected to cost 4 in the +sse2 run and 2 in the +sse4.1 run;
; the load, store and ret expectations are identical in both runs.
177 define void @sext_v8i16_to_v8i32(<8 x i16>* %a) {
178 ; SSE2-LABEL: 'sext_v8i16_to_v8i32'
179 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i16>, <8 x i16>* %a
180 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = sext <8 x i16> %1 to <8 x i32>
181 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> %2, <8 x i32>* undef, align 4
182 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
184 ; SSE41-LABEL: 'sext_v8i16_to_v8i32'
185 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i16>, <8 x i16>* %a
186 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = sext <8 x i16> %1 to <8 x i32>
187 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> %2, <8 x i32>* undef, align 4
188 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
190 %1 = load <8 x i16>, <8 x i16>* %a
191 %2 = sext <8 x i16> %1 to <8 x i32>
192 store <8 x i32> %2, <8 x i32>* undef, align 4
; zext <4 x i16> -> <4 x i32>, value loaded from %a and stored to an undef pointer.
; Both runs (+sse2 and +sse4.1) share one set of expectations via the common
; CHECK prefix; the cast is expected to cost 1.
196 define void @zext_v4i16_to_v4i32(<4 x i16>* %a) {
197 ; CHECK-LABEL: 'zext_v4i16_to_v4i32'
198 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, <4 x i16>* %a
199 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = zext <4 x i16> %1 to <4 x i32>
200 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %2, <4 x i32>* undef, align 4
201 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
203 %1 = load <4 x i16>, <4 x i16>* %a
204 %2 = zext <4 x i16> %1 to <4 x i32>
205 store <4 x i32> %2, <4 x i32>* undef, align 4
; sext <4 x i16> -> <4 x i32>, value loaded from %a and stored to an undef pointer.
; The cast is expected to cost 2 in the +sse2 run and 1 in the +sse4.1 run;
; the load, store and ret expectations are identical in both runs.
209 define void @sext_v4i16_to_v4i32(<4 x i16>* %a) {
210 ; SSE2-LABEL: 'sext_v4i16_to_v4i32'
211 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, <4 x i16>* %a
212 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = sext <4 x i16> %1 to <4 x i32>
213 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %2, <4 x i32>* undef, align 4
214 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
216 ; SSE41-LABEL: 'sext_v4i16_to_v4i32'
217 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, <4 x i16>* %a
218 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = sext <4 x i16> %1 to <4 x i32>
219 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %2, <4 x i32>* undef, align 4
220 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
222 %1 = load <4 x i16>, <4 x i16>* %a
223 %2 = sext <4 x i16> %1 to <4 x i32>
224 store <4 x i32> %2, <4 x i32>* undef, align 4
; zext <16 x i8> -> <16 x i32>, value loaded from %a and stored to an undef pointer.
; The cast is expected to cost 9 in the +sse2 run and 4 in the +sse4.1 run;
; the load, store and ret expectations are identical in both runs.
228 define void @zext_v16i8_to_v16i32(<16 x i8>* %a) {
229 ; SSE2-LABEL: 'zext_v16i8_to_v16i32'
230 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <16 x i8>, <16 x i8>* %a
231 ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %2 = zext <16 x i8> %1 to <16 x i32>
232 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> %2, <16 x i32>* undef, align 4
233 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
235 ; SSE41-LABEL: 'zext_v16i8_to_v16i32'
236 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <16 x i8>, <16 x i8>* %a
237 ; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = zext <16 x i8> %1 to <16 x i32>
238 ; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> %2, <16 x i32>* undef, align 4
239 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
241 %1 = load <16 x i8>, <16 x i8>* %a
242 %2 = zext <16 x i8> %1 to <16 x i32>
243 store <16 x i32> %2, <16 x i32>* undef, align 4
; sext <16 x i8> -> <16 x i32>, value loaded from %a and stored to an undef pointer.
; The cast is expected to cost 12 in the +sse2 run and 4 in the +sse4.1 run;
; the load, store and ret expectations are identical in both runs.
247 define void @sext_v16i8_to_v16i32(<16 x i8>* %a) {
248 ; SSE2-LABEL: 'sext_v16i8_to_v16i32'
249 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <16 x i8>, <16 x i8>* %a
250 ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %2 = sext <16 x i8> %1 to <16 x i32>
251 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> %2, <16 x i32>* undef, align 4
252 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
254 ; SSE41-LABEL: 'sext_v16i8_to_v16i32'
255 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <16 x i8>, <16 x i8>* %a
256 ; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = sext <16 x i8> %1 to <16 x i32>
257 ; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> %2, <16 x i32>* undef, align 4
258 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
260 %1 = load <16 x i8>, <16 x i8>* %a
261 %2 = sext <16 x i8> %1 to <16 x i32>
262 store <16 x i32> %2, <16 x i32>* undef, align 4
; zext <8 x i8> -> <8 x i32>, value loaded from %a and stored to an undef pointer.
; The cast is expected to cost 6 in the +sse2 run and 2 in the +sse4.1 run;
; the load, store and ret expectations are identical in both runs.
266 define void @zext_v8i8_to_v8i32(<8 x i8>* %a) {
267 ; SSE2-LABEL: 'zext_v8i8_to_v8i32'
268 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i8>, <8 x i8>* %a
269 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %2 = zext <8 x i8> %1 to <8 x i32>
270 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> %2, <8 x i32>* undef, align 4
271 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
273 ; SSE41-LABEL: 'zext_v8i8_to_v8i32'
274 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i8>, <8 x i8>* %a
275 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = zext <8 x i8> %1 to <8 x i32>
276 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> %2, <8 x i32>* undef, align 4
277 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
279 %1 = load <8 x i8>, <8 x i8>* %a
280 %2 = zext <8 x i8> %1 to <8 x i32>
281 store <8 x i32> %2, <8 x i32>* undef, align 4
; sext <8 x i8> -> <8 x i32>, value loaded from %a and stored to an undef pointer.
; The cast is expected to cost 6 in the +sse2 run and 2 in the +sse4.1 run;
; the load, store and ret expectations are identical in both runs.
285 define void @sext_v8i8_to_v8i32(<8 x i8>* %a) {
286 ; SSE2-LABEL: 'sext_v8i8_to_v8i32'
287 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i8>, <8 x i8>* %a
288 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %2 = sext <8 x i8> %1 to <8 x i32>
289 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> %2, <8 x i32>* undef, align 4
290 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
292 ; SSE41-LABEL: 'sext_v8i8_to_v8i32'
293 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i8>, <8 x i8>* %a
294 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = sext <8 x i8> %1 to <8 x i32>
295 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> %2, <8 x i32>* undef, align 4
296 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
298 %1 = load <8 x i8>, <8 x i8>* %a
299 %2 = sext <8 x i8> %1 to <8 x i32>
300 store <8 x i32> %2, <8 x i32>* undef, align 4
; zext <4 x i8> -> <4 x i32>, value loaded from %a and stored to an undef pointer.
; The cast is expected to cost 2 in the +sse2 run and 1 in the +sse4.1 run;
; the load, store and ret expectations are identical in both runs.
304 define void @zext_v4i8_to_v4i32(<4 x i8>* %a) {
305 ; SSE2-LABEL: 'zext_v4i8_to_v4i32'
306 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, <4 x i8>* %a
307 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = zext <4 x i8> %1 to <4 x i32>
308 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %2, <4 x i32>* undef, align 4
309 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
311 ; SSE41-LABEL: 'zext_v4i8_to_v4i32'
312 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, <4 x i8>* %a
313 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = zext <4 x i8> %1 to <4 x i32>
314 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %2, <4 x i32>* undef, align 4
315 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
317 %1 = load <4 x i8>, <4 x i8>* %a
318 %2 = zext <4 x i8> %1 to <4 x i32>
319 store <4 x i32> %2, <4 x i32>* undef, align 4
; sext <4 x i8> -> <4 x i32>, value loaded from %a and stored to an undef pointer.
; The cast is expected to cost 3 in the +sse2 run and 1 in the +sse4.1 run;
; the load, store and ret expectations are identical in both runs.
323 define void @sext_v4i8_to_v4i32(<4 x i8>* %a) {
324 ; SSE2-LABEL: 'sext_v4i8_to_v4i32'
325 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, <4 x i8>* %a
326 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = sext <4 x i8> %1 to <4 x i32>
327 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %2, <4 x i32>* undef, align 4
328 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
330 ; SSE41-LABEL: 'sext_v4i8_to_v4i32'
331 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, <4 x i8>* %a
332 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = sext <4 x i8> %1 to <4 x i32>
333 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %2, <4 x i32>* undef, align 4
334 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
336 %1 = load <4 x i8>, <4 x i8>* %a
337 %2 = sext <4 x i8> %1 to <4 x i32>
338 store <4 x i32> %2, <4 x i32>* undef, align 4
; zext <16 x i8> -> <16 x i16>, value loaded from %a and stored to an undef pointer.
; The cast is expected to cost 3 in the +sse2 run and 2 in the +sse4.1 run;
; the load, store and ret expectations are identical in both runs.
342 define void @zext_v16i8_to_v16i16(<16 x i8>* %a) {
343 ; SSE2-LABEL: 'zext_v16i8_to_v16i16'
344 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <16 x i8>, <16 x i8>* %a
345 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = zext <16 x i8> %1 to <16 x i16>
346 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> %2, <16 x i16>* undef, align 4
347 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
349 ; SSE41-LABEL: 'zext_v16i8_to_v16i16'
350 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <16 x i8>, <16 x i8>* %a
351 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = zext <16 x i8> %1 to <16 x i16>
352 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> %2, <16 x i16>* undef, align 4
353 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
355 %1 = load <16 x i8>, <16 x i8>* %a
356 %2 = zext <16 x i8> %1 to <16 x i16>
357 store <16 x i16> %2, <16 x i16>* undef, align 4
; sext <16 x i8> -> <16 x i16>, value loaded from %a and stored to an undef pointer.
; The cast is expected to cost 4 in the +sse2 run and 2 in the +sse4.1 run;
; the load, store and ret expectations are identical in both runs.
361 define void @sext_v16i8_to_v16i16(<16 x i8>* %a) {
362 ; SSE2-LABEL: 'sext_v16i8_to_v16i16'
363 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <16 x i8>, <16 x i8>* %a
364 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = sext <16 x i8> %1 to <16 x i16>
365 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> %2, <16 x i16>* undef, align 4
366 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
368 ; SSE41-LABEL: 'sext_v16i8_to_v16i16'
369 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <16 x i8>, <16 x i8>* %a
370 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = sext <16 x i8> %1 to <16 x i16>
371 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> %2, <16 x i16>* undef, align 4
372 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
374 %1 = load <16 x i8>, <16 x i8>* %a
375 %2 = sext <16 x i8> %1 to <16 x i16>
376 store <16 x i16> %2, <16 x i16>* undef, align 4
; zext <8 x i8> -> <8 x i16>, value loaded from %a and stored to an undef pointer.
; Both runs (+sse2 and +sse4.1) share one set of expectations via the common
; CHECK prefix; the cast is expected to cost 1.
380 define void @zext_v8i8_to_v8i16(<8 x i8>* %a) {
381 ; CHECK-LABEL: 'zext_v8i8_to_v8i16'
382 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i8>, <8 x i8>* %a
383 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = zext <8 x i8> %1 to <8 x i16>
384 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %2, <8 x i16>* undef, align 4
385 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
387 %1 = load <8 x i8>, <8 x i8>* %a
388 %2 = zext <8 x i8> %1 to <8 x i16>
389 store <8 x i16> %2, <8 x i16>* undef, align 4
; sext <8 x i8> -> <8 x i16>, value loaded from %a and stored to an undef pointer.
; The cast is expected to cost 2 in the +sse2 run and 1 in the +sse4.1 run;
; the load, store and ret expectations are identical in both runs.
393 define void @sext_v8i8_to_v8i16(<8 x i8>* %a) {
394 ; SSE2-LABEL: 'sext_v8i8_to_v8i16'
395 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i8>, <8 x i8>* %a
396 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = sext <8 x i8> %1 to <8 x i16>
397 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %2, <8 x i16>* undef, align 4
398 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
400 ; SSE41-LABEL: 'sext_v8i8_to_v8i16'
401 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i8>, <8 x i8>* %a
402 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = sext <8 x i8> %1 to <8 x i16>
403 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %2, <8 x i16>* undef, align 4
404 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
406 %1 = load <8 x i8>, <8 x i8>* %a
407 %2 = sext <8 x i8> %1 to <8 x i16>
408 store <8 x i16> %2, <8 x i16>* undef, align 4
; zext <4 x i8> -> <4 x i16>, value loaded from %a and stored to an undef pointer.
; Both runs (+sse2 and +sse4.1) share one set of expectations via the common
; CHECK prefix; the cast is expected to cost 1.
412 define void @zext_v4i8_to_v4i16(<4 x i8>* %a) {
413 ; CHECK-LABEL: 'zext_v4i8_to_v4i16'
414 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, <4 x i8>* %a
415 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = zext <4 x i8> %1 to <4 x i16>
416 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %2, <4 x i16>* undef, align 4
417 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
419 %1 = load <4 x i8>, <4 x i8>* %a
420 %2 = zext <4 x i8> %1 to <4 x i16>
421 store <4 x i16> %2, <4 x i16>* undef, align 4
; sext <4 x i8> -> <4 x i16>, value loaded from %a and stored to an undef pointer.
; The cast is expected to cost 6 in the +sse2 run and 2 in the +sse4.1 run;
; the load, store and ret expectations are identical in both runs.
425 define void @sext_v4i8_to_v4i16(<4 x i8>* %a) {
426 ; SSE2-LABEL: 'sext_v4i8_to_v4i16'
427 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, <4 x i8>* %a
428 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %2 = sext <4 x i8> %1 to <4 x i16>
429 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %2, <4 x i16>* undef, align 4
430 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
432 ; SSE41-LABEL: 'sext_v4i8_to_v4i16'
433 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, <4 x i8>* %a
434 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = sext <4 x i8> %1 to <4 x i16>
435 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %2, <4 x i16>* undef, align 4
436 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
438 %1 = load <4 x i8>, <4 x i8>* %a
439 %2 = sext <4 x i8> %1 to <4 x i16>
440 store <4 x i16> %2, <4 x i16>* undef, align 4
; trunc <16 x i32> -> <16 x i16>, value loaded from %a and stored to an undef pointer.
; The cast is expected to cost 10 in the +sse2 run and 6 in the +sse4.1 run;
; the load, store and ret expectations are identical in both runs.
444 define void @truncate_v16i32_to_v16i16(<16 x i32>* %a) {
445 ; SSE2-LABEL: 'truncate_v16i32_to_v16i16'
446 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = load <16 x i32>, <16 x i32>* %a
447 ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %2 = trunc <16 x i32> %1 to <16 x i16>
448 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> %2, <16 x i16>* undef, align 4
449 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
451 ; SSE41-LABEL: 'truncate_v16i32_to_v16i16'
452 ; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = load <16 x i32>, <16 x i32>* %a
453 ; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %2 = trunc <16 x i32> %1 to <16 x i16>
454 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> %2, <16 x i16>* undef, align 4
455 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
457 %1 = load <16 x i32>, <16 x i32>* %a
458 %2 = trunc <16 x i32> %1 to <16 x i16>
459 store <16 x i16> %2, <16 x i16>* undef, align 4
; trunc <8 x i32> -> <8 x i16>, value loaded from %a and stored to an undef pointer.
; The cast is expected to cost 5 in the +sse2 run and 3 in the +sse4.1 run;
; the load, store and ret expectations are identical in both runs.
463 define void @truncate_v8i32_to_v8i16(<8 x i32>* %a) {
464 ; SSE2-LABEL: 'truncate_v8i32_to_v8i16'
465 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = load <8 x i32>, <8 x i32>* %a
466 ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %2 = trunc <8 x i32> %1 to <8 x i16>
467 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %2, <8 x i16>* undef, align 4
468 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
470 ; SSE41-LABEL: 'truncate_v8i32_to_v8i16'
471 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = load <8 x i32>, <8 x i32>* %a
472 ; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = trunc <8 x i32> %1 to <8 x i16>
473 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %2, <8 x i16>* undef, align 4
474 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
476 %1 = load <8 x i32>, <8 x i32>* %a
477 %2 = trunc <8 x i32> %1 to <8 x i16>
478 store <8 x i16> %2, <8 x i16>* undef, align 4
; trunc <4 x i32> -> <4 x i16>, value loaded from %a and stored to an undef pointer.
; The cast is expected to cost 3 in the +sse2 run and 1 in the +sse4.1 run;
; the load, store and ret expectations are identical in both runs.
482 define void @truncate_v4i32_to_v4i16(<4 x i32>* %a) {
483 ; SSE2-LABEL: 'truncate_v4i32_to_v4i16'
484 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i32>, <4 x i32>* %a
485 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = trunc <4 x i32> %1 to <4 x i16>
486 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %2, <4 x i16>* undef, align 4
487 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
489 ; SSE41-LABEL: 'truncate_v4i32_to_v4i16'
490 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i32>, <4 x i32>* %a
491 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = trunc <4 x i32> %1 to <4 x i16>
492 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %2, <4 x i16>* undef, align 4
493 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
495 %1 = load <4 x i32>, <4 x i32>* %a
496 %2 = trunc <4 x i32> %1 to <4 x i16>
497 store <4 x i16> %2, <4 x i16>* undef, align 4
; trunc <16 x i32> -> <16 x i8>, value loaded from %a and stored to an undef pointer.
; Both runs (+sse2 and +sse4.1) share one set of expectations via the common
; CHECK prefix; the cast is expected to cost 7.
501 define void @truncate_v16i32_to_v16i8(<16 x i32>* %a) {
502 ; CHECK-LABEL: 'truncate_v16i32_to_v16i8'
503 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = load <16 x i32>, <16 x i32>* %a
504 ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = trunc <16 x i32> %1 to <16 x i8>
505 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %2, <16 x i8>* undef, align 4
506 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
508 %1 = load <16 x i32>, <16 x i32>* %a
509 %2 = trunc <16 x i32> %1 to <16 x i8>
510 store <16 x i8> %2, <16 x i8>* undef, align 4
; trunc <8 x i32> -> <8 x i8>, value loaded from %a and stored to an undef pointer.
; The cast is expected to cost 4 in the +sse2 run and 3 in the +sse4.1 run;
; the load, store and ret expectations are identical in both runs.
514 define void @truncate_v8i32_to_v8i8(<8 x i32>* %a) {
515 ; SSE2-LABEL: 'truncate_v8i32_to_v8i8'
516 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = load <8 x i32>, <8 x i32>* %a
517 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = trunc <8 x i32> %1 to <8 x i8>
518 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %2, <8 x i8>* undef, align 4
519 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
521 ; SSE41-LABEL: 'truncate_v8i32_to_v8i8'
522 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = load <8 x i32>, <8 x i32>* %a
523 ; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = trunc <8 x i32> %1 to <8 x i8>
524 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %2, <8 x i8>* undef, align 4
525 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
527 %1 = load <8 x i32>, <8 x i32>* %a
528 %2 = trunc <8 x i32> %1 to <8 x i8>
529 store <8 x i8> %2, <8 x i8>* undef, align 4
; trunc <4 x i32> -> <4 x i8>, value loaded from %a and stored to an undef pointer.
; The cast is expected to cost 3 in the +sse2 run and 1 in the +sse4.1 run;
; the load, store and ret expectations are identical in both runs.
533 define void @truncate_v4i32_to_v4i8(<4 x i32>* %a) {
534 ; SSE2-LABEL: 'truncate_v4i32_to_v4i8'
535 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i32>, <4 x i32>* %a
536 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = trunc <4 x i32> %1 to <4 x i8>
537 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %2, <4 x i8>* undef, align 4
538 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
540 ; SSE41-LABEL: 'truncate_v4i32_to_v4i8'
541 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i32>, <4 x i32>* %a
542 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = trunc <4 x i32> %1 to <4 x i8>
543 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %2, <4 x i8>* undef, align 4
544 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
546 %1 = load <4 x i32>, <4 x i32>* %a
547 %2 = trunc <4 x i32> %1 to <4 x i8>
548 store <4 x i8> %2, <4 x i8>* undef, align 4
; trunc <16 x i16> -> <16 x i8>, value loaded from %a and stored to an undef pointer.
; Both runs (+sse2 and +sse4.1) share one set of expectations via the common
; CHECK prefix; the cast is expected to cost 3.
552 define void @truncate_v16i16_to_v16i8(<16 x i16>* %a) {
553 ; CHECK-LABEL: 'truncate_v16i16_to_v16i8'
554 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = load <16 x i16>, <16 x i16>* %a
555 ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = trunc <16 x i16> %1 to <16 x i8>
556 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %2, <16 x i8>* undef, align 4
557 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
559 %1 = load <16 x i16>, <16 x i16>* %a
560 %2 = trunc <16 x i16> %1 to <16 x i8>
561 store <16 x i8> %2, <16 x i8>* undef, align 4
; trunc <8 x i16> -> <8 x i8>, value loaded from %a and stored to an undef pointer.
; The cast is expected to cost 2 in the +sse2 run and 1 in the +sse4.1 run;
; the load, store and ret expectations are identical in both runs.
565 define void @truncate_v8i16_to_v8i8(<8 x i16>* %a) {
566 ; SSE2-LABEL: 'truncate_v8i16_to_v8i8'
567 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i16>, <8 x i16>* %a
568 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = trunc <8 x i16> %1 to <8 x i8>
569 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %2, <8 x i8>* undef, align 4
570 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
572 ; SSE41-LABEL: 'truncate_v8i16_to_v8i8'
573 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i16>, <8 x i16>* %a
574 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = trunc <8 x i16> %1 to <8 x i8>
575 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %2, <8 x i8>* undef, align 4
576 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
578 %1 = load <8 x i16>, <8 x i16>* %a
579 %2 = trunc <8 x i16> %1 to <8 x i8>
580 store <8 x i8> %2, <8 x i8>* undef, align 4
; trunc <4 x i16> -> <4 x i8>, value loaded from %a and stored to an undef pointer.
; The cast is expected to cost 4 in the +sse2 run and 2 in the +sse4.1 run;
; the load, store and ret expectations are identical in both runs.
584 define void @truncate_v4i16_to_v4i8(<4 x i16>* %a) {
585 ; SSE2-LABEL: 'truncate_v4i16_to_v4i8'
586 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, <4 x i16>* %a
587 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = trunc <4 x i16> %1 to <4 x i8>
588 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %2, <4 x i8>* undef, align 4
589 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
591 ; SSE41-LABEL: 'truncate_v4i16_to_v4i8'
592 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, <4 x i16>* %a
593 ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = trunc <4 x i16> %1 to <4 x i8>
594 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %2, <4 x i8>* undef, align 4
595 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
597 %1 = load <4 x i16>, <4 x i16>* %a
598 %2 = trunc <4 x i16> %1 to <4 x i8>
599 store <4 x i8> %2, <4 x i8>* undef, align 4