1 ; RUN: opt < %s -mtriple=aarch64--linux-gnu -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=COST
2 ; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CODE
4 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
; Zero-extends both <8 x i8> arguments to <8 x i16> and adds them. Each zext is
; expected to have an estimated cost of 0, and llc is expected to emit uaddl.
7 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i8> %a to <8 x i16>
8 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <8 x i8> %b to <8 x i16>
10 ; CODE: uaddl v0.8h, v0.8b, v1.8b
11 define <8 x i16> @uaddl_8h(<8 x i8> %a, <8 x i8> %b) {
12 %tmp0 = zext <8 x i8> %a to <8 x i16>
13 %tmp1 = zext <8 x i8> %b to <8 x i16>
14 %tmp2 = add <8 x i16> %tmp0, %tmp1
; Zero-extends both <4 x i16> arguments to <4 x i32> and adds them. Each zext is
; expected to have an estimated cost of 0, and llc is expected to emit uaddl.
18 ; COST-LABEL: uaddl_4s
19 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i16> %a to <4 x i32>
20 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <4 x i16> %b to <4 x i32>
21 ; CODE-LABEL: uaddl_4s
22 ; CODE: uaddl v0.4s, v0.4h, v1.4h
23 define <4 x i32> @uaddl_4s(<4 x i16> %a, <4 x i16> %b) {
24 %tmp0 = zext <4 x i16> %a to <4 x i32>
25 %tmp1 = zext <4 x i16> %b to <4 x i32>
26 %tmp2 = add <4 x i32> %tmp0, %tmp1
; Zero-extends both <2 x i32> arguments to <2 x i64> and adds them. Each zext is
; expected to have an estimated cost of 0, and llc is expected to emit uaddl.
30 ; COST-LABEL: uaddl_2d
31 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <2 x i32> %a to <2 x i64>
32 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <2 x i32> %b to <2 x i64>
33 ; CODE-LABEL: uaddl_2d
34 ; CODE: uaddl v0.2d, v0.2s, v1.2s
35 define <2 x i64> @uaddl_2d(<2 x i32> %a, <2 x i32> %b) {
36 %tmp0 = zext <2 x i32> %a to <2 x i64>
37 %tmp1 = zext <2 x i32> %b to <2 x i64>
38 %tmp2 = add <2 x i64> %tmp0, %tmp1
; Zero-extends both <16 x i8> arguments to <16 x i16> and adds them. Each zext
; is expected to have an estimated cost of 0, and llc is expected to emit
; uaddl2 immediately followed by uaddl.
42 ; COST-LABEL: uaddl2_8h
43 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <16 x i8> %a to <16 x i16>
44 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <16 x i8> %b to <16 x i16>
45 ; CODE-LABEL: uaddl2_8h
46 ; CODE: uaddl2 v2.8h, v0.16b, v1.16b
47 ; CODE-NEXT: uaddl v0.8h, v0.8b, v1.8b
48 define <16 x i16> @uaddl2_8h(<16 x i8> %a, <16 x i8> %b) {
49 %tmp0 = zext <16 x i8> %a to <16 x i16>
50 %tmp1 = zext <16 x i8> %b to <16 x i16>
51 %tmp2 = add <16 x i16> %tmp0, %tmp1
; Zero-extends both <8 x i16> arguments to <8 x i32> and adds them. Each zext is
; expected to have an estimated cost of 0, and llc is expected to emit
; uaddl2 immediately followed by uaddl.
55 ; COST-LABEL: uaddl2_4s
56 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i16> %a to <8 x i32>
57 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <8 x i16> %b to <8 x i32>
58 ; CODE-LABEL: uaddl2_4s
59 ; CODE: uaddl2 v2.4s, v0.8h, v1.8h
60 ; CODE-NEXT: uaddl v0.4s, v0.4h, v1.4h
61 define <8 x i32> @uaddl2_4s(<8 x i16> %a, <8 x i16> %b) {
62 %tmp0 = zext <8 x i16> %a to <8 x i32>
63 %tmp1 = zext <8 x i16> %b to <8 x i32>
64 %tmp2 = add <8 x i32> %tmp0, %tmp1
; Zero-extends both <4 x i32> arguments to <4 x i64> and adds them. Each zext is
; expected to have an estimated cost of 0, and llc is expected to emit
; uaddl2 immediately followed by uaddl.
68 ; COST-LABEL: uaddl2_2d
69 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i32> %a to <4 x i64>
70 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <4 x i32> %b to <4 x i64>
71 ; CODE-LABEL: uaddl2_2d
72 ; CODE: uaddl2 v2.2d, v0.4s, v1.4s
73 ; CODE-NEXT: uaddl v0.2d, v0.2s, v1.2s
74 define <4 x i64> @uaddl2_2d(<4 x i32> %a, <4 x i32> %b) {
75 %tmp0 = zext <4 x i32> %a to <4 x i64>
76 %tmp1 = zext <4 x i32> %b to <4 x i64>
77 %tmp2 = add <4 x i64> %tmp0, %tmp1
; Sign-extends both <8 x i8> arguments to <8 x i16> and adds them. Each sext is
; expected to have an estimated cost of 0, and llc is expected to emit saddl.
81 ; COST-LABEL: saddl_8h
82 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i8> %a to <8 x i16>
83 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <8 x i8> %b to <8 x i16>
84 ; CODE-LABEL: saddl_8h
85 ; CODE: saddl v0.8h, v0.8b, v1.8b
86 define <8 x i16> @saddl_8h(<8 x i8> %a, <8 x i8> %b) {
87 %tmp0 = sext <8 x i8> %a to <8 x i16>
88 %tmp1 = sext <8 x i8> %b to <8 x i16>
89 %tmp2 = add <8 x i16> %tmp0, %tmp1
; Sign-extends both <4 x i16> arguments to <4 x i32> and adds them. Each sext is
; expected to have an estimated cost of 0, and llc is expected to emit saddl.
93 ; COST-LABEL: saddl_4s
94 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i16> %a to <4 x i32>
95 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <4 x i16> %b to <4 x i32>
96 ; CODE-LABEL: saddl_4s
97 ; CODE: saddl v0.4s, v0.4h, v1.4h
98 define <4 x i32> @saddl_4s(<4 x i16> %a, <4 x i16> %b) {
99 %tmp0 = sext <4 x i16> %a to <4 x i32>
100 %tmp1 = sext <4 x i16> %b to <4 x i32>
101 %tmp2 = add <4 x i32> %tmp0, %tmp1
; Sign-extends both <2 x i32> arguments to <2 x i64> and adds them. Each sext is
; expected to have an estimated cost of 0, and llc is expected to emit saddl.
105 ; COST-LABEL: saddl_2d
106 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <2 x i32> %a to <2 x i64>
107 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <2 x i32> %b to <2 x i64>
108 ; CODE-LABEL: saddl_2d
109 ; CODE: saddl v0.2d, v0.2s, v1.2s
110 define <2 x i64> @saddl_2d(<2 x i32> %a, <2 x i32> %b) {
111 %tmp0 = sext <2 x i32> %a to <2 x i64>
112 %tmp1 = sext <2 x i32> %b to <2 x i64>
113 %tmp2 = add <2 x i64> %tmp0, %tmp1
; Sign-extends both <16 x i8> arguments to <16 x i16> and adds them. Each sext
; is expected to have an estimated cost of 0, and llc is expected to emit
; saddl2 immediately followed by saddl.
117 ; COST-LABEL: saddl2_8h
118 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <16 x i8> %a to <16 x i16>
119 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <16 x i8> %b to <16 x i16>
120 ; CODE-LABEL: saddl2_8h
121 ; CODE: saddl2 v2.8h, v0.16b, v1.16b
122 ; CODE-NEXT: saddl v0.8h, v0.8b, v1.8b
123 define <16 x i16> @saddl2_8h(<16 x i8> %a, <16 x i8> %b) {
124 %tmp0 = sext <16 x i8> %a to <16 x i16>
125 %tmp1 = sext <16 x i8> %b to <16 x i16>
126 %tmp2 = add <16 x i16> %tmp0, %tmp1
; Sign-extends both <8 x i16> arguments to <8 x i32> and adds them. Each sext is
; expected to have an estimated cost of 0, and llc is expected to emit
; saddl2 immediately followed by saddl.
130 ; COST-LABEL: saddl2_4s
131 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i16> %a to <8 x i32>
132 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <8 x i16> %b to <8 x i32>
133 ; CODE-LABEL: saddl2_4s
134 ; CODE: saddl2 v2.4s, v0.8h, v1.8h
135 ; CODE-NEXT: saddl v0.4s, v0.4h, v1.4h
136 define <8 x i32> @saddl2_4s(<8 x i16> %a, <8 x i16> %b) {
137 %tmp0 = sext <8 x i16> %a to <8 x i32>
138 %tmp1 = sext <8 x i16> %b to <8 x i32>
139 %tmp2 = add <8 x i32> %tmp0, %tmp1
; Sign-extends both <4 x i32> arguments to <4 x i64> and adds them. Each sext is
; expected to have an estimated cost of 0, and llc is expected to emit
; saddl2 immediately followed by saddl.
143 ; COST-LABEL: saddl2_2d
144 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i32> %a to <4 x i64>
145 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <4 x i32> %b to <4 x i64>
146 ; CODE-LABEL: saddl2_2d
147 ; CODE: saddl2 v2.2d, v0.4s, v1.4s
148 ; CODE-NEXT: saddl v0.2d, v0.2s, v1.2s
149 define <4 x i64> @saddl2_2d(<4 x i32> %a, <4 x i32> %b) {
150 %tmp0 = sext <4 x i32> %a to <4 x i64>
151 %tmp1 = sext <4 x i32> %b to <4 x i64>
152 %tmp2 = add <4 x i64> %tmp0, %tmp1
; Zero-extends both <8 x i8> arguments to <8 x i16> and subtracts the second
; from the first. Each zext is expected to have an estimated cost of 0, and llc
; is expected to emit usubl.
156 ; COST-LABEL: usubl_8h
157 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i8> %a to <8 x i16>
158 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <8 x i8> %b to <8 x i16>
159 ; CODE-LABEL: usubl_8h
160 ; CODE: usubl v0.8h, v0.8b, v1.8b
161 define <8 x i16> @usubl_8h(<8 x i8> %a, <8 x i8> %b) {
162 %tmp0 = zext <8 x i8> %a to <8 x i16>
163 %tmp1 = zext <8 x i8> %b to <8 x i16>
164 %tmp2 = sub <8 x i16> %tmp0, %tmp1
; Zero-extends both <4 x i16> arguments to <4 x i32> and subtracts the second
; from the first. Each zext is expected to have an estimated cost of 0, and llc
; is expected to emit usubl.
168 ; COST-LABEL: usubl_4s
169 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i16> %a to <4 x i32>
170 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <4 x i16> %b to <4 x i32>
171 ; CODE-LABEL: usubl_4s
172 ; CODE: usubl v0.4s, v0.4h, v1.4h
173 define <4 x i32> @usubl_4s(<4 x i16> %a, <4 x i16> %b) {
174 %tmp0 = zext <4 x i16> %a to <4 x i32>
175 %tmp1 = zext <4 x i16> %b to <4 x i32>
176 %tmp2 = sub <4 x i32> %tmp0, %tmp1
; Zero-extends both <2 x i32> arguments to <2 x i64> and subtracts the second
; from the first. Each zext is expected to have an estimated cost of 0, and llc
; is expected to emit usubl.
180 ; COST-LABEL: usubl_2d
181 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <2 x i32> %a to <2 x i64>
182 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <2 x i32> %b to <2 x i64>
183 ; CODE-LABEL: usubl_2d
184 ; CODE: usubl v0.2d, v0.2s, v1.2s
185 define <2 x i64> @usubl_2d(<2 x i32> %a, <2 x i32> %b) {
186 %tmp0 = zext <2 x i32> %a to <2 x i64>
187 %tmp1 = zext <2 x i32> %b to <2 x i64>
188 %tmp2 = sub <2 x i64> %tmp0, %tmp1
; Zero-extends both <16 x i8> arguments to <16 x i16> and subtracts the second
; from the first. Each zext is expected to have an estimated cost of 0, and llc
; is expected to emit usubl2 immediately followed by usubl.
192 ; COST-LABEL: usubl2_8h
193 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <16 x i8> %a to <16 x i16>
194 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <16 x i8> %b to <16 x i16>
195 ; CODE-LABEL: usubl2_8h
196 ; CODE: usubl2 v2.8h, v0.16b, v1.16b
197 ; CODE-NEXT: usubl v0.8h, v0.8b, v1.8b
198 define <16 x i16> @usubl2_8h(<16 x i8> %a, <16 x i8> %b) {
199 %tmp0 = zext <16 x i8> %a to <16 x i16>
200 %tmp1 = zext <16 x i8> %b to <16 x i16>
201 %tmp2 = sub <16 x i16> %tmp0, %tmp1
; Zero-extends both <8 x i16> arguments to <8 x i32> and subtracts the second
; from the first. Each zext is expected to have an estimated cost of 0, and llc
; is expected to emit usubl2 immediately followed by usubl.
205 ; COST-LABEL: usubl2_4s
206 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i16> %a to <8 x i32>
207 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <8 x i16> %b to <8 x i32>
208 ; CODE-LABEL: usubl2_4s
209 ; CODE: usubl2 v2.4s, v0.8h, v1.8h
210 ; CODE-NEXT: usubl v0.4s, v0.4h, v1.4h
211 define <8 x i32> @usubl2_4s(<8 x i16> %a, <8 x i16> %b) {
212 %tmp0 = zext <8 x i16> %a to <8 x i32>
213 %tmp1 = zext <8 x i16> %b to <8 x i32>
214 %tmp2 = sub <8 x i32> %tmp0, %tmp1
; Zero-extends both <4 x i32> arguments to <4 x i64> and subtracts the second
; from the first. Each zext is expected to have an estimated cost of 0, and llc
; is expected to emit usubl2 immediately followed by usubl.
218 ; COST-LABEL: usubl2_2d
219 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i32> %a to <4 x i64>
220 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <4 x i32> %b to <4 x i64>
221 ; CODE-LABEL: usubl2_2d
222 ; CODE: usubl2 v2.2d, v0.4s, v1.4s
223 ; CODE-NEXT: usubl v0.2d, v0.2s, v1.2s
224 define <4 x i64> @usubl2_2d(<4 x i32> %a, <4 x i32> %b) {
225 %tmp0 = zext <4 x i32> %a to <4 x i64>
226 %tmp1 = zext <4 x i32> %b to <4 x i64>
227 %tmp2 = sub <4 x i64> %tmp0, %tmp1
; Sign-extends both <8 x i8> arguments to <8 x i16> and subtracts the second
; from the first. Each sext is expected to have an estimated cost of 0, and llc
; is expected to emit ssubl.
231 ; COST-LABEL: ssubl_8h
232 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i8> %a to <8 x i16>
233 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <8 x i8> %b to <8 x i16>
234 ; CODE-LABEL: ssubl_8h
235 ; CODE: ssubl v0.8h, v0.8b, v1.8b
236 define <8 x i16> @ssubl_8h(<8 x i8> %a, <8 x i8> %b) {
237 %tmp0 = sext <8 x i8> %a to <8 x i16>
238 %tmp1 = sext <8 x i8> %b to <8 x i16>
239 %tmp2 = sub <8 x i16> %tmp0, %tmp1
; Sign-extends both <4 x i16> arguments to <4 x i32> and subtracts the second
; from the first. Each sext is expected to have an estimated cost of 0, and llc
; is expected to emit ssubl.
243 ; COST-LABEL: ssubl_4s
244 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i16> %a to <4 x i32>
245 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <4 x i16> %b to <4 x i32>
246 ; CODE-LABEL: ssubl_4s
247 ; CODE: ssubl v0.4s, v0.4h, v1.4h
248 define <4 x i32> @ssubl_4s(<4 x i16> %a, <4 x i16> %b) {
249 %tmp0 = sext <4 x i16> %a to <4 x i32>
250 %tmp1 = sext <4 x i16> %b to <4 x i32>
251 %tmp2 = sub <4 x i32> %tmp0, %tmp1
; Sign-extends both <2 x i32> arguments to <2 x i64> and subtracts the second
; from the first. Each sext is expected to have an estimated cost of 0, and llc
; is expected to emit ssubl.
255 ; COST-LABEL: ssubl_2d
256 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <2 x i32> %a to <2 x i64>
257 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <2 x i32> %b to <2 x i64>
258 ; CODE-LABEL: ssubl_2d
259 ; CODE: ssubl v0.2d, v0.2s, v1.2s
260 define <2 x i64> @ssubl_2d(<2 x i32> %a, <2 x i32> %b) {
261 %tmp0 = sext <2 x i32> %a to <2 x i64>
262 %tmp1 = sext <2 x i32> %b to <2 x i64>
263 %tmp2 = sub <2 x i64> %tmp0, %tmp1
; Sign-extends both <16 x i8> arguments to <16 x i16> and subtracts the second
; from the first. Each sext is expected to have an estimated cost of 0, and llc
; is expected to emit ssubl2 immediately followed by ssubl.
267 ; COST-LABEL: ssubl2_8h
268 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <16 x i8> %a to <16 x i16>
269 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <16 x i8> %b to <16 x i16>
270 ; CODE-LABEL: ssubl2_8h
271 ; CODE: ssubl2 v2.8h, v0.16b, v1.16b
272 ; CODE-NEXT: ssubl v0.8h, v0.8b, v1.8b
273 define <16 x i16> @ssubl2_8h(<16 x i8> %a, <16 x i8> %b) {
274 %tmp0 = sext <16 x i8> %a to <16 x i16>
275 %tmp1 = sext <16 x i8> %b to <16 x i16>
276 %tmp2 = sub <16 x i16> %tmp0, %tmp1
; Sign-extends both <8 x i16> arguments to <8 x i32> and subtracts the second
; from the first. Each sext is expected to have an estimated cost of 0, and llc
; is expected to emit ssubl2 immediately followed by ssubl.
280 ; COST-LABEL: ssubl2_4s
281 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i16> %a to <8 x i32>
282 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <8 x i16> %b to <8 x i32>
283 ; CODE-LABEL: ssubl2_4s
284 ; CODE: ssubl2 v2.4s, v0.8h, v1.8h
285 ; CODE-NEXT: ssubl v0.4s, v0.4h, v1.4h
286 define <8 x i32> @ssubl2_4s(<8 x i16> %a, <8 x i16> %b) {
287 %tmp0 = sext <8 x i16> %a to <8 x i32>
288 %tmp1 = sext <8 x i16> %b to <8 x i32>
289 %tmp2 = sub <8 x i32> %tmp0, %tmp1
; Sign-extends both <4 x i32> arguments to <4 x i64> and subtracts the second
; from the first. Each sext is expected to have an estimated cost of 0, and llc
; is expected to emit ssubl2 immediately followed by ssubl.
293 ; COST-LABEL: ssubl2_2d
294 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i32> %a to <4 x i64>
295 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <4 x i32> %b to <4 x i64>
296 ; CODE-LABEL: ssubl2_2d
297 ; CODE: ssubl2 v2.2d, v0.4s, v1.4s
298 ; CODE-NEXT: ssubl v0.2d, v0.2s, v1.2s
299 define <4 x i64> @ssubl2_2d(<4 x i32> %a, <4 x i32> %b) {
300 %tmp0 = sext <4 x i32> %a to <4 x i64>
301 %tmp1 = sext <4 x i32> %b to <4 x i64>
302 %tmp2 = sub <4 x i64> %tmp0, %tmp1
; Adds the zero-extended <8 x i8> argument %a to the <8 x i16> argument %b. The
; zext is expected to have an estimated cost of 0, and llc is expected to emit
; uaddw.
306 ; COST-LABEL: uaddw_8h
307 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i8> %a to <8 x i16>
308 ; CODE-LABEL: uaddw_8h
309 ; CODE: uaddw v0.8h, v1.8h, v0.8b
310 define <8 x i16> @uaddw_8h(<8 x i8> %a, <8 x i16> %b) {
311 %tmp0 = zext <8 x i8> %a to <8 x i16>
312 %tmp1 = add <8 x i16> %b, %tmp0
; Adds the zero-extended <4 x i16> argument %a to the <4 x i32> argument %b. The
; zext is expected to have an estimated cost of 0, and llc is expected to emit
; uaddw.
316 ; COST-LABEL: uaddw_4s
317 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i16> %a to <4 x i32>
318 ; CODE-LABEL: uaddw_4s
319 ; CODE: uaddw v0.4s, v1.4s, v0.4h
320 define <4 x i32> @uaddw_4s(<4 x i16> %a, <4 x i32> %b) {
321 %tmp0 = zext <4 x i16> %a to <4 x i32>
322 %tmp1 = add <4 x i32> %b, %tmp0
; Adds the zero-extended <2 x i32> argument %a to the <2 x i64> argument %b. The
; zext is expected to have an estimated cost of 0, and llc is expected to emit
; uaddw.
326 ; COST-LABEL: uaddw_2d
327 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <2 x i32> %a to <2 x i64>
328 ; CODE-LABEL: uaddw_2d
329 ; CODE: uaddw v0.2d, v1.2d, v0.2s
330 define <2 x i64> @uaddw_2d(<2 x i32> %a, <2 x i64> %b) {
331 %tmp0 = zext <2 x i32> %a to <2 x i64>
332 %tmp1 = add <2 x i64> %b, %tmp0
; Adds the zero-extended <16 x i8> argument %a to the <16 x i16> argument %b.
; The zext is expected to have an estimated cost of 0, and llc is expected to
; emit uaddw2 immediately followed by uaddw.
336 ; COST-LABEL: uaddw2_8h
337 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <16 x i8> %a to <16 x i16>
338 ; CODE-LABEL: uaddw2_8h
339 ; CODE: uaddw2 v2.8h, v2.8h, v0.16b
340 ; CODE-NEXT: uaddw v0.8h, v1.8h, v0.8b
341 define <16 x i16> @uaddw2_8h(<16 x i8> %a, <16 x i16> %b) {
342 %tmp0 = zext <16 x i8> %a to <16 x i16>
343 %tmp1 = add <16 x i16> %b, %tmp0
; Adds the zero-extended <8 x i16> argument %a to the <8 x i32> argument %b. The
; zext is expected to have an estimated cost of 0, and llc is expected to emit
; uaddw2 immediately followed by uaddw.
347 ; COST-LABEL: uaddw2_4s
348 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i16> %a to <8 x i32>
349 ; CODE-LABEL: uaddw2_4s
350 ; CODE: uaddw2 v2.4s, v2.4s, v0.8h
351 ; CODE-NEXT: uaddw v0.4s, v1.4s, v0.4h
352 define <8 x i32> @uaddw2_4s(<8 x i16> %a, <8 x i32> %b) {
353 %tmp0 = zext <8 x i16> %a to <8 x i32>
354 %tmp1 = add <8 x i32> %b, %tmp0
; Adds the zero-extended <4 x i32> argument %a to the <4 x i64> argument %b. The
; zext is expected to have an estimated cost of 0, and llc is expected to emit
; uaddw2 immediately followed by uaddw.
358 ; COST-LABEL: uaddw2_2d
359 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i32> %a to <4 x i64>
360 ; CODE-LABEL: uaddw2_2d
361 ; CODE: uaddw2 v2.2d, v2.2d, v0.4s
362 ; CODE-NEXT: uaddw v0.2d, v1.2d, v0.2s
363 define <4 x i64> @uaddw2_2d(<4 x i32> %a, <4 x i64> %b) {
364 %tmp0 = zext <4 x i32> %a to <4 x i64>
365 %tmp1 = add <4 x i64> %b, %tmp0
; Adds the sign-extended <8 x i8> argument %a to the <8 x i16> argument %b. The
; sext is expected to have an estimated cost of 0, and llc is expected to emit
; saddw.
369 ; COST-LABEL: saddw_8h
370 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i8> %a to <8 x i16>
371 ; CODE-LABEL: saddw_8h
372 ; CODE: saddw v0.8h, v1.8h, v0.8b
373 define <8 x i16> @saddw_8h(<8 x i8> %a, <8 x i16> %b) {
374 %tmp0 = sext <8 x i8> %a to <8 x i16>
375 %tmp1 = add <8 x i16> %b, %tmp0
; Adds the sign-extended <4 x i16> argument %a to the <4 x i32> argument %b. The
; sext is expected to have an estimated cost of 0, and llc is expected to emit
; saddw.
379 ; COST-LABEL: saddw_4s
380 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i16> %a to <4 x i32>
381 ; CODE-LABEL: saddw_4s
382 ; CODE: saddw v0.4s, v1.4s, v0.4h
383 define <4 x i32> @saddw_4s(<4 x i16> %a, <4 x i32> %b) {
384 %tmp0 = sext <4 x i16> %a to <4 x i32>
385 %tmp1 = add <4 x i32> %b, %tmp0
; Adds the sign-extended <2 x i32> argument %a to the <2 x i64> argument %b. The
; sext is expected to have an estimated cost of 0, and llc is expected to emit
; saddw.
389 ; COST-LABEL: saddw_2d
390 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <2 x i32> %a to <2 x i64>
391 ; CODE-LABEL: saddw_2d
392 ; CODE: saddw v0.2d, v1.2d, v0.2s
393 define <2 x i64> @saddw_2d(<2 x i32> %a, <2 x i64> %b) {
394 %tmp0 = sext <2 x i32> %a to <2 x i64>
395 %tmp1 = add <2 x i64> %b, %tmp0
; Adds the sign-extended <16 x i8> argument %a to the <16 x i16> argument %b.
; The sext is expected to have an estimated cost of 0, and llc is expected to
; emit saddw2 immediately followed by saddw.
399 ; COST-LABEL: saddw2_8h
400 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <16 x i8> %a to <16 x i16>
401 ; CODE-LABEL: saddw2_8h
402 ; CODE: saddw2 v2.8h, v2.8h, v0.16b
403 ; CODE-NEXT: saddw v0.8h, v1.8h, v0.8b
404 define <16 x i16> @saddw2_8h(<16 x i8> %a, <16 x i16> %b) {
405 %tmp0 = sext <16 x i8> %a to <16 x i16>
406 %tmp1 = add <16 x i16> %b, %tmp0
; Adds the sign-extended <8 x i16> argument %a to the <8 x i32> argument %b. The
; sext is expected to have an estimated cost of 0, and llc is expected to emit
; saddw2 immediately followed by saddw.
410 ; COST-LABEL: saddw2_4s
411 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i16> %a to <8 x i32>
412 ; CODE-LABEL: saddw2_4s
413 ; CODE: saddw2 v2.4s, v2.4s, v0.8h
414 ; CODE-NEXT: saddw v0.4s, v1.4s, v0.4h
415 define <8 x i32> @saddw2_4s(<8 x i16> %a, <8 x i32> %b) {
416 %tmp0 = sext <8 x i16> %a to <8 x i32>
417 %tmp1 = add <8 x i32> %b, %tmp0
; Adds the sign-extended <4 x i32> argument %a to the <4 x i64> argument %b. The
; sext is expected to have an estimated cost of 0, and llc is expected to emit
; saddw2 immediately followed by saddw.
421 ; COST-LABEL: saddw2_2d
422 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i32> %a to <4 x i64>
423 ; CODE-LABEL: saddw2_2d
424 ; CODE: saddw2 v2.2d, v2.2d, v0.4s
425 ; CODE-NEXT: saddw v0.2d, v1.2d, v0.2s
426 define <4 x i64> @saddw2_2d(<4 x i32> %a, <4 x i64> %b) {
427 %tmp0 = sext <4 x i32> %a to <4 x i64>
428 %tmp1 = add <4 x i64> %b, %tmp0
; Subtracts the zero-extended <8 x i8> argument %a from the <8 x i16> argument
; %b. The zext is expected to have an estimated cost of 0, and llc is expected
; to emit usubw.
432 ; COST-LABEL: usubw_8h
433 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i8> %a to <8 x i16>
434 ; CODE-LABEL: usubw_8h
435 ; CODE: usubw v0.8h, v1.8h, v0.8b
436 define <8 x i16> @usubw_8h(<8 x i8> %a, <8 x i16> %b) {
437 %tmp0 = zext <8 x i8> %a to <8 x i16>
438 %tmp1 = sub <8 x i16> %b, %tmp0
; Subtracts the zero-extended <4 x i16> argument %a from the <4 x i32> argument
; %b. The zext is expected to have an estimated cost of 0, and llc is expected
; to emit usubw.
442 ; COST-LABEL: usubw_4s
443 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i16> %a to <4 x i32>
444 ; CODE-LABEL: usubw_4s
445 ; CODE: usubw v0.4s, v1.4s, v0.4h
446 define <4 x i32> @usubw_4s(<4 x i16> %a, <4 x i32> %b) {
447 %tmp0 = zext <4 x i16> %a to <4 x i32>
448 %tmp1 = sub <4 x i32> %b, %tmp0
; Subtracts the zero-extended <2 x i32> argument %a from the <2 x i64> argument
; %b. The zext is expected to have an estimated cost of 0, and llc is expected
; to emit usubw.
452 ; COST-LABEL: usubw_2d
453 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <2 x i32> %a to <2 x i64>
454 ; CODE-LABEL: usubw_2d
455 ; CODE: usubw v0.2d, v1.2d, v0.2s
456 define <2 x i64> @usubw_2d(<2 x i32> %a, <2 x i64> %b) {
457 %tmp0 = zext <2 x i32> %a to <2 x i64>
458 %tmp1 = sub <2 x i64> %b, %tmp0
; Subtracts the zero-extended <16 x i8> argument %a from the <16 x i16>
; argument %b. The zext is expected to have an estimated cost of 0, and llc is
; expected to emit usubw2 immediately followed by usubw.
462 ; COST-LABEL: usubw2_8h
463 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <16 x i8> %a to <16 x i16>
464 ; CODE-LABEL: usubw2_8h
465 ; CODE: usubw2 v2.8h, v2.8h, v0.16b
466 ; CODE-NEXT: usubw v0.8h, v1.8h, v0.8b
467 define <16 x i16> @usubw2_8h(<16 x i8> %a, <16 x i16> %b) {
468 %tmp0 = zext <16 x i8> %a to <16 x i16>
469 %tmp1 = sub <16 x i16> %b, %tmp0
; Subtracts the zero-extended <8 x i16> argument %a from the <8 x i32> argument
; %b. The zext is expected to have an estimated cost of 0, and llc is expected
; to emit usubw2 immediately followed by usubw.
473 ; COST-LABEL: usubw2_4s
474 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i16> %a to <8 x i32>
475 ; CODE-LABEL: usubw2_4s
476 ; CODE: usubw2 v2.4s, v2.4s, v0.8h
477 ; CODE-NEXT: usubw v0.4s, v1.4s, v0.4h
478 define <8 x i32> @usubw2_4s(<8 x i16> %a, <8 x i32> %b) {
479 %tmp0 = zext <8 x i16> %a to <8 x i32>
480 %tmp1 = sub <8 x i32> %b, %tmp0
; Subtracts the zero-extended <4 x i32> argument %a from the <4 x i64> argument
; %b. The zext is expected to have an estimated cost of 0, and llc is expected
; to emit usubw2 immediately followed by usubw.
484 ; COST-LABEL: usubw2_2d
485 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i32> %a to <4 x i64>
486 ; CODE-LABEL: usubw2_2d
487 ; CODE: usubw2 v2.2d, v2.2d, v0.4s
488 ; CODE-NEXT: usubw v0.2d, v1.2d, v0.2s
489 define <4 x i64> @usubw2_2d(<4 x i32> %a, <4 x i64> %b) {
490 %tmp0 = zext <4 x i32> %a to <4 x i64>
491 %tmp1 = sub <4 x i64> %b, %tmp0
; Subtracts the sign-extended <8 x i8> argument %a from the <8 x i16> argument
; %b. The sext is expected to have an estimated cost of 0, and llc is expected
; to emit ssubw.
495 ; COST-LABEL: ssubw_8h
496 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i8> %a to <8 x i16>
497 ; CODE-LABEL: ssubw_8h
498 ; CODE: ssubw v0.8h, v1.8h, v0.8b
499 define <8 x i16> @ssubw_8h(<8 x i8> %a, <8 x i16> %b) {
500 %tmp0 = sext <8 x i8> %a to <8 x i16>
501 %tmp1 = sub <8 x i16> %b, %tmp0
; Subtracts the sign-extended <4 x i16> argument %a from the <4 x i32> argument
; %b. The sext is expected to have an estimated cost of 0, and llc is expected
; to emit ssubw.
505 ; COST-LABEL: ssubw_4s
506 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i16> %a to <4 x i32>
507 ; CODE-LABEL: ssubw_4s
508 ; CODE: ssubw v0.4s, v1.4s, v0.4h
509 define <4 x i32> @ssubw_4s(<4 x i16> %a, <4 x i32> %b) {
510 %tmp0 = sext <4 x i16> %a to <4 x i32>
511 %tmp1 = sub <4 x i32> %b, %tmp0
; Subtracts the sign-extended <2 x i32> argument %a from the <2 x i64> argument
; %b. The sext is expected to have an estimated cost of 0, and llc is expected
; to emit ssubw.
515 ; COST-LABEL: ssubw_2d
516 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <2 x i32> %a to <2 x i64>
517 ; CODE-LABEL: ssubw_2d
518 ; CODE: ssubw v0.2d, v1.2d, v0.2s
519 define <2 x i64> @ssubw_2d(<2 x i32> %a, <2 x i64> %b) {
520 %tmp0 = sext <2 x i32> %a to <2 x i64>
521 %tmp1 = sub <2 x i64> %b, %tmp0
; Subtracts the sign-extended <16 x i8> argument %a from the <16 x i16>
; argument %b. The sext is expected to have an estimated cost of 0, and llc is
; expected to emit ssubw2 immediately followed by ssubw.
525 ; COST-LABEL: ssubw2_8h
526 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <16 x i8> %a to <16 x i16>
527 ; CODE-LABEL: ssubw2_8h
528 ; CODE: ssubw2 v2.8h, v2.8h, v0.16b
529 ; CODE-NEXT: ssubw v0.8h, v1.8h, v0.8b
530 define <16 x i16> @ssubw2_8h(<16 x i8> %a, <16 x i16> %b) {
531 %tmp0 = sext <16 x i8> %a to <16 x i16>
532 %tmp1 = sub <16 x i16> %b, %tmp0
; Subtracts the sign-extended <8 x i16> argument %a from the <8 x i32> argument
; %b. The sext is expected to have an estimated cost of 0, and llc is expected
; to emit ssubw2 immediately followed by ssubw.
536 ; COST-LABEL: ssubw2_4s
537 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i16> %a to <8 x i32>
538 ; CODE-LABEL: ssubw2_4s
539 ; CODE: ssubw2 v2.4s, v2.4s, v0.8h
540 ; CODE-NEXT: ssubw v0.4s, v1.4s, v0.4h
541 define <8 x i32> @ssubw2_4s(<8 x i16> %a, <8 x i32> %b) {
542 %tmp0 = sext <8 x i16> %a to <8 x i32>
543 %tmp1 = sub <8 x i32> %b, %tmp0
; Subtracts the sign-extended <4 x i32> argument %a from the <4 x i64> argument
; %b. The sext is expected to have an estimated cost of 0, and llc is expected
; to emit ssubw2 immediately followed by ssubw.
547 ; COST-LABEL: ssubw2_2d
548 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i32> %a to <4 x i64>
549 ; CODE-LABEL: ssubw2_2d
550 ; CODE: ssubw2 v2.2d, v2.2d, v0.4s
551 ; CODE-NEXT: ssubw v0.2d, v1.2d, v0.2s
552 define <4 x i64> @ssubw2_2d(<4 x i32> %a, <4 x i64> %b) {
553 %tmp0 = sext <4 x i32> %a to <4 x i64>
554 %tmp1 = sub <4 x i64> %b, %tmp0
; Here the zero-extended value is the first operand of the sub (in the usubw
; tests it is the second), and the zext is expected to have an estimated cost
; of 1 rather than 0.
558 ; COST-LABEL: neg_wrong_operand_order
559 ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp0 = zext <8 x i8> %a to <8 x i16>
560 define <8 x i16> @neg_wrong_operand_order(<8 x i8> %a, <8 x i16> %b) {
561 %tmp0 = zext <8 x i8> %a to <8 x i16>
562 %tmp1 = sub <8 x i16> %tmp0, %b
; Here the zero-extended value feeds a udiv instead of an add or sub, and the
; zext is expected to have an estimated cost of 1 rather than 0.
566 ; COST-LABEL: neg_non_widening_op
567 ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp0 = zext <8 x i8> %a to <8 x i16>
568 define <8 x i16> @neg_non_widening_op(<8 x i8> %a, <8 x i16> %b) {
569 %tmp0 = zext <8 x i8> %a to <8 x i16>
570 %tmp1 = udiv <8 x i16> %b, %tmp0
; The add mixes a sext (first operand) with a zext (second operand) of
; <8 x i8> values. The sext is expected to have an estimated cost of 1, while
; the zext is still expected to have an estimated cost of 0.
574 ; COST-LABEL: neg_dissimilar_operand_kind_0
575 ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp0 = sext <8 x i8> %a to <8 x i16>
576 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <8 x i8> %b to <8 x i16>
577 define <8 x i16> @neg_dissimilar_operand_kind_0(<8 x i8> %a, <8 x i8> %b) {
578 %tmp0 = sext <8 x i8> %a to <8 x i16>
579 %tmp1 = zext <8 x i8> %b to <8 x i16>
580 %tmp2 = add <8 x i16> %tmp0, %tmp1
; The add mixes a zext from <4 x i8> (first operand) with a zext from
; <4 x i16> (second operand), both to <4 x i32>. The i8 extend is expected to
; have an estimated cost of 1, while the i16 extend is expected to cost 0.
584 ; COST-LABEL: neg_dissimilar_operand_kind_1
585 ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp0 = zext <4 x i8> %a to <4 x i32>
586 ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <4 x i16> %b to <4 x i32>
587 define <4 x i32> @neg_dissimilar_operand_kind_1(<4 x i8> %a, <4 x i16> %b) {
588 %tmp0 = zext <4 x i8> %a to <4 x i32>
589 %tmp1 = zext <4 x i16> %b to <4 x i32>
590 %tmp2 = add <4 x i32> %tmp0, %tmp1
; The zext source type is <16 x i4>; the extend feeds the second operand of a
; sub and is expected to have an estimated cost of 1 rather than 0.
594 ; COST-LABEL: neg_illegal_vector_type_0
595 ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp0 = zext <16 x i4> %a to <16 x i8>
596 define <16 x i8> @neg_illegal_vector_type_0(<16 x i4> %a, <16 x i8> %b) {
597 %tmp0 = zext <16 x i4> %a to <16 x i8>
598 %tmp1 = sub <16 x i8> %b, %tmp0
; The zext operates on single-element vectors (<1 x i16> to <1 x i32>); it
; feeds the second operand of an add and is expected to have an estimated cost
; of 1 rather than 0.
; NOTE(review): "llegal" in the function name looks like a typo for "illegal"
; (compare neg_illegal_vector_type_0) - confirm before renaming, since the
; label check and the define must change together.
602 ; COST-LABEL: neg_llegal_vector_type_1
603 ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp0 = zext <1 x i16> %a to <1 x i32>
604 define <1 x i32> @neg_llegal_vector_type_1(<1 x i16> %a, <1 x i32> %b) {
605 %tmp0 = zext <1 x i16> %a to <1 x i32>
606 %tmp1 = add <1 x i32> %b, %tmp0
; The zext goes from <4 x i16> directly to <4 x i64> (a 4x element-width
; increase); it feeds the second operand of an add and is expected to have an
; estimated cost of 3.
; NOTE(review): "llegal" in the function name looks like a typo for "illegal"
; (compare neg_illegal_vector_type_0) - confirm before renaming, since the
; label check and the define must change together.
610 ; COST-LABEL: neg_llegal_vector_type_2
611 ; COST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %tmp0 = zext <4 x i16> %a to <4 x i64>
612 define <4 x i64> @neg_llegal_vector_type_2(<4 x i16> %a, <4 x i64> %b) {
613 %tmp0 = zext <4 x i16> %a to <4 x i64>
614 %tmp1 = add <4 x i64> %b, %tmp0
; The zext uses a three-element vector of non-power-of-two integers
; (<3 x i34> to <3 x i68>); it feeds the second operand of an add and is
; expected to have an estimated cost of 15.
; NOTE(review): "llegal" in the function name looks like a typo for "illegal"
; (compare neg_illegal_vector_type_0) - confirm before renaming, since the
; label check and the define must change together.
618 ; COST-LABEL: neg_llegal_vector_type_3
619 ; COST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %tmp0 = zext <3 x i34> %a to <3 x i68>
620 define <3 x i68> @neg_llegal_vector_type_3(<3 x i34> %a, <3 x i68> %b) {
621 %tmp0 = zext <3 x i34> %a to <3 x i68>
622 %tmp1 = add <3 x i68> %b, %tmp0