1 ; RUN: opt < %s -cost-model -analyze -mtriple=arm-apple-ios6.0.0 -mcpu=cortex-a8 | FileCheck %s --check-prefix=COST
2 ; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
3 ; Make sure that the ARM backend with NEON handles vselect.
; vmax_v4i32: lane-wise signed maximum of two <4 x i32> vectors.
; A signed greater-than compare of %a and %b drives a select that keeps %a
; where the compare is true and %b otherwise; the result is stored through %m.
; The llc run expects a vmax.s32 operating on Q registers in the output.
5 define void @vmax_v4i32(<4 x i32>* %m, <4 x i32> %a, <4 x i32> %b) {
6 ; CHECK: vmax.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
7 %cmpres = icmp sgt <4 x i32> %a, %b
8 %maxres = select <4 x i1> %cmpres, <4 x i32> %a, <4 x i32> %b
9 store <4 x i32> %maxres, <4 x i32>* %m
; func_blend10: compare-and-select on <16 x i16>.
; %T0_10 is the data vector type and %T1_10 the matching <16 x i1> mask type.
; The function loads one vector from %loadaddr and one from %loadaddr2,
; compares them with signed less-than, selects %v0 where the compare is true
; and %v1 otherwise, and stores the result through %storeaddr.
; %blend is not referenced by any instruction shown here.
; The cost-model run expects a cost of 0 for the compare and 4 for the select.
13 %T0_10 = type <16 x i16>
14 %T1_10 = type <16 x i1>
15 ; CHECK-LABEL: func_blend10:
16 define void @func_blend10(%T0_10* %loadaddr, %T0_10* %loadaddr2,
17 %T1_10* %blend, %T0_10* %storeaddr) {
18 %v0 = load %T0_10, %T0_10* %loadaddr
19 %v1 = load %T0_10, %T0_10* %loadaddr2
20 %c = icmp slt %T0_10 %v0, %v1
24 ; COST: cost of 0 {{.*}} icmp
25 ; COST: cost of 4 {{.*}} select
26 %r = select %T1_10 %c, %T0_10 %v0, %T0_10 %v1
27 store %T0_10 %r, %T0_10* %storeaddr
; func_blend14: compare-and-select on <8 x i32>.
; %T0_14 is the data vector type and %T1_14 the matching <8 x i1> mask type.
; The function loads one vector from %loadaddr and one from %loadaddr2,
; compares them with signed less-than, selects %v0 where the compare is true
; and %v1 otherwise, and stores the result through %storeaddr.
; %blend is not referenced by any instruction shown here.
; The cost-model run expects a cost of 0 for the compare and 4 for the select.
30 %T0_14 = type <8 x i32>
31 %T1_14 = type <8 x i1>
32 ; CHECK-LABEL: func_blend14:
33 define void @func_blend14(%T0_14* %loadaddr, %T0_14* %loadaddr2,
34 %T1_14* %blend, %T0_14* %storeaddr) {
35 %v0 = load %T0_14, %T0_14* %loadaddr
36 %v1 = load %T0_14, %T0_14* %loadaddr2
37 %c = icmp slt %T0_14 %v0, %v1
41 ; COST: cost of 0 {{.*}} icmp
42 ; COST: cost of 4 {{.*}} select
43 %r = select %T1_14 %c, %T0_14 %v0, %T0_14 %v1
44 store %T0_14 %r, %T0_14* %storeaddr
; func_blend15: compare-and-select on <16 x i32>.
; %T0_15 is the data vector type and %T1_15 the matching <16 x i1> mask type.
; The function loads one vector from %loadaddr and one from %loadaddr2,
; compares them with signed less-than, selects %v0 where the compare is true
; and %v1 otherwise, and stores the result through %storeaddr.
; %blend is not referenced by any instruction shown here.
; The cost-model run expects a cost of 0 for the compare and 8 for the select,
; twice the select cost expected for the <8 x i32> case in func_blend14.
47 %T0_15 = type <16 x i32>
48 %T1_15 = type <16 x i1>
49 ; CHECK-LABEL: func_blend15:
50 define void @func_blend15(%T0_15* %loadaddr, %T0_15* %loadaddr2,
51 %T1_15* %blend, %T0_15* %storeaddr) {
54 %v0 = load %T0_15, %T0_15* %loadaddr
55 %v1 = load %T0_15, %T0_15* %loadaddr2
56 %c = icmp slt %T0_15 %v0, %v1
58 ; COST: cost of 0 {{.*}} icmp
59 ; COST: cost of 8 {{.*}} select
60 %r = select %T1_15 %c, %T0_15 %v0, %T0_15 %v1
61 store %T0_15 %r, %T0_15* %storeaddr
65 ; We adjusted the cost model of the following selects. When we improve code
66 ; lowering we also need to adjust the cost.
; func_blend18: compare-and-select on <4 x i64>.
; %T0_18 is the data vector type and %T1_18 the matching <4 x i1> mask type.
; The IR loads one vector from %loadaddr and one from %loadaddr2, compares
; them with signed less-than, selects %v0 where the compare is true and %v1
; otherwise, and stores the result through %storeaddr. %blend is not
; referenced by any instruction shown here.
;
; The llc output is pinned instruction by instruction. In the expected code:
;   - the 64-bit lanes are moved to core register pairs with vmov and each
;     lane is compared with a subs/sbcs pair;
;   - each lane mask is built with mov #0 / movlt #1 / cmp / mvnne #0 and
;     placed into a D register with vdup.32;
;   - the two Q-register halves of the result are blended with vbit and
;     written back with two vst1.64 stores.
; The cost-model run expects a cost of 0 for the compare and 21 for the select.
67 %T0_18 = type <4 x i64>
68 %T1_18 = type <4 x i1>
69 define void @func_blend18(%T0_18* %loadaddr, %T0_18* %loadaddr2,
70 %T1_18* %blend, %T0_18* %storeaddr) {
71 ; CHECK-LABEL: func_blend18:
73 ; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
74 ; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
75 ; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128]!
76 ; CHECK-NEXT: vld1.64 {d22, d23}, [r0:128]!
77 ; CHECK-NEXT: vmov r4, r6, d16
78 ; CHECK-NEXT: vld1.64 {d18, d19}, [r1:128]
79 ; CHECK-NEXT: vld1.64 {d20, d21}, [r0:128]
80 ; CHECK-NEXT: vmov lr, r12, d18
81 ; CHECK-NEXT: mov r0, #0
82 ; CHECK-NEXT: vmov r2, r1, d20
83 ; CHECK-NEXT: subs r2, r2, lr
84 ; CHECK-NEXT: vmov r7, lr, d17
85 ; CHECK-NEXT: vmov r2, r5, d22
86 ; CHECK-NEXT: sbcs r1, r1, r12
87 ; CHECK-NEXT: mov r1, #0
88 ; CHECK-NEXT: movlt r1, #1
89 ; CHECK-NEXT: cmp r1, #0
90 ; CHECK-NEXT: mvnne r1, #0
91 ; CHECK-NEXT: subs r2, r2, r4
92 ; CHECK-NEXT: sbcs r6, r5, r6
93 ; CHECK-NEXT: vmov r2, r12, d19
94 ; CHECK-NEXT: vmov r5, r4, d21
95 ; CHECK-NEXT: mov r6, #0
96 ; CHECK-NEXT: movlt r6, #1
97 ; CHECK-NEXT: cmp r6, #0
98 ; CHECK-NEXT: mvnne r6, #0
99 ; CHECK-NEXT: subs r2, r5, r2
100 ; CHECK-NEXT: sbcs r4, r4, r12
101 ; CHECK-NEXT: mov r2, #0
102 ; CHECK-NEXT: vmov r4, r5, d23
103 ; CHECK-NEXT: movlt r2, #1
104 ; CHECK-NEXT: subs r7, r4, r7
105 ; CHECK-NEXT: sbcs r7, r5, lr
106 ; CHECK-NEXT: movlt r0, #1
107 ; CHECK-NEXT: cmp r0, #0
108 ; CHECK-NEXT: mvnne r0, #0
109 ; CHECK-NEXT: cmp r2, #0
110 ; CHECK-NEXT: vdup.32 d25, r0
111 ; CHECK-NEXT: mvnne r2, #0
112 ; CHECK-NEXT: vdup.32 d24, r6
113 ; CHECK-NEXT: vdup.32 d27, r2
114 ; CHECK-NEXT: vbit q8, q11, q12
115 ; CHECK-NEXT: vdup.32 d26, r1
116 ; CHECK-NEXT: vbit q9, q10, q13
117 ; CHECK-NEXT: vst1.64 {d16, d17}, [r3:128]!
118 ; CHECK-NEXT: vst1.64 {d18, d19}, [r3:128]
119 ; CHECK-NEXT: pop {r4, r5, r6, r7, r11, lr}
120 ; CHECK-NEXT: mov pc, lr
; IR under test follows; the expected assembly above is what llc must emit for it.
121 %v0 = load %T0_18, %T0_18* %loadaddr
122 %v1 = load %T0_18, %T0_18* %loadaddr2
123 %c = icmp slt %T0_18 %v0, %v1
125 ; COST: cost of 0 {{.*}} icmp
126 ; COST: cost of 21 {{.*}} select
127 %r = select %T1_18 %c, %T0_18 %v0, %T0_18 %v1
128 store %T0_18 %r, %T0_18* %storeaddr
; func_blend19: compare-and-select on <8 x i64>.
; %T0_19 is the data vector type and %T1_19 the matching <8 x i1> mask type.
; The IR loads one vector from %loadaddr and one from %loadaddr2, compares
; them with signed less-than, selects %v0 where the compare is true and %v1
; otherwise, and stores the result through %storeaddr. %blend is not
; referenced by any instruction shown here.
;
; The llc output is pinned instruction by instruction. In the expected code:
;   - the operands are loaded as four Q-register chunks per input, at byte
;     offsets 0, 16, 32 and 48 from each base pointer;
;   - each 64-bit lane is compared with a subs/sbcs pair on values moved out
;     with vmov, and the lane mask is built with mov #0 / movlt #1 / cmp /
;     mvnne #0 and placed into a D register with vdup.32;
;   - three chunks are blended with vbit; the remaining one copies its mask
;     with vorr and blends with vbsl;
;   - the four result chunks are written back with vst1.64.
; The cost-model run expects a cost of 0 for the compare and 54 for the select.
131 %T0_19 = type <8 x i64>
132 %T1_19 = type <8 x i1>
133 define void @func_blend19(%T0_19* %loadaddr, %T0_19* %loadaddr2,
134 %T1_19* %blend, %T0_19* %storeaddr) {
135 ; CHECK-LABEL: func_blend19:
137 ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
138 ; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
139 ; CHECK-NEXT: add r2, r1, #48
140 ; CHECK-NEXT: mov r8, #0
141 ; CHECK-NEXT: vld1.64 {d16, d17}, [r2:128]
142 ; CHECK-NEXT: add r2, r0, #48
143 ; CHECK-NEXT: mov lr, #0
144 ; CHECK-NEXT: vld1.64 {d18, d19}, [r2:128]
145 ; CHECK-NEXT: vmov r2, r12, d16
146 ; CHECK-NEXT: vmov r6, r7, d17
147 ; CHECK-NEXT: vmov r4, r5, d18
148 ; CHECK-NEXT: subs r2, r4, r2
149 ; CHECK-NEXT: sbcs r2, r5, r12
150 ; CHECK-NEXT: mov r12, #0
151 ; CHECK-NEXT: vmov r2, r4, d19
152 ; CHECK-NEXT: movlt r12, #1
153 ; CHECK-NEXT: cmp r12, #0
154 ; CHECK-NEXT: mov r5, r1
155 ; CHECK-NEXT: mvnne r12, #0
156 ; CHECK-NEXT: vld1.64 {d24, d25}, [r5:128]!
157 ; CHECK-NEXT: vld1.64 {d20, d21}, [r5:128]
158 ; CHECK-NEXT: subs r2, r2, r6
159 ; CHECK-NEXT: mov r2, r0
160 ; CHECK-NEXT: add r0, r0, #32
161 ; CHECK-NEXT: vld1.64 {d26, d27}, [r2:128]!
162 ; CHECK-NEXT: vld1.64 {d22, d23}, [r2:128]
163 ; CHECK-NEXT: sbcs r2, r4, r7
164 ; CHECK-NEXT: vmov r4, r5, d21
165 ; CHECK-NEXT: movlt r8, #1
166 ; CHECK-NEXT: vmov r6, r7, d23
167 ; CHECK-NEXT: cmp r8, #0
168 ; CHECK-NEXT: mvnne r8, #0
169 ; CHECK-NEXT: vld1.64 {d28, d29}, [r0:128]
170 ; CHECK-NEXT: add r0, r1, #32
171 ; CHECK-NEXT: vld1.64 {d30, d31}, [r0:128]
172 ; CHECK-NEXT: vmov r0, r1, d20
173 ; CHECK-NEXT: vdup.32 d7, r8
174 ; CHECK-NEXT: vdup.32 d6, r12
175 ; CHECK-NEXT: subs r4, r6, r4
176 ; CHECK-NEXT: sbcs r4, r7, r5
177 ; CHECK-NEXT: vmov r5, r6, d24
178 ; CHECK-NEXT: vmov r7, r2, d26
179 ; CHECK-NEXT: mov r4, #0
180 ; CHECK-NEXT: movlt r4, #1
181 ; CHECK-NEXT: cmp r4, #0
182 ; CHECK-NEXT: mvnne r4, #0
183 ; CHECK-NEXT: vdup.32 d5, r4
184 ; CHECK-NEXT: subs r5, r7, r5
185 ; CHECK-NEXT: sbcs r2, r2, r6
186 ; CHECK-NEXT: vmov r7, r6, d27
187 ; CHECK-NEXT: vmov r2, r9, d25
188 ; CHECK-NEXT: mov r5, #0
189 ; CHECK-NEXT: movlt r5, #1
190 ; CHECK-NEXT: cmp r5, #0
191 ; CHECK-NEXT: mvnne r5, #0
192 ; CHECK-NEXT: subs r2, r7, r2
193 ; CHECK-NEXT: sbcs r2, r6, r9
194 ; CHECK-NEXT: vmov r6, r7, d22
195 ; CHECK-NEXT: mov r2, #0
196 ; CHECK-NEXT: movlt r2, #1
197 ; CHECK-NEXT: cmp r2, #0
198 ; CHECK-NEXT: mvnne r2, #0
199 ; CHECK-NEXT: vdup.32 d1, r2
200 ; CHECK-NEXT: vdup.32 d0, r5
201 ; CHECK-NEXT: vbit q12, q13, q0
202 ; CHECK-NEXT: subs r0, r6, r0
203 ; CHECK-NEXT: vmov r2, r6, d28
204 ; CHECK-NEXT: sbcs r0, r7, r1
205 ; CHECK-NEXT: mov r7, #0
206 ; CHECK-NEXT: vmov r0, r1, d30
207 ; CHECK-NEXT: movlt r7, #1
208 ; CHECK-NEXT: subs r0, r2, r0
209 ; CHECK-NEXT: vmov r2, r5, d29
210 ; CHECK-NEXT: sbcs r0, r6, r1
211 ; CHECK-NEXT: mov r6, #0
212 ; CHECK-NEXT: vmov r0, r1, d31
213 ; CHECK-NEXT: movlt r6, #1
214 ; CHECK-NEXT: subs r0, r2, r0
215 ; CHECK-NEXT: sbcs r0, r5, r1
216 ; CHECK-NEXT: movlt lr, #1
217 ; CHECK-NEXT: cmp lr, #0
218 ; CHECK-NEXT: mvnne lr, #0
219 ; CHECK-NEXT: cmp r6, #0
220 ; CHECK-NEXT: mvnne r6, #0
221 ; CHECK-NEXT: vdup.32 d3, lr
222 ; CHECK-NEXT: vdup.32 d2, r6
223 ; CHECK-NEXT: cmp r7, #0
224 ; CHECK-NEXT: vorr q13, q1, q1
225 ; CHECK-NEXT: mvnne r7, #0
226 ; CHECK-NEXT: vdup.32 d4, r7
227 ; CHECK-NEXT: add r0, r3, #32
228 ; CHECK-NEXT: vbsl q13, q14, q15
229 ; CHECK-NEXT: vbit q10, q11, q2
230 ; CHECK-NEXT: vbit q8, q9, q3
231 ; CHECK-NEXT: vst1.64 {d26, d27}, [r0:128]
232 ; CHECK-NEXT: add r0, r3, #48
233 ; CHECK-NEXT: vst1.64 {d24, d25}, [r3:128]!
234 ; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
235 ; CHECK-NEXT: vst1.64 {d20, d21}, [r3:128]
236 ; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr}
237 ; CHECK-NEXT: mov pc, lr
; IR under test follows; the expected assembly above is what llc must emit for it.
238 %v0 = load %T0_19, %T0_19* %loadaddr
239 %v1 = load %T0_19, %T0_19* %loadaddr2
240 %c = icmp slt %T0_19 %v0, %v1
242 ; COST: cost of 0 {{.*}} icmp
243 ; COST: cost of 54 {{.*}} select
244 %r = select %T1_19 %c, %T0_19 %v0, %T0_19 %v1
245 store %T0_19 %r, %T0_19* %storeaddr
; func_blend20: compare-and-select on <16 x i64>.
; %T0_20 is the data vector type and %T1_20 the matching <16 x i1> mask type.
; The IR loads one vector from %loadaddr and one from %loadaddr2, compares
; them with signed less-than, selects %v0 where the compare is true and %v1
; otherwise, and stores the result through %storeaddr. %blend is not
; referenced by any instruction shown here.
;
; The llc output is pinned instruction by instruction. In the expected code:
;   - the prologue saves r4-r11 and lr, saves d8-d11 with vpush, and reserves
;     stack space; r3 (the store address) is spilled to [sp, #4] and reloaded
;     into r1 before the stores;
;   - each 64-bit lane is compared with a subs/sbcs pair on values moved out
;     with vmov, and the lane mask is built with mov #0 / movlt #1 / cmp /
;     mvnne #0 and placed into a D register with vdup.32;
;   - Q-register chunks are blended with vbit or vbif, and one chunk copies
;     its mask with vorr and blends with vbsl;
;   - eight vst1.64 stores write the result, after which the epilogue undoes
;     the stack adjustments and restores the saved registers.
; The cost-model run expects a cost of 0 for the compare and 108 for the
; select, twice the select cost expected for the <8 x i64> case in func_blend19.
248 %T0_20 = type <16 x i64>
249 %T1_20 = type <16 x i1>
250 define void @func_blend20(%T0_20* %loadaddr, %T0_20* %loadaddr2,
251 %T1_20* %blend, %T0_20* %storeaddr) {
252 ; CHECK-LABEL: func_blend20:
254 ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
255 ; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
256 ; CHECK-NEXT: .pad #4
257 ; CHECK-NEXT: sub sp, sp, #4
258 ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
259 ; CHECK-NEXT: vpush {d8, d9, d10, d11}
260 ; CHECK-NEXT: .pad #8
261 ; CHECK-NEXT: sub sp, sp, #8
262 ; CHECK-NEXT: add r9, r1, #64
263 ; CHECK-NEXT: mov r2, #32
264 ; CHECK-NEXT: add r8, r0, #64
265 ; CHECK-NEXT: vld1.64 {d16, d17}, [r9:128], r2
266 ; CHECK-NEXT: mov r10, r1
267 ; CHECK-NEXT: mov r11, r0
268 ; CHECK-NEXT: vld1.64 {d18, d19}, [r8:128], r2
269 ; CHECK-NEXT: vmov r7, r5, d17
270 ; CHECK-NEXT: vmov r6, r2, d19
271 ; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
272 ; CHECK-NEXT: vld1.64 {d22, d23}, [r10:128]!
273 ; CHECK-NEXT: subs r7, r6, r7
274 ; CHECK-NEXT: sbcs r2, r2, r5
275 ; CHECK-NEXT: vmov r5, r6, d16
276 ; CHECK-NEXT: vmov r7, r4, d18
277 ; CHECK-NEXT: mov r2, #0
278 ; CHECK-NEXT: movlt r2, #1
279 ; CHECK-NEXT: cmp r2, #0
280 ; CHECK-NEXT: mvnne r2, #0
281 ; CHECK-NEXT: vdup.32 d21, r2
282 ; CHECK-NEXT: subs r5, r7, r5
283 ; CHECK-NEXT: sbcs r4, r4, r6
284 ; CHECK-NEXT: mov r4, #0
285 ; CHECK-NEXT: movlt r4, #1
286 ; CHECK-NEXT: cmp r4, #0
287 ; CHECK-NEXT: mvnne r4, #0
288 ; CHECK-NEXT: vdup.32 d20, r4
289 ; CHECK-NEXT: vmov r2, r4, d23
290 ; CHECK-NEXT: vbit q8, q9, q10
291 ; CHECK-NEXT: vld1.64 {d18, d19}, [r11:128]!
292 ; CHECK-NEXT: vmov r7, r5, d19
293 ; CHECK-NEXT: subs r2, r7, r2
294 ; CHECK-NEXT: sbcs r2, r5, r4
295 ; CHECK-NEXT: vmov r5, r7, d18
296 ; CHECK-NEXT: mov r2, #0
297 ; CHECK-NEXT: movlt r2, #1
298 ; CHECK-NEXT: cmp r2, #0
299 ; CHECK-NEXT: mvnne r2, #0
300 ; CHECK-NEXT: vdup.32 d21, r2
301 ; CHECK-NEXT: vmov r2, r4, d22
302 ; CHECK-NEXT: subs r2, r5, r2
303 ; CHECK-NEXT: sbcs r2, r7, r4
304 ; CHECK-NEXT: mov r2, #0
305 ; CHECK-NEXT: movlt r2, #1
306 ; CHECK-NEXT: cmp r2, #0
307 ; CHECK-NEXT: mvnne r2, #0
308 ; CHECK-NEXT: vdup.32 d20, r2
309 ; CHECK-NEXT: add r2, r0, #48
310 ; CHECK-NEXT: vbif q9, q11, q10
311 ; CHECK-NEXT: vld1.64 {d30, d31}, [r2:128]
312 ; CHECK-NEXT: add r2, r1, #48
313 ; CHECK-NEXT: vld1.64 {d2, d3}, [r2:128]
314 ; CHECK-NEXT: vmov r5, r7, d30
315 ; CHECK-NEXT: vmov r2, r4, d2
316 ; CHECK-NEXT: vld1.64 {d26, d27}, [r11:128]
317 ; CHECK-NEXT: vld1.64 {d0, d1}, [r10:128]
318 ; CHECK-NEXT: vld1.64 {d24, d25}, [r9:128]!
319 ; CHECK-NEXT: vld1.64 {d22, d23}, [r9:128]
320 ; CHECK-NEXT: vld1.64 {d20, d21}, [r8:128]!
321 ; CHECK-NEXT: vmov r11, r10, d21
322 ; CHECK-NEXT: subs r2, r5, r2
323 ; CHECK-NEXT: sbcs r2, r7, r4
324 ; CHECK-NEXT: vmov r7, r6, d31
325 ; CHECK-NEXT: vmov r2, r5, d3
326 ; CHECK-NEXT: mov r4, #0
327 ; CHECK-NEXT: movlt r4, #1
328 ; CHECK-NEXT: cmp r4, #0
329 ; CHECK-NEXT: mvnne r4, #0
330 ; CHECK-NEXT: subs r2, r7, r2
331 ; CHECK-NEXT: mov r7, #0
332 ; CHECK-NEXT: sbcs r2, r6, r5
333 ; CHECK-NEXT: vmov r6, r5, d27
334 ; CHECK-NEXT: vmov r2, r9, d1
335 ; CHECK-NEXT: movlt r7, #1
336 ; CHECK-NEXT: cmp r7, #0
337 ; CHECK-NEXT: mvnne r7, #0
338 ; CHECK-NEXT: vdup.32 d7, r7
339 ; CHECK-NEXT: vdup.32 d6, r4
340 ; CHECK-NEXT: subs r2, r6, r2
341 ; CHECK-NEXT: sbcs r2, r5, r9
342 ; CHECK-NEXT: vmov r6, r5, d26
343 ; CHECK-NEXT: mov r2, #0
344 ; CHECK-NEXT: movlt r2, #1
345 ; CHECK-NEXT: cmp r2, #0
346 ; CHECK-NEXT: mvnne r2, #0
347 ; CHECK-NEXT: vdup.32 d5, r2
348 ; CHECK-NEXT: vmov r2, r9, d0
349 ; CHECK-NEXT: subs r2, r6, r2
350 ; CHECK-NEXT: sbcs r2, r5, r9
351 ; CHECK-NEXT: mov r2, #0
352 ; CHECK-NEXT: movlt r2, #1
353 ; CHECK-NEXT: cmp r2, #0
354 ; CHECK-NEXT: mvnne r2, #0
355 ; CHECK-NEXT: vdup.32 d4, r2
356 ; CHECK-NEXT: add r2, r1, #32
357 ; CHECK-NEXT: vld1.64 {d28, d29}, [r2:128]
358 ; CHECK-NEXT: add r2, r0, #32
359 ; CHECK-NEXT: vbif q13, q0, q2
360 ; CHECK-NEXT: add r1, r1, #80
361 ; CHECK-NEXT: vld1.64 {d0, d1}, [r2:128]
362 ; CHECK-NEXT: vmov r4, r5, d28
363 ; CHECK-NEXT: vbif q15, q1, q3
364 ; CHECK-NEXT: add r0, r0, #80
365 ; CHECK-NEXT: vmov r2, r6, d0
366 ; CHECK-NEXT: vld1.64 {d2, d3}, [r8:128]
367 ; CHECK-NEXT: vmov r9, r8, d25
368 ; CHECK-NEXT: vld1.64 {d8, d9}, [r0:128]
369 ; CHECK-NEXT: vld1.64 {d6, d7}, [r1:128]
370 ; CHECK-NEXT: vmov r3, r12, d8
371 ; CHECK-NEXT: subs r2, r2, r4
372 ; CHECK-NEXT: sbcs r2, r6, r5
373 ; CHECK-NEXT: vmov r4, r5, d29
374 ; CHECK-NEXT: vmov r6, r7, d1
375 ; CHECK-NEXT: mov r2, #0
376 ; CHECK-NEXT: movlt r2, #1
377 ; CHECK-NEXT: cmp r2, #0
378 ; CHECK-NEXT: mvnne r2, #0
379 ; CHECK-NEXT: subs r4, r6, r4
380 ; CHECK-NEXT: sbcs r4, r7, r5
381 ; CHECK-NEXT: vmov r5, r6, d2
382 ; CHECK-NEXT: mov r4, #0
383 ; CHECK-NEXT: movlt r4, #1
384 ; CHECK-NEXT: cmp r4, #0
385 ; CHECK-NEXT: mvnne r4, #0
386 ; CHECK-NEXT: vdup.32 d5, r4
387 ; CHECK-NEXT: vdup.32 d4, r2
388 ; CHECK-NEXT: vmov r2, r4, d22
389 ; CHECK-NEXT: vbit q14, q0, q2
390 ; CHECK-NEXT: subs r2, r5, r2
391 ; CHECK-NEXT: sbcs r2, r6, r4
392 ; CHECK-NEXT: vmov r4, r5, d24
393 ; CHECK-NEXT: vmov r6, r7, d20
394 ; CHECK-NEXT: mov r2, #0
395 ; CHECK-NEXT: movlt r2, #1
396 ; CHECK-NEXT: cmp r2, #0
397 ; CHECK-NEXT: mvnne r2, #0
398 ; CHECK-NEXT: subs r1, r6, r4
399 ; CHECK-NEXT: vmov r0, r6, d9
400 ; CHECK-NEXT: sbcs r1, r7, r5
401 ; CHECK-NEXT: vmov r4, r5, d7
402 ; CHECK-NEXT: mov r1, #0
403 ; CHECK-NEXT: movlt r1, #1
404 ; CHECK-NEXT: cmp r1, #0
405 ; CHECK-NEXT: mvnne r1, #0
406 ; CHECK-NEXT: subs r0, r0, r4
407 ; CHECK-NEXT: vmov r7, r4, d23
408 ; CHECK-NEXT: sbcs r0, r6, r5
409 ; CHECK-NEXT: vmov r5, lr, d6
410 ; CHECK-NEXT: mov r0, #0
411 ; CHECK-NEXT: movlt r0, #1
412 ; CHECK-NEXT: cmp r0, #0
413 ; CHECK-NEXT: mvnne r0, #0
414 ; CHECK-NEXT: vdup.32 d11, r0
415 ; CHECK-NEXT: vmov r0, r6, d3
416 ; CHECK-NEXT: subs r0, r0, r7
417 ; CHECK-NEXT: sbcs r0, r6, r4
418 ; CHECK-NEXT: mov r0, #0
419 ; CHECK-NEXT: movlt r0, #1
420 ; CHECK-NEXT: subs r4, r11, r9
421 ; CHECK-NEXT: sbcs r4, r10, r8
422 ; CHECK-NEXT: mov r4, #0
423 ; CHECK-NEXT: movlt r4, #1
424 ; CHECK-NEXT: subs r3, r3, r5
425 ; CHECK-NEXT: sbcs r3, r12, lr
426 ; CHECK-NEXT: mov r3, #0
427 ; CHECK-NEXT: movlt r3, #1
428 ; CHECK-NEXT: cmp r3, #0
429 ; CHECK-NEXT: mvnne r3, #0
430 ; CHECK-NEXT: cmp r4, #0
431 ; CHECK-NEXT: mvnne r4, #0
432 ; CHECK-NEXT: vdup.32 d10, r3
433 ; CHECK-NEXT: vdup.32 d1, r4
434 ; CHECK-NEXT: vorr q2, q5, q5
435 ; CHECK-NEXT: vdup.32 d0, r1
436 ; CHECK-NEXT: cmp r0, #0
437 ; CHECK-NEXT: vbsl q2, q4, q3
438 ; CHECK-NEXT: mvnne r0, #0
439 ; CHECK-NEXT: vbif q10, q12, q0
440 ; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
441 ; CHECK-NEXT: vdup.32 d7, r0
442 ; CHECK-NEXT: add r0, r1, #80
443 ; CHECK-NEXT: vdup.32 d6, r2
444 ; CHECK-NEXT: vbit q11, q1, q3
445 ; CHECK-NEXT: vst1.64 {d4, d5}, [r0:128]
446 ; CHECK-NEXT: add r0, r1, #32
447 ; CHECK-NEXT: vst1.64 {d28, d29}, [r0:128]
448 ; CHECK-NEXT: add r0, r1, #48
449 ; CHECK-NEXT: vst1.64 {d30, d31}, [r0:128]
450 ; CHECK-NEXT: add r0, r1, #64
451 ; CHECK-NEXT: vst1.64 {d18, d19}, [r1:128]!
452 ; CHECK-NEXT: vst1.64 {d26, d27}, [r1:128]
453 ; CHECK-NEXT: mov r1, #32
454 ; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128], r1
455 ; CHECK-NEXT: vst1.64 {d20, d21}, [r0:128]!
456 ; CHECK-NEXT: vst1.64 {d22, d23}, [r0:128]
457 ; CHECK-NEXT: add sp, sp, #8
458 ; CHECK-NEXT: vpop {d8, d9, d10, d11}
459 ; CHECK-NEXT: add sp, sp, #4
460 ; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, lr}
461 ; CHECK-NEXT: mov pc, lr
; IR under test follows; the expected assembly above is what llc must emit for it.
462 %v0 = load %T0_20, %T0_20* %loadaddr
463 %v1 = load %T0_20, %T0_20* %loadaddr2
464 %c = icmp slt %T0_20 %v0, %v1
466 ; COST: cost of 0 {{.*}} icmp
467 ; COST: cost of 108 {{.*}} select
468 %r = select %T1_20 %c, %T0_20 %v0, %T0_20 %v1
469 store %T0_20 %r, %T0_20* %storeaddr