1 ; RUN: opt < %s -passes='print<cost-model>' -mtriple=arm-apple-ios6.0.0 -mcpu=cortex-a8 2>&1 -disable-output | FileCheck %s --check-prefix=COST
2 ; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
3 ; Make sure that the ARM backend with NEON handles vselect.
; vmax_v4i32: an icmp sgt whose result selects between the same two
; <4 x i32> operands is a per-lane signed maximum. llc is expected to emit
; vmax.s32 on q registers for it. The selected vector is stored through %m.
5 define void @vmax_v4i32(ptr %m, <4 x i32> %a, <4 x i32> %b) {
6 ; CHECK: vmax.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
7 %cmpres = icmp sgt <4 x i32> %a, %b
8 %maxres = select <4 x i1> %cmpres, <4 x i32> %a, <4 x i32> %b
9 store <4 x i32> %maxres, ptr %m
; func_blend10: loads two <16 x i16> vectors, compares them lane-wise with
; icmp slt, selects %v0 where the compare is true and %v1 otherwise, and
; stores the result to %storeaddr. %blend is not referenced by the visible
; body. Cost-model expectations: 0 for the icmp and 4 for the select.
13 %T0_10 = type <16 x i16>
14 %T1_10 = type <16 x i1>
15 ; CHECK-LABEL: func_blend10:
16 define void @func_blend10(ptr %loadaddr, ptr %loadaddr2,
17 ptr %blend, ptr %storeaddr) {
18 %v0 = load %T0_10, ptr %loadaddr
19 %v1 = load %T0_10, ptr %loadaddr2
20 %c = icmp slt %T0_10 %v0, %v1
24 ; COST: cost of 0 {{.*}} icmp
25 ; COST: cost of 4 {{.*}} select
26 %r = select %T1_10 %c, %T0_10 %v0, %T0_10 %v1
27 store %T0_10 %r, ptr %storeaddr
; func_blend14: loads two <8 x i32> vectors, compares them lane-wise with
; icmp slt, selects %v0 where the compare is true and %v1 otherwise, and
; stores the result to %storeaddr. %blend is not referenced by the visible
; body. Cost-model expectations: 0 for the icmp and 4 for the select.
30 %T0_14 = type <8 x i32>
31 %T1_14 = type <8 x i1>
32 ; CHECK-LABEL: func_blend14:
33 define void @func_blend14(ptr %loadaddr, ptr %loadaddr2,
34 ptr %blend, ptr %storeaddr) {
35 %v0 = load %T0_14, ptr %loadaddr
36 %v1 = load %T0_14, ptr %loadaddr2
37 %c = icmp slt %T0_14 %v0, %v1
41 ; COST: cost of 0 {{.*}} icmp
42 ; COST: cost of 4 {{.*}} select
43 %r = select %T1_14 %c, %T0_14 %v0, %T0_14 %v1
44 store %T0_14 %r, ptr %storeaddr
; func_blend15: loads two <16 x i32> vectors, compares them lane-wise with
; icmp slt, selects %v0 where the compare is true and %v1 otherwise, and
; stores the result to %storeaddr. %blend is not referenced by the visible
; body. Cost-model expectations: 0 for the icmp and 8 for the select.
47 %T0_15 = type <16 x i32>
48 %T1_15 = type <16 x i1>
49 ; CHECK-LABEL: func_blend15:
50 define void @func_blend15(ptr %loadaddr, ptr %loadaddr2,
51 ptr %blend, ptr %storeaddr) {
54 %v0 = load %T0_15, ptr %loadaddr
55 %v1 = load %T0_15, ptr %loadaddr2
56 %c = icmp slt %T0_15 %v0, %v1
58 ; COST: cost of 0 {{.*}} icmp
59 ; COST: cost of 8 {{.*}} select
60 %r = select %T1_15 %c, %T0_15 %v0, %T0_15 %v1
61 store %T0_15 %r, ptr %storeaddr
65 ; The cost model was adjusted for the following selects. If their code
66 ; lowering is improved, the expected costs below must be adjusted as well.
; func_blend18: loads two <4 x i64> vectors, compares them lane-wise with
; icmp slt, selects %v0 where the compare is true and %v1 otherwise, and
; stores the result to %storeaddr. %blend is not referenced by the visible
; body.
;
; The expected llc output pins the exact instruction sequence: each 64-bit
; lane is moved to core registers and compared with subs + sbcs, the lt
; result is turned into a zero or all-ones word (movlt, then mvnne), the
; word is splatted into a d register with vdup.32, and vbit performs the
; blend on q registers. Cost-model expectations: 0 for the icmp and 21 for
; the select.
67 %T0_18 = type <4 x i64>
68 %T1_18 = type <4 x i1>
69 define void @func_blend18(ptr %loadaddr, ptr %loadaddr2,
70 ptr %blend, ptr %storeaddr) {
71 ; CHECK-LABEL: func_blend18:
73 ; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
74 ; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
75 ; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128]!
76 ; CHECK-NEXT: vld1.64 {d22, d23}, [r0:128]!
77 ; CHECK-NEXT: vmov r4, r6, d16
78 ; CHECK-NEXT: vld1.64 {d18, d19}, [r1:128]
79 ; CHECK-NEXT: vld1.64 {d20, d21}, [r0:128]
80 ; CHECK-NEXT: vmov lr, r12, d18
81 ; CHECK-NEXT: mov r0, #0
82 ; CHECK-NEXT: vmov r2, r1, d20
83 ; CHECK-NEXT: subs r2, r2, lr
84 ; CHECK-NEXT: vmov r7, lr, d17
85 ; CHECK-NEXT: vmov r2, r5, d22
86 ; CHECK-NEXT: sbcs r1, r1, r12
87 ; CHECK-NEXT: mov r1, #0
88 ; CHECK-NEXT: movlt r1, #1
89 ; CHECK-NEXT: cmp r1, #0
90 ; CHECK-NEXT: mvnne r1, #0
91 ; CHECK-NEXT: subs r2, r2, r4
92 ; CHECK-NEXT: sbcs r6, r5, r6
93 ; CHECK-NEXT: vmov r2, r12, d19
94 ; CHECK-NEXT: vmov r5, r4, d21
95 ; CHECK-NEXT: mov r6, #0
96 ; CHECK-NEXT: movlt r6, #1
97 ; CHECK-NEXT: cmp r6, #0
98 ; CHECK-NEXT: mvnne r6, #0
99 ; CHECK-NEXT: subs r2, r5, r2
100 ; CHECK-NEXT: sbcs r4, r4, r12
101 ; CHECK-NEXT: mov r2, #0
102 ; CHECK-NEXT: vmov r4, r5, d23
103 ; CHECK-NEXT: movlt r2, #1
104 ; CHECK-NEXT: subs r7, r4, r7
105 ; CHECK-NEXT: sbcs r7, r5, lr
106 ; CHECK-NEXT: movlt r0, #1
107 ; CHECK-NEXT: cmp r0, #0
108 ; CHECK-NEXT: mvnne r0, #0
109 ; CHECK-NEXT: cmp r2, #0
110 ; CHECK-NEXT: vdup.32 d25, r0
111 ; CHECK-NEXT: mvnne r2, #0
112 ; CHECK-NEXT: vdup.32 d24, r6
113 ; CHECK-NEXT: vdup.32 d27, r2
114 ; CHECK-NEXT: vbit q8, q11, q12
115 ; CHECK-NEXT: vdup.32 d26, r1
116 ; CHECK-NEXT: vbit q9, q10, q13
117 ; CHECK-NEXT: vst1.64 {d16, d17}, [r3:128]!
118 ; CHECK-NEXT: vst1.64 {d18, d19}, [r3:128]
119 ; CHECK-NEXT: pop {r4, r5, r6, r7, r11, lr}
120 ; CHECK-NEXT: mov pc, lr
; IR under test.
121 %v0 = load %T0_18, ptr %loadaddr
122 %v1 = load %T0_18, ptr %loadaddr2
123 %c = icmp slt %T0_18 %v0, %v1
125 ; COST: cost of 0 {{.*}} icmp
126 ; COST: cost of 21 {{.*}} select
127 %r = select %T1_18 %c, %T0_18 %v0, %T0_18 %v1
128 store %T0_18 %r, ptr %storeaddr
; func_blend19: loads two <8 x i64> vectors, compares them lane-wise with
; icmp slt, selects %v0 where the compare is true and %v1 otherwise, and
; stores the result to %storeaddr. %blend is not referenced by the visible
; body.
;
; The expected llc output pins the exact instruction sequence: each 64-bit
; lane is moved to core registers and compared with subs + sbcs, the lt
; result is turned into a zero or all-ones word (movlt, then mvnne), the
; word is splatted into a d register with vdup.32, and vbit performs the
; blend on q registers. Cost-model expectations: 0 for the icmp and 54 for
; the select.
131 %T0_19 = type <8 x i64>
132 %T1_19 = type <8 x i1>
133 define void @func_blend19(ptr %loadaddr, ptr %loadaddr2,
134 ptr %blend, ptr %storeaddr) {
135 ; CHECK-LABEL: func_blend19:
137 ; CHECK-NEXT: .save {r4, r5, r6, lr}
138 ; CHECK-NEXT: push {r4, r5, r6, lr}
139 ; CHECK-NEXT: vld1.64 {d28, d29}, [r1:128]!
140 ; CHECK-NEXT: mov lr, #0
141 ; CHECK-NEXT: vld1.64 {d30, d31}, [r0:128]!
142 ; CHECK-NEXT: vld1.64 {d20, d21}, [r1:128]!
143 ; CHECK-NEXT: vld1.64 {d24, d25}, [r0:128]!
144 ; CHECK-NEXT: vld1.64 {d22, d23}, [r1:128]!
145 ; CHECK-NEXT: vld1.64 {d26, d27}, [r0:128]!
146 ; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128]
147 ; CHECK-NEXT: vld1.64 {d18, d19}, [r0:128]
148 ; CHECK-NEXT: vmov r0, r12, d16
149 ; CHECK-NEXT: vmov r1, r2, d18
150 ; CHECK-NEXT: subs r0, r1, r0
151 ; CHECK-NEXT: vmov r1, r4, d25
152 ; CHECK-NEXT: sbcs r0, r2, r12
153 ; CHECK-NEXT: mov r12, #0
154 ; CHECK-NEXT: vmov r2, r0, d21
155 ; CHECK-NEXT: movlt r12, #1
156 ; CHECK-NEXT: cmp r12, #0
157 ; CHECK-NEXT: mvnne r12, #0
158 ; CHECK-NEXT: subs r1, r1, r2
159 ; CHECK-NEXT: sbcs r0, r4, r0
160 ; CHECK-NEXT: vmov r2, r4, d26
161 ; CHECK-NEXT: mov r0, #0
162 ; CHECK-NEXT: movlt r0, #1
163 ; CHECK-NEXT: cmp r0, #0
164 ; CHECK-NEXT: mvnne r0, #0
165 ; CHECK-NEXT: vdup.32 d1, r0
166 ; CHECK-NEXT: vmov r0, r1, d22
167 ; CHECK-NEXT: subs r0, r2, r0
168 ; CHECK-NEXT: mov r2, #0
169 ; CHECK-NEXT: sbcs r0, r4, r1
170 ; CHECK-NEXT: vmov r4, r5, d31
171 ; CHECK-NEXT: vmov r0, r1, d29
172 ; CHECK-NEXT: movlt r2, #1
173 ; CHECK-NEXT: cmp r2, #0
174 ; CHECK-NEXT: mvnne r2, #0
175 ; CHECK-NEXT: subs r0, r4, r0
176 ; CHECK-NEXT: sbcs r0, r5, r1
177 ; CHECK-NEXT: vmov r4, r5, d30
178 ; CHECK-NEXT: mov r0, #0
179 ; CHECK-NEXT: movlt r0, #1
180 ; CHECK-NEXT: cmp r0, #0
181 ; CHECK-NEXT: mvnne r0, #0
182 ; CHECK-NEXT: vdup.32 d3, r0
183 ; CHECK-NEXT: vmov r0, r1, d28
184 ; CHECK-NEXT: subs r0, r4, r0
185 ; CHECK-NEXT: sbcs r0, r5, r1
186 ; CHECK-NEXT: vmov r4, r5, d24
187 ; CHECK-NEXT: mov r0, #0
188 ; CHECK-NEXT: movlt r0, #1
189 ; CHECK-NEXT: cmp r0, #0
190 ; CHECK-NEXT: mvnne r0, #0
191 ; CHECK-NEXT: vdup.32 d2, r0
192 ; CHECK-NEXT: vmov r0, r1, d20
193 ; CHECK-NEXT: vbit q14, q15, q1
194 ; CHECK-NEXT: subs r0, r4, r0
195 ; CHECK-NEXT: sbcs r0, r5, r1
196 ; CHECK-NEXT: vmov r1, r4, d17
197 ; CHECK-NEXT: vmov r5, r6, d19
198 ; CHECK-NEXT: mov r0, #0
199 ; CHECK-NEXT: movlt r0, #1
200 ; CHECK-NEXT: cmp r0, #0
201 ; CHECK-NEXT: mvnne r0, #0
202 ; CHECK-NEXT: vdup.32 d0, r0
203 ; CHECK-NEXT: vbit q10, q12, q0
204 ; CHECK-NEXT: subs r1, r5, r1
205 ; CHECK-NEXT: sbcs r1, r6, r4
206 ; CHECK-NEXT: vmov r4, r5, d27
207 ; CHECK-NEXT: vmov r0, r1, d23
208 ; CHECK-NEXT: mov r6, #0
209 ; CHECK-NEXT: movlt r6, #1
210 ; CHECK-NEXT: subs r0, r4, r0
211 ; CHECK-NEXT: sbcs r0, r5, r1
212 ; CHECK-NEXT: movlt lr, #1
213 ; CHECK-NEXT: cmp lr, #0
214 ; CHECK-NEXT: mvnne lr, #0
215 ; CHECK-NEXT: cmp r6, #0
216 ; CHECK-NEXT: vdup.32 d31, lr
217 ; CHECK-NEXT: mvnne r6, #0
218 ; CHECK-NEXT: vdup.32 d30, r2
219 ; CHECK-NEXT: vdup.32 d3, r6
220 ; CHECK-NEXT: vbit q11, q13, q15
221 ; CHECK-NEXT: vdup.32 d2, r12
222 ; CHECK-NEXT: vst1.64 {d28, d29}, [r3:128]!
223 ; CHECK-NEXT: vbit q8, q9, q1
224 ; CHECK-NEXT: vst1.64 {d20, d21}, [r3:128]!
225 ; CHECK-NEXT: vst1.64 {d22, d23}, [r3:128]!
226 ; CHECK-NEXT: vst1.64 {d16, d17}, [r3:128]
227 ; CHECK-NEXT: pop {r4, r5, r6, lr}
228 ; CHECK-NEXT: mov pc, lr
; IR under test.
229 %v0 = load %T0_19, ptr %loadaddr
230 %v1 = load %T0_19, ptr %loadaddr2
231 %c = icmp slt %T0_19 %v0, %v1
233 ; COST: cost of 0 {{.*}} icmp
234 ; COST: cost of 54 {{.*}} select
235 %r = select %T1_19 %c, %T0_19 %v0, %T0_19 %v1
236 store %T0_19 %r, ptr %storeaddr
; func_blend20: loads two <16 x i64> vectors, compares them lane-wise with
; icmp slt, selects %v0 where the compare is true and %v1 otherwise, and
; stores the result to %storeaddr. %blend is not referenced by the visible
; body.
;
; The expected llc output pins the exact instruction sequence: d8-d11 are
; saved and restored with vpush/vpop, each 64-bit lane is moved to core
; registers and compared with subs + sbcs, the lt result is turned into a
; zero or all-ones word (movlt, then mvnne), the word is splatted into a
; d register with vdup.32, and the blend is done on q registers with
; vbsl, vbif or vbit. Cost-model expectations: 0 for the icmp and 108 for
; the select.
239 %T0_20 = type <16 x i64>
240 %T1_20 = type <16 x i1>
241 define void @func_blend20(ptr %loadaddr, ptr %loadaddr2,
242 ptr %blend, ptr %storeaddr) {
243 ; CHECK-LABEL: func_blend20:
245 ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
246 ; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
247 ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
248 ; CHECK-NEXT: vpush {d8, d9, d10, d11}
249 ; CHECK-NEXT: mov r8, r1
250 ; CHECK-NEXT: mov lr, r0
251 ; CHECK-NEXT: vld1.64 {d16, d17}, [r8:128]!
252 ; CHECK-NEXT: add r9, r0, #64
253 ; CHECK-NEXT: add r10, r1, #64
254 ; CHECK-NEXT: mov r12, #0
255 ; CHECK-NEXT: vld1.64 {d22, d23}, [lr:128]!
256 ; CHECK-NEXT: vld1.64 {d18, d19}, [r8:128]!
257 ; CHECK-NEXT: vld1.64 {d20, d21}, [lr:128]!
258 ; CHECK-NEXT: vmov r6, r4, d19
259 ; CHECK-NEXT: vmov r5, r7, d21
260 ; CHECK-NEXT: vld1.64 {d4, d5}, [r9:128]!
261 ; CHECK-NEXT: vld1.64 {d6, d7}, [r10:128]!
262 ; CHECK-NEXT: vld1.64 {d0, d1}, [r10:128]!
263 ; CHECK-NEXT: vld1.64 {d2, d3}, [r9:128]!
264 ; CHECK-NEXT: subs r6, r5, r6
265 ; CHECK-NEXT: sbcs r4, r7, r4
266 ; CHECK-NEXT: vmov r5, r6, d18
267 ; CHECK-NEXT: vmov r7, r2, d20
268 ; CHECK-NEXT: mov r4, #0
269 ; CHECK-NEXT: movlt r4, #1
270 ; CHECK-NEXT: cmp r4, #0
271 ; CHECK-NEXT: mvnne r4, #0
272 ; CHECK-NEXT: vdup.32 d31, r4
273 ; CHECK-NEXT: subs r5, r7, r5
274 ; CHECK-NEXT: sbcs r2, r2, r6
275 ; CHECK-NEXT: vmov r4, r5, d3
276 ; CHECK-NEXT: mov r2, #0
277 ; CHECK-NEXT: movlt r2, #1
278 ; CHECK-NEXT: cmp r2, #0
279 ; CHECK-NEXT: mvnne r2, #0
280 ; CHECK-NEXT: vdup.32 d30, r2
281 ; CHECK-NEXT: vmov r0, r2, d1
282 ; CHECK-NEXT: subs r0, r4, r0
283 ; CHECK-NEXT: sbcs r0, r5, r2
284 ; CHECK-NEXT: vmov r4, r5, d2
285 ; CHECK-NEXT: mov r0, #0
286 ; CHECK-NEXT: movlt r0, #1
287 ; CHECK-NEXT: cmp r0, #0
288 ; CHECK-NEXT: mvnne r0, #0
289 ; CHECK-NEXT: vdup.32 d9, r0
290 ; CHECK-NEXT: vmov r0, r2, d0
291 ; CHECK-NEXT: subs r0, r4, r0
292 ; CHECK-NEXT: sbcs r0, r5, r2
293 ; CHECK-NEXT: vmov r4, r5, d5
294 ; CHECK-NEXT: mov r0, #0
295 ; CHECK-NEXT: movlt r0, #1
296 ; CHECK-NEXT: cmp r0, #0
297 ; CHECK-NEXT: mvnne r0, #0
298 ; CHECK-NEXT: vdup.32 d8, r0
299 ; CHECK-NEXT: vmov r0, r2, d7
300 ; CHECK-NEXT: subs r0, r4, r0
301 ; CHECK-NEXT: sbcs r0, r5, r2
302 ; CHECK-NEXT: vmov r4, r5, d4
303 ; CHECK-NEXT: mov r0, #0
304 ; CHECK-NEXT: movlt r0, #1
305 ; CHECK-NEXT: cmp r0, #0
306 ; CHECK-NEXT: mvnne r0, #0
307 ; CHECK-NEXT: vdup.32 d11, r0
308 ; CHECK-NEXT: vmov r0, r2, d6
309 ; CHECK-NEXT: subs r0, r4, r0
310 ; CHECK-NEXT: sbcs r0, r5, r2
311 ; CHECK-NEXT: vmov r4, r5, d23
312 ; CHECK-NEXT: mov r0, #0
313 ; CHECK-NEXT: movlt r0, #1
314 ; CHECK-NEXT: cmp r0, #0
315 ; CHECK-NEXT: mvnne r0, #0
316 ; CHECK-NEXT: vdup.32 d10, r0
317 ; CHECK-NEXT: vmov r0, r2, d17
318 ; CHECK-NEXT: subs r0, r4, r0
319 ; CHECK-NEXT: sbcs r0, r5, r2
320 ; CHECK-NEXT: vmov r4, r5, d22
321 ; CHECK-NEXT: mov r0, #0
322 ; CHECK-NEXT: movlt r0, #1
323 ; CHECK-NEXT: cmp r0, #0
324 ; CHECK-NEXT: mvnne r0, #0
325 ; CHECK-NEXT: vdup.32 d25, r0
326 ; CHECK-NEXT: vmov r0, r2, d16
327 ; CHECK-NEXT: subs r0, r4, r0
328 ; CHECK-NEXT: sbcs r0, r5, r2
329 ; CHECK-NEXT: mov r0, #0
330 ; CHECK-NEXT: movlt r0, #1
331 ; CHECK-NEXT: cmp r0, #0
332 ; CHECK-NEXT: mvnne r0, #0
333 ; CHECK-NEXT: vdup.32 d24, r0
334 ; CHECK-NEXT: vorr q13, q12, q12
335 ; CHECK-NEXT: vbsl q13, q11, q8
336 ; CHECK-NEXT: vld1.64 {d24, d25}, [r9:128]!
337 ; CHECK-NEXT: vorr q8, q5, q5
338 ; CHECK-NEXT: vld1.64 {d28, d29}, [r10:128]!
339 ; CHECK-NEXT: vbsl q8, q2, q3
340 ; CHECK-NEXT: vld1.64 {d6, d7}, [r8:128]!
341 ; CHECK-NEXT: vld1.64 {d22, d23}, [r8:128]
342 ; CHECK-NEXT: vld1.64 {d4, d5}, [lr:128]!
343 ; CHECK-NEXT: vbif q10, q9, q15
344 ; CHECK-NEXT: vorr q9, q4, q4
345 ; CHECK-NEXT: vmov r0, r2, d22
346 ; CHECK-NEXT: vbsl q9, q1, q0
347 ; CHECK-NEXT: vld1.64 {d30, d31}, [lr:128]
348 ; CHECK-NEXT: mov lr, #0
349 ; CHECK-NEXT: vmov r7, r5, d30
350 ; CHECK-NEXT: vld1.64 {d0, d1}, [r9:128]
351 ; CHECK-NEXT: vld1.64 {d2, d3}, [r10:128]
352 ; CHECK-NEXT: subs r0, r7, r0
353 ; CHECK-NEXT: sbcs r0, r5, r2
354 ; CHECK-NEXT: vmov r5, r4, d24
355 ; CHECK-NEXT: vmov r0, r7, d28
356 ; CHECK-NEXT: movlt lr, #1
357 ; CHECK-NEXT: cmp lr, #0
358 ; CHECK-NEXT: mvnne lr, #0
359 ; CHECK-NEXT: subs r0, r5, r0
360 ; CHECK-NEXT: sbcs r0, r4, r7
361 ; CHECK-NEXT: vmov r7, r5, d29
362 ; CHECK-NEXT: vmov r4, r6, d25
363 ; CHECK-NEXT: mov r0, #0
364 ; CHECK-NEXT: movlt r0, #1
365 ; CHECK-NEXT: cmp r0, #0
366 ; CHECK-NEXT: mvnne r0, #0
367 ; CHECK-NEXT: subs r7, r4, r7
368 ; CHECK-NEXT: mov r4, #0
369 ; CHECK-NEXT: sbcs r7, r6, r5
370 ; CHECK-NEXT: vmov r5, r1, d31
371 ; CHECK-NEXT: vmov r7, r6, d23
372 ; CHECK-NEXT: movlt r4, #1
373 ; CHECK-NEXT: cmp r4, #0
374 ; CHECK-NEXT: mvnne r4, #0
375 ; CHECK-NEXT: subs r7, r5, r7
376 ; CHECK-NEXT: mov r5, #0
377 ; CHECK-NEXT: sbcs r1, r1, r6
378 ; CHECK-NEXT: vmov r6, r2, d5
379 ; CHECK-NEXT: vmov r1, r7, d7
380 ; CHECK-NEXT: movlt r5, #1
381 ; CHECK-NEXT: cmp r5, #0
382 ; CHECK-NEXT: mvnne r5, #0
383 ; CHECK-NEXT: subs r1, r6, r1
384 ; CHECK-NEXT: sbcs r1, r2, r7
385 ; CHECK-NEXT: vmov r6, r7, d4
386 ; CHECK-NEXT: mov r1, #0
387 ; CHECK-NEXT: movlt r1, #1
388 ; CHECK-NEXT: cmp r1, #0
389 ; CHECK-NEXT: mvnne r1, #0
390 ; CHECK-NEXT: vdup.32 d9, r1
391 ; CHECK-NEXT: vmov r1, r2, d6
392 ; CHECK-NEXT: subs r1, r6, r1
393 ; CHECK-NEXT: sbcs r1, r7, r2
394 ; CHECK-NEXT: vmov r6, r7, d0
395 ; CHECK-NEXT: mov r1, #0
396 ; CHECK-NEXT: movlt r1, #1
397 ; CHECK-NEXT: cmp r1, #0
398 ; CHECK-NEXT: mvnne r1, #0
399 ; CHECK-NEXT: vdup.32 d8, r1
400 ; CHECK-NEXT: vmov r1, r2, d2
401 ; CHECK-NEXT: vbif q2, q3, q4
402 ; CHECK-NEXT: vdup.32 d7, r5
403 ; CHECK-NEXT: vdup.32 d9, r4
404 ; CHECK-NEXT: vmov r4, r5, d1
405 ; CHECK-NEXT: vdup.32 d8, r0
406 ; CHECK-NEXT: mov r0, r3
407 ; CHECK-NEXT: vst1.64 {d26, d27}, [r0:128]!
408 ; CHECK-NEXT: vbif q12, q14, q4
409 ; CHECK-NEXT: vdup.32 d6, lr
410 ; CHECK-NEXT: vbit q11, q15, q3
411 ; CHECK-NEXT: vst1.64 {d20, d21}, [r0:128]!
412 ; CHECK-NEXT: subs r1, r6, r1
413 ; CHECK-NEXT: mov r6, #0
414 ; CHECK-NEXT: sbcs r1, r7, r2
415 ; CHECK-NEXT: vmov r1, r2, d3
416 ; CHECK-NEXT: movlt r6, #1
417 ; CHECK-NEXT: subs r1, r4, r1
418 ; CHECK-NEXT: sbcs r1, r5, r2
419 ; CHECK-NEXT: movlt r12, #1
420 ; CHECK-NEXT: cmp r12, #0
421 ; CHECK-NEXT: mvnne r12, #0
422 ; CHECK-NEXT: cmp r6, #0
423 ; CHECK-NEXT: vdup.32 d27, r12
424 ; CHECK-NEXT: mvnne r6, #0
425 ; CHECK-NEXT: vdup.32 d26, r6
426 ; CHECK-NEXT: vorr q10, q13, q13
427 ; CHECK-NEXT: vbsl q10, q0, q1
428 ; CHECK-NEXT: vst1.64 {d4, d5}, [r0:128]!
429 ; CHECK-NEXT: vst1.64 {d22, d23}, [r0:128]
430 ; CHECK-NEXT: add r0, r3, #64
431 ; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]!
432 ; CHECK-NEXT: vst1.64 {d18, d19}, [r0:128]!
433 ; CHECK-NEXT: vst1.64 {d24, d25}, [r0:128]!
434 ; CHECK-NEXT: vst1.64 {d20, d21}, [r0:128]
435 ; CHECK-NEXT: vpop {d8, d9, d10, d11}
436 ; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, lr}
437 ; CHECK-NEXT: mov pc, lr
; IR under test.
438 %v0 = load %T0_20, ptr %loadaddr
439 %v1 = load %T0_20, ptr %loadaddr2
440 %c = icmp slt %T0_20 %v0, %v1
442 ; COST: cost of 0 {{.*}} icmp
443 ; COST: cost of 108 {{.*}} select
444 %r = select %T1_20 %c, %T0_20 %v0, %T0_20 %v1
445 store %T0_20 %r, ptr %storeaddr