1 ; RUN: llc -march=mips -mattr=+msa,+fp64,+mips32r2 < %s | FileCheck %s
2 ; RUN: llc -march=mipsel -mattr=+msa,+fp64,+mips32r2 < %s | FileCheck %s
; Register-register AND on <16 x i8>: expects ld.b of %a ($5) and %b ($6),
; one and.v, and an st.b of the result to %c ($4).
define void @and_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: and_v16i8:
  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = and <16 x i8> %1, %2
  ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)
  ret void
  ; CHECK: .size and_v16i8
}
; Register-register AND on <8 x i16>: expects ld.h of %a ($5) and %b ($6),
; one and.v, and an st.h of the result to %c ($4).
define void @and_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: and_v8i16:
  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = and <8 x i16> %1, %2
  ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)
  ret void
  ; CHECK: .size and_v8i16
}
; Register-register AND on <4 x i32>: expects ld.w of %a ($5) and %b ($6),
; one and.v, and an st.w of the result to %c ($4).
define void @and_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: and_v4i32:
  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = and <4 x i32> %1, %2
  ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)
  ret void
  ; CHECK: .size and_v4i32
}
; Register-register AND on <2 x i64>: expects ld.d of %a ($5) and %b ($6),
; one and.v, and an st.d of the result to %c ($4).
define void @and_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: and_v2i64:
  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = and <2 x i64> %1, %2
  ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)
  ret void
  ; CHECK: .size and_v2i64
}
; AND of <16 x i8> with a splat of 1: expects the immediate form andi.b
; applied to the loaded %a ($5), stored to %c ($4).
define void @and_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: and_v16i8_i:
  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = and <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: andi.b [[R4:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R4]], 0($4)
  ret void
  ; CHECK: .size and_v16i8_i
}
; AND of <8 x i16> with a splat of 1: expects the constant to be
; materialised with ldi.h and combined with the loaded %a via and.v.
define void @and_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: and_v8i16_i:
  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = and <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: and.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R4]], 0($4)
  ret void
  ; CHECK: .size and_v8i16_i
}
; AND of <4 x i32> with a splat of 1: expects the constant to be
; materialised with ldi.w and combined with the loaded %a via and.v.
define void @and_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: and_v4i32_i:
  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = and <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: and.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)
  ret void
  ; CHECK: .size and_v4i32_i
}
; AND of <2 x i64> with a splat of 1: expects the constant to be
; materialised with ldi.d and combined with the loaded %a via and.v.
define void @and_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: and_v2i64_i:
  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = and <2 x i64> %1, <i64 1, i64 1>
  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: and.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)
  ret void
  ; CHECK: .size and_v2i64_i
}
; Register-register OR on <16 x i8>: expects ld.b of %a ($5) and %b ($6),
; one or.v, and an st.b of the result to %c ($4).
define void @or_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: or_v16i8:
  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = or <16 x i8> %1, %2
  ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)
  ret void
  ; CHECK: .size or_v16i8
}
; Register-register OR on <8 x i16>: expects ld.h of %a ($5) and %b ($6),
; one or.v, and an st.h of the result to %c ($4).
define void @or_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: or_v8i16:
  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = or <8 x i16> %1, %2
  ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)
  ret void
  ; CHECK: .size or_v8i16
}
; Register-register OR on <4 x i32>: expects ld.w of %a ($5) and %b ($6),
; one or.v, and an st.w of the result to %c ($4).
define void @or_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: or_v4i32:
  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = or <4 x i32> %1, %2
  ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)
  ret void
  ; CHECK: .size or_v4i32
}
; Register-register OR on <2 x i64>: expects ld.d of %a ($5) and %b ($6),
; one or.v, and an st.d of the result to %c ($4).
define void @or_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: or_v2i64:
  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = or <2 x i64> %1, %2
  ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)
  ret void
  ; CHECK: .size or_v2i64
}
; OR of <16 x i8> with a splat of 3: expects the immediate form ori.b
; applied to the loaded %a ($5), stored to %c ($4).
define void @or_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: or_v16i8_i:
  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = or <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ; CHECK-DAG: ori.b [[R4:\$w[0-9]+]], [[R1]], 3
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R4]], 0($4)
  ret void
  ; CHECK: .size or_v16i8_i
}
; OR of <8 x i16> with a splat of 3: expects the constant to be
; materialised with ldi.h and combined with the loaded %a via or.v.
define void @or_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: or_v8i16_i:
  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = or <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 3
  ; CHECK-DAG: or.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R4]], 0($4)
  ret void
  ; CHECK: .size or_v8i16_i
}
; OR of <4 x i32> with a splat of 3: expects the constant to be
; materialised with ldi.w and combined with the loaded %a via or.v.
define void @or_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: or_v4i32_i:
  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = or <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 3
  ; CHECK-DAG: or.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)
  ret void
  ; CHECK: .size or_v4i32_i
}
; OR of <2 x i64> with a splat of 3: expects the constant to be
; materialised with ldi.d and combined with the loaded %a via or.v.
define void @or_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: or_v2i64_i:
  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = or <2 x i64> %1, <i64 3, i64 3>
  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 3
  ; CHECK-DAG: or.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)
  ret void
  ; CHECK: .size or_v2i64_i
}
; NOR on <16 x i8>, written as an or followed by an xor with all-ones:
; expects the pair to fold into one nor.v between the two ld.b and the st.b.
define void @nor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: nor_v16i8:
  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = or <16 x i8> %1, %2
  %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %4, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)
  ret void
  ; CHECK: .size nor_v16i8
}
; NOR on <8 x i16>, written as an or followed by an xor with all-ones:
; expects the pair to fold into one nor.v between the two ld.h and the st.h.
define void @nor_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: nor_v8i16:
  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = or <8 x i16> %1, %2
  %4 = xor <8 x i16> %3, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %4, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)
  ret void
  ; CHECK: .size nor_v8i16
}
; NOR on <4 x i32>, written as an or followed by an xor with all-ones:
; expects the pair to fold into one nor.v between the two ld.w and the st.w.
define void @nor_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: nor_v4i32:
  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = or <4 x i32> %1, %2
  %4 = xor <4 x i32> %3, <i32 -1, i32 -1, i32 -1, i32 -1>
  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %4, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)
  ret void
  ; CHECK: .size nor_v4i32
}
; NOR on <2 x i64>, written as an or followed by an xor with all-ones:
; expects the pair to fold into one nor.v between the two ld.d and the st.d.
define void @nor_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: nor_v2i64:
  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = or <2 x i64> %1, %2
  %4 = xor <2 x i64> %3, <i64 -1, i64 -1>
  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %4, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)
  ret void
  ; CHECK: .size nor_v2i64
}
; NOR of <16 x i8> with a splat of 1 (or, then xor with all-ones): the
; stored value is the negated OR, so the immediate form nori.b is expected.
; The full mnemonic is spelled out because a bare "ori.b" pattern would only
; match as a substring of "nori.b" and would not pin the negation.
define void @nor_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: nor_v16i8_i:
  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = or <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %3 = xor <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ; CHECK-DAG: nori.b [[R4:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R4]], 0($4)
  ret void
  ; CHECK: .size nor_v16i8_i
}
; NOR of <8 x i16> with a splat of 1 (or, then xor with all-ones): expects
; the constant via ldi.h and a single nor.v with the loaded %a.
define void @nor_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: nor_v8i16_i:
  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = or <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %3 = xor <8 x i16> %2, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: nor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R4]], 0($4)
  ret void
  ; CHECK: .size nor_v8i16_i
}
; NOR of <4 x i32> with a splat of 1 (or, then xor with all-ones): expects
; the constant via ldi.w and a single nor.v with the loaded %a.
define void @nor_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: nor_v4i32_i:
  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = or <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  %3 = xor <4 x i32> %2, <i32 -1, i32 -1, i32 -1, i32 -1>
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: nor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)
  ret void
  ; CHECK: .size nor_v4i32_i
}
; NOR of <2 x i64> with a splat of 1 (or, then xor with all-ones): expects
; the constant via ldi.d and a single nor.v with the loaded %a.
define void @nor_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: nor_v2i64_i:
  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = or <2 x i64> %1, <i64 1, i64 1>
  %3 = xor <2 x i64> %2, <i64 -1, i64 -1>
  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: nor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)
  ret void
  ; CHECK: .size nor_v2i64_i
}
; Register-register XOR on <16 x i8>: expects ld.b of %a ($5) and %b ($6),
; one xor.v, and an st.b of the result to %c ($4).
define void @xor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: xor_v16i8:
  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = xor <16 x i8> %1, %2
  ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)
  ret void
  ; CHECK: .size xor_v16i8
}
; Register-register XOR on <8 x i16>: expects ld.h of %a ($5) and %b ($6),
; one xor.v, and an st.h of the result to %c ($4).
define void @xor_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: xor_v8i16:
  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = xor <8 x i16> %1, %2
  ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)
  ret void
  ; CHECK: .size xor_v8i16
}
; Register-register XOR on <4 x i32>: expects ld.w of %a ($5) and %b ($6),
; one xor.v, and an st.w of the result to %c ($4).
define void @xor_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: xor_v4i32:
  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = xor <4 x i32> %1, %2
  ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)
  ret void
  ; CHECK: .size xor_v4i32
}
; Register-register XOR on <2 x i64>: expects ld.d of %a ($5) and %b ($6),
; one xor.v, and an st.d of the result to %c ($4).
define void @xor_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: xor_v2i64:
  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = xor <2 x i64> %1, %2
  ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)
  ret void
  ; CHECK: .size xor_v2i64
}
; XOR of <16 x i8> with a splat of 3: expects the immediate form xori.b
; applied to the loaded %a ($5), stored to %c ($4).
define void @xor_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: xor_v16i8_i:
  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = xor <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ; CHECK-DAG: xori.b [[R4:\$w[0-9]+]], [[R1]], 3
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R4]], 0($4)
  ret void
  ; CHECK: .size xor_v16i8_i
}
; XOR of <8 x i16> with a splat of 3: expects the constant to be
; materialised with ldi.h and combined with the loaded %a via xor.v.
define void @xor_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: xor_v8i16_i:
  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = xor <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 3
  ; CHECK-DAG: xor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R4]], 0($4)
  ret void
  ; CHECK: .size xor_v8i16_i
}
; XOR of <4 x i32> with a splat of 3: expects the constant to be
; materialised with ldi.w and combined with the loaded %a via xor.v.
define void @xor_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: xor_v4i32_i:
  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = xor <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 3
  ; CHECK-DAG: xor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)
  ret void
  ; CHECK: .size xor_v4i32_i
}
; XOR of <2 x i64> with a splat of 3: expects the constant to be
; materialised with ldi.d and combined with the loaded %a via xor.v.
define void @xor_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: xor_v2i64_i:
  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = xor <2 x i64> %1, <i64 3, i64 3>
  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 3
  ; CHECK-DAG: xor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)
  ret void
  ; CHECK: .size xor_v2i64_i
}
; Per-lane shift left of <16 x i8> %a by the lanes of %b: expects sll.b
; between the two ld.b and the st.b to %c.
define void @sll_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: sll_v16i8:
  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <16 x i8> %1, %2
  ; CHECK-DAG: sll.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)
  ret void
  ; CHECK: .size sll_v16i8
}
; Per-lane shift left of <8 x i16> %a by the lanes of %b: expects sll.h
; between the two ld.h and the st.h to %c.
define void @sll_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: sll_v8i16:
  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <8 x i16> %1, %2
  ; CHECK-DAG: sll.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)
  ret void
  ; CHECK: .size sll_v8i16
}
; Per-lane shift left of <4 x i32> %a by the lanes of %b: expects sll.w
; between the two ld.w and the st.w to %c.
define void @sll_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: sll_v4i32:
  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <4 x i32> %1, %2
  ; CHECK-DAG: sll.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)
  ret void
  ; CHECK: .size sll_v4i32
}
; Per-lane shift left of <2 x i64> %a by the lanes of %b: expects sll.d
; between the two ld.d and the st.d to %c.
define void @sll_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: sll_v2i64:
  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <2 x i64> %1, %2
  ; CHECK-DAG: sll.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)
  ret void
  ; CHECK: .size sll_v2i64
}
; Shift left of <16 x i8> by a splat of 1: expects the immediate form
; slli.b on the loaded %a, stored to %c.
define void @sll_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: sll_v16i8_i:
  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shl <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: slli.b [[R4:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R4]], 0($4)
  ret void
  ; CHECK: .size sll_v16i8_i
}
; Shift left of <8 x i16> by a splat of 1: expects the immediate form
; slli.h on the loaded %a, stored to %c.
define void @sll_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: sll_v8i16_i:
  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shl <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: slli.h [[R4:\$w[0-9]+]], [[R1]], 1
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R4]], 0($4)
  ret void
  ; CHECK: .size sll_v8i16_i
}
; Shift left of <4 x i32> by a splat of 1: expects the immediate form
; slli.w on the loaded %a, stored to %c.
define void @sll_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: sll_v4i32_i:
  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = shl <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: slli.w [[R4:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)
  ret void
  ; CHECK: .size sll_v4i32_i
}
; Shift left of <2 x i64> by a splat of 1: expects the immediate form
; slli.d on the loaded %a, stored to %c.
define void @sll_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: sll_v2i64_i:
  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = shl <2 x i64> %1, <i64 1, i64 1>
  ; CHECK-DAG: slli.d [[R4:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)
  ret void
  ; CHECK: .size sll_v2i64_i
}
; Per-lane arithmetic shift right of <16 x i8> %a by the lanes of %b:
; expects sra.b between the two ld.b and the st.b to %c.
define void @sra_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: sra_v16i8:
  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = ashr <16 x i8> %1, %2
  ; CHECK-DAG: sra.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)
  ret void
  ; CHECK: .size sra_v16i8
}
; Per-lane arithmetic shift right of <8 x i16> %a by the lanes of %b:
; expects sra.h between the two ld.h and the st.h to %c.
define void @sra_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: sra_v8i16:
  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = ashr <8 x i16> %1, %2
  ; CHECK-DAG: sra.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)
  ret void
  ; CHECK: .size sra_v8i16
}
; Per-lane arithmetic shift right of <4 x i32> %a by the lanes of %b:
; expects sra.w between the two ld.w and the st.w to %c.
define void @sra_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: sra_v4i32:
  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = ashr <4 x i32> %1, %2
  ; CHECK-DAG: sra.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)
  ret void
  ; CHECK: .size sra_v4i32
}
; Per-lane arithmetic shift right of <2 x i64> %a by the lanes of %b:
; expects sra.d between the two ld.d and the st.d to %c.
define void @sra_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: sra_v2i64:
  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = ashr <2 x i64> %1, %2
  ; CHECK-DAG: sra.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)
  ret void
  ; CHECK: .size sra_v2i64
}
; Arithmetic shift right of <16 x i8> by a splat of 1: expects the
; immediate form srai.b on the loaded %a, stored to %c.
define void @sra_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: sra_v16i8_i:
  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = ashr <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: srai.b [[R4:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R4]], 0($4)
  ret void
  ; CHECK: .size sra_v16i8_i
}
; Arithmetic shift right of <8 x i16> by a splat of 1: expects the
; immediate form srai.h on the loaded %a, stored to %c.
define void @sra_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: sra_v8i16_i:
  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = ashr <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: srai.h [[R4:\$w[0-9]+]], [[R1]], 1
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R4]], 0($4)
  ret void
  ; CHECK: .size sra_v8i16_i
}
; Arithmetic shift right of <4 x i32> by a splat of 1: expects the
; immediate form srai.w on the loaded %a, stored to %c.
define void @sra_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: sra_v4i32_i:
  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = ashr <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: srai.w [[R4:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)
  ret void
  ; CHECK: .size sra_v4i32_i
}
; Arithmetic shift right of <2 x i64> by a splat of 1: expects the
; immediate form srai.d on the loaded %a, stored to %c.
define void @sra_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: sra_v2i64_i:
  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = ashr <2 x i64> %1, <i64 1, i64 1>
  ; CHECK-DAG: srai.d [[R4:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)
  ret void
  ; CHECK: .size sra_v2i64_i
}
; Per-lane logical shift right of <16 x i8> %a by the lanes of %b:
; expects srl.b between the two ld.b and the st.b to %c.
define void @srl_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: srl_v16i8:
  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = lshr <16 x i8> %1, %2
  ; CHECK-DAG: srl.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)
  ret void
  ; CHECK: .size srl_v16i8
}
; Per-lane logical shift right of <8 x i16> %a by the lanes of %b:
; expects srl.h between the two ld.h and the st.h to %c.
define void @srl_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: srl_v8i16:
  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = lshr <8 x i16> %1, %2
  ; CHECK-DAG: srl.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)
  ret void
  ; CHECK: .size srl_v8i16
}
; Per-lane logical shift right of <4 x i32> %a by the lanes of %b:
; expects srl.w between the two ld.w and the st.w to %c.
define void @srl_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: srl_v4i32:
  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = lshr <4 x i32> %1, %2
  ; CHECK-DAG: srl.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)
  ret void
  ; CHECK: .size srl_v4i32
}
; Per-lane logical shift right of <2 x i64> %a by the lanes of %b:
; expects srl.d between the two ld.d and the st.d to %c.
define void @srl_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: srl_v2i64:
  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = lshr <2 x i64> %1, %2
  ; CHECK-DAG: srl.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)
  ret void
  ; CHECK: .size srl_v2i64
}
; Logical shift right of <16 x i8> by a splat of 1: expects the immediate
; form srli.b on the loaded %a, stored to %c.
define void @srl_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: srl_v16i8_i:
  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = lshr <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: srli.b [[R4:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R4]], 0($4)
  ret void
  ; CHECK: .size srl_v16i8_i
}
; Logical shift right of <8 x i16> by a splat of 1: expects the immediate
; form srli.h on the loaded %a, stored to %c.
define void @srl_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: srl_v8i16_i:
  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = lshr <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: srli.h [[R4:\$w[0-9]+]], [[R1]], 1
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R4]], 0($4)
  ret void
  ; CHECK: .size srl_v8i16_i
}
; Logical shift right of <4 x i32> by a splat of 1: expects the immediate
; form srli.w on the loaded %a, stored to %c.
define void @srl_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: srl_v4i32_i:
  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = lshr <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: srli.w [[R4:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)
  ret void
  ; CHECK: .size srl_v4i32_i
}
; Logical shift right of <2 x i64> by a splat of 1: expects the immediate
; form srli.d on the loaded %a, stored to %c.
define void @srl_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: srl_v2i64_i:
  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = lshr <2 x i64> %1, <i64 1, i64 1>
  ; CHECK-DAG: srli.d [[R4:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)
  ret void
  ; CHECK: .size srl_v2i64_i
}
; Population count on <16 x i8> through the llvm.ctpop.v16i8 intrinsic
; (declared outside this function): expects pcnt.b on the loaded %a.
define void @ctpop_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: ctpop_v16i8:
  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <16 x i8> @llvm.ctpop.v16i8 (<16 x i8> %1)
  ; CHECK-DAG: pcnt.b [[R3:\$w[0-9]+]], [[R1]]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)
  ret void
  ; CHECK: .size ctpop_v16i8
}
; Population count on <8 x i16> through the llvm.ctpop.v8i16 intrinsic
; (declared outside this function): expects pcnt.h on the loaded %a.
define void @ctpop_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: ctpop_v8i16:
  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <8 x i16> @llvm.ctpop.v8i16 (<8 x i16> %1)
  ; CHECK-DAG: pcnt.h [[R3:\$w[0-9]+]], [[R1]]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)
  ret void
  ; CHECK: .size ctpop_v8i16
}
; Population count on <4 x i32> through the llvm.ctpop.v4i32 intrinsic
; (declared outside this function): expects pcnt.w on the loaded %a.
define void @ctpop_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: ctpop_v4i32:
  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32 (<4 x i32> %1)
  ; CHECK-DAG: pcnt.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)
  ret void
  ; CHECK: .size ctpop_v4i32
}
; Population count on <2 x i64> through the llvm.ctpop.v2i64 intrinsic
; (declared outside this function): expects pcnt.d on the loaded %a.
define void @ctpop_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: ctpop_v2i64:
  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64 (<2 x i64> %1)
  ; CHECK-DAG: pcnt.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)
  ret void
  ; CHECK: .size ctpop_v2i64
}
; Leading-zero count on <16 x i8> through the llvm.ctlz.v16i8 intrinsic
; (declared outside this function): expects nlzc.b on the loaded %a.
define void @ctlz_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: ctlz_v16i8:
  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <16 x i8> @llvm.ctlz.v16i8 (<16 x i8> %1)
  ; CHECK-DAG: nlzc.b [[R3:\$w[0-9]+]], [[R1]]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)
  ret void
  ; CHECK: .size ctlz_v16i8
}
; Leading-zero count on <8 x i16> through the llvm.ctlz.v8i16 intrinsic
; (declared outside this function): expects nlzc.h on the loaded %a.
define void @ctlz_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: ctlz_v8i16:
  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <8 x i16> @llvm.ctlz.v8i16 (<8 x i16> %1)
  ; CHECK-DAG: nlzc.h [[R3:\$w[0-9]+]], [[R1]]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)
  ret void
  ; CHECK: .size ctlz_v8i16
}
; Leading-zero count on <4 x i32> through the llvm.ctlz.v4i32 intrinsic
; (declared outside this function): expects nlzc.w on the loaded %a.
define void @ctlz_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: ctlz_v4i32:
  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x i32> @llvm.ctlz.v4i32 (<4 x i32> %1)
  ; CHECK-DAG: nlzc.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)
  ret void
  ; CHECK: .size ctlz_v4i32
}
; Leading-zero count on <2 x i64> through the llvm.ctlz.v2i64 intrinsic
; (declared outside this function): expects nlzc.d on the loaded %a.
define void @ctlz_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: ctlz_v2i64:
  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x i64> @llvm.ctlz.v2i64 (<2 x i64> %1)
  ; CHECK-DAG: nlzc.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)
  ret void
  ; CHECK: .size ctlz_v2i64
}
; Bitwise select on <16 x i8> spelled out as (%a & %m) | (%b & ~%m), with the
; mask %m loaded from the fourth argument ($7). The result is expected in
; the register that held %b, which is then stored to %c.
define void @bsel_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b, <16 x i8>* %m) nounwind {
  ; CHECK: bsel_v16i8:
  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = load <16 x i8>, <16 x i8>* %m
  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
  ; %4 is the inverted mask.
  %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1,
                          i8 -1, i8 -1, i8 -1, i8 -1,
                          i8 -1, i8 -1, i8 -1, i8 -1,
                          i8 -1, i8 -1, i8 -1, i8 -1>
  %5 = and <16 x i8> %1, %3
  %6 = and <16 x i8> %2, %4
  %7 = or <16 x i8> %5, %6
  ; bmnz is the same operation
  ; (vselect Mask, IfSet, IfClr) -> (BMNZ IfClr, IfSet, Mask)
  ; CHECK-DAG: bmnz.v [[R2]], [[R1]], [[R3]]
  store <16 x i8> %7, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R2]], 0($4)
  ret void
  ; CHECK: .size bsel_v16i8
}
; Bitwise select on <16 x i8> with a constant operand:
; (%a & ~%m) | (splat(6) & %m). Expects bseli.b with immediate 6 operating
; in place on the register holding the mask %m, which is then stored to %c.
define void @bsel_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %m) nounwind {
  ; CHECK: bsel_v16i8_i:
  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %m
  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($6)
  ; %3 is the inverted mask.
  %3 = xor <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1,
                          i8 -1, i8 -1, i8 -1, i8 -1,
                          i8 -1, i8 -1, i8 -1, i8 -1,
                          i8 -1, i8 -1, i8 -1, i8 -1>
  %4 = and <16 x i8> %1, %3
  %5 = and <16 x i8> <i8 6, i8 6, i8 6, i8 6,
                      i8 6, i8 6, i8 6, i8 6,
                      i8 6, i8 6, i8 6, i8 6,
                      i8 6, i8 6, i8 6, i8 6>, %2
  %6 = or <16 x i8> %4, %5
  ; CHECK-DAG: bseli.b [[R3]], [[R1]], 6
  store <16 x i8> %6, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)
  ret void
  ; CHECK: .size bsel_v16i8_i
}
; Bitwise select on <8 x i16> with a constant mask: (%a & 6) | (%b & 65529),
; where 65529 is the 16-bit complement of 6. Expects the mask via ldi.h and
; a bsel.v that overwrites the mask register, which is then stored to %c.
define void @bsel_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: bsel_v8i16:
  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = and <8 x i16> %1, <i16 6, i16 6, i16 6, i16 6,
                          i16 6, i16 6, i16 6, i16 6>
  %4 = and <8 x i16> %2, <i16 65529, i16 65529, i16 65529, i16 65529,
                          i16 65529, i16 65529, i16 65529, i16 65529>
  %5 = or <8 x i16> %3, %4
  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 6
  ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]]
  store <8 x i16> %5, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)
  ret void
  ; CHECK: .size bsel_v8i16
}
; Bitwise select on <4 x i32> with a constant mask:
; (%a & 6) | (%b & 4294967289), where 4294967289 is the 32-bit complement
; of 6. Expects the mask via ldi.w and a bsel.v that overwrites the mask
; register, which is then stored to %c.
define void @bsel_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: bsel_v4i32:
  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = and <4 x i32> %1, <i32 6, i32 6, i32 6, i32 6>
  %4 = and <4 x i32> %2, <i32 4294967289, i32 4294967289, i32 4294967289, i32 4294967289>
  %5 = or <4 x i32> %3, %4
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 6
  ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]]
  store <4 x i32> %5, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)
  ret void
  ; CHECK: .size bsel_v4i32
}
; Bitwise select on <2 x i64> with a constant mask:
; (%a & 6) | (%b & 18446744073709551609), the latter being the 64-bit
; complement of 6. Expects the mask via ldi.d and a bsel.v that overwrites
; the mask register, which is then stored to %c.
define void @bsel_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: bsel_v2i64:
  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = and <2 x i64> %1, <i64 6, i64 6>
  %4 = and <2 x i64> %2, <i64 18446744073709551609, i64 18446744073709551609>
  %5 = or <2 x i64> %3, %4
  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 6
  ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]]
  store <2 x i64> %5, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)
  ret void
  ; CHECK: .size bsel_v2i64
}
; Bit insert-left on <16 x i8>: (%a & 192) | (%b & 63), i.e. the top two
; bits of each lane come from %a and the rest from %b. Expects binsli.b
; with immediate 1 writing into the register that held %b.
define void @binsl_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: binsl_v16i8_i:
  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = and <16 x i8> %1, <i8 192, i8 192, i8 192, i8 192,
                          i8 192, i8 192, i8 192, i8 192,
                          i8 192, i8 192, i8 192, i8 192,
                          i8 192, i8 192, i8 192, i8 192>
  %4 = and <16 x i8> %2, <i8 63, i8 63, i8 63, i8 63,
                          i8 63, i8 63, i8 63, i8 63,
                          i8 63, i8 63, i8 63, i8 63,
                          i8 63, i8 63, i8 63, i8 63>
  %5 = or <16 x i8> %3, %4
  ; CHECK-DAG: binsli.b [[R2]], [[R1]], 1
  store <16 x i8> %5, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R2]], 0($4)
  ret void
  ; CHECK: .size binsl_v16i8_i
}
; binsl_v8i16_i: keeps the top two bits of each %a halfword (mask 49152 =
; 0xC000) and the low fourteen bits of each %b halfword (mask 16383 = 0x3FFF),
; then ORs the halves. The directives expect one binsli.h with immediate 1
; that overwrites the register loaded from %b, which is then stored through %c.
1110 define void @binsl_v8i16_i(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
1111 ; CHECK: binsl_v8i16_i:
1113 %1 = load <8 x i16>, <8 x i16>* %a
1114 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
1115 %2 = load <8 x i16>, <8 x i16>* %b
1116 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
1117 %3 = and <8 x i16> %1, <i16 49152, i16 49152, i16 49152, i16 49152,
1118 i16 49152, i16 49152, i16 49152, i16 49152>
1119 %4 = and <8 x i16> %2, <i16 16383, i16 16383, i16 16383, i16 16383,
1120 i16 16383, i16 16383, i16 16383, i16 16383>
1121 %5 = or <8 x i16> %3, %4
1122 ; CHECK-DAG: binsli.h [[R2]], [[R1]], 1
1123 store <8 x i16> %5, <8 x i16>* %c
1124 ; CHECK-DAG: st.h [[R2]], 0($4)
1127 ; CHECK: .size binsl_v8i16_i
; binsl_v4i32_i: keeps the top two bits of each %a word (mask 3221225472 =
; 0xC0000000) and the low thirty bits of each %b word (mask 1073741823 =
; 0x3FFFFFFF), then ORs the halves. The directives expect one binsli.w with
; immediate 1 that overwrites the register loaded from %b, which is stored
; through %c.
1130 define void @binsl_v4i32_i(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
1131 ; CHECK: binsl_v4i32_i:
1133 %1 = load <4 x i32>, <4 x i32>* %a
1134 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
1135 %2 = load <4 x i32>, <4 x i32>* %b
1136 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
1137 %3 = and <4 x i32> %1, <i32 3221225472, i32 3221225472, i32 3221225472, i32 3221225472>
1138 %4 = and <4 x i32> %2, <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
1139 %5 = or <4 x i32> %3, %4
1140 ; CHECK-DAG: binsli.w [[R2]], [[R1]], 1
1141 store <4 x i32> %5, <4 x i32>* %c
1142 ; CHECK-DAG: st.w [[R2]], 0($4)
1145 ; CHECK: .size binsl_v4i32_i
; binsl_v2i64_i: keeps the top 61 bits of each %a doubleword (mask
; 18446744073709551608 = 0xFFFFFFFFFFFFFFF8) and the low three bits of each
; %b doubleword (mask 7), then ORs the halves. Unlike the narrower binsl_*_i
; tests this one uses a wide left mask (see the TODO below); the directives
; expect binsli.d with immediate 60 overwriting the register loaded from %b.
1148 define void @binsl_v2i64_i(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
1149 ; CHECK: binsl_v2i64_i:
1151 %1 = load <2 x i64>, <2 x i64>* %a
1152 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
1153 %2 = load <2 x i64>, <2 x i64>* %b
1154 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
1155 %3 = and <2 x i64> %1, <i64 18446744073709551608, i64 18446744073709551608>
1156 %4 = and <2 x i64> %2, <i64 7, i64 7>
1157 %5 = or <2 x i64> %3, %4
1158 ; TODO: We use a particularly wide mask here to work around a legalization
1159 ; issue. If the mask doesn't fit within a 10-bit immediate, it gets
1160 ; legalized into a constant pool. We should add a test to cover the
1161 ; other cases once they correctly select binsli.d.
1162 ; CHECK-DAG: binsli.d [[R2]], [[R1]], 60
1163 store <2 x i64> %5, <2 x i64>* %c
1164 ; CHECK-DAG: st.d [[R2]], 0($4)
1167 ; CHECK: .size binsl_v2i64_i
; binsr_v16i8_i: keeps the low two bits of each %a byte (mask 3) and the upper
; six bits of each %b byte (mask 252 = 0xFC), then ORs the halves.
; The directives expect one binsri.b with immediate 1 that overwrites the
; register loaded from %b; that register is then stored through %c.
1170 define void @binsr_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
1171 ; CHECK: binsr_v16i8_i:
1173 %1 = load <16 x i8>, <16 x i8>* %a
1174 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
1175 %2 = load <16 x i8>, <16 x i8>* %b
1176 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
1177 %3 = and <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
1178 i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
1179 %4 = and <16 x i8> %2, <i8 252, i8 252, i8 252, i8 252,
1180 i8 252, i8 252, i8 252, i8 252,
1181 i8 252, i8 252, i8 252, i8 252,
1182 i8 252, i8 252, i8 252, i8 252>
1183 %5 = or <16 x i8> %3, %4
1184 ; CHECK-DAG: binsri.b [[R2]], [[R1]], 1
1185 store <16 x i8> %5, <16 x i8>* %c
1186 ; CHECK-DAG: st.b [[R2]], 0($4)
1189 ; CHECK: .size binsr_v16i8_i
; binsr_v8i16_i: keeps the low two bits of each %a halfword (mask 3) and the
; upper fourteen bits of each %b halfword (mask 65532 = 0xFFFC), then ORs the
; halves. The directives expect one binsri.h with immediate 1 that overwrites
; the register loaded from %b, which is then stored through %c.
1192 define void @binsr_v8i16_i(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
1193 ; CHECK: binsr_v8i16_i:
1195 %1 = load <8 x i16>, <8 x i16>* %a
1196 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
1197 %2 = load <8 x i16>, <8 x i16>* %b
1198 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
1199 %3 = and <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3,
1200 i16 3, i16 3, i16 3, i16 3>
1201 %4 = and <8 x i16> %2, <i16 65532, i16 65532, i16 65532, i16 65532,
1202 i16 65532, i16 65532, i16 65532, i16 65532>
1203 %5 = or <8 x i16> %3, %4
1204 ; CHECK-DAG: binsri.h [[R2]], [[R1]], 1
1205 store <8 x i16> %5, <8 x i16>* %c
1206 ; CHECK-DAG: st.h [[R2]], 0($4)
1209 ; CHECK: .size binsr_v8i16_i
; binsr_v4i32_i: keeps the low two bits of each %a word (mask 3) and the upper
; thirty bits of each %b word (mask 4294967292 = 0xFFFFFFFC), then ORs the
; halves. The directives expect one binsri.w with immediate 1 that overwrites
; the register loaded from %b, which is then stored through %c.
1212 define void @binsr_v4i32_i(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
1213 ; CHECK: binsr_v4i32_i:
1215 %1 = load <4 x i32>, <4 x i32>* %a
1216 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
1217 %2 = load <4 x i32>, <4 x i32>* %b
1218 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
1219 %3 = and <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
1220 %4 = and <4 x i32> %2, <i32 4294967292, i32 4294967292, i32 4294967292, i32 4294967292>
1221 %5 = or <4 x i32> %3, %4
1222 ; CHECK-DAG: binsri.w [[R2]], [[R1]], 1
1223 store <4 x i32> %5, <4 x i32>* %c
1224 ; CHECK-DAG: st.w [[R2]], 0($4)
1227 ; CHECK: .size binsr_v4i32_i
; binsr_v2i64_i: keeps the low two bits of each %a doubleword (mask 3) and the
; upper 62 bits of each %b doubleword (mask 18446744073709551612 =
; 0xFFFFFFFFFFFFFFFC), then ORs the halves. The directives expect one binsri.d
; with immediate 1 that overwrites the register loaded from %b, which is then
; stored through %c.
1230 define void @binsr_v2i64_i(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
1231 ; CHECK: binsr_v2i64_i:
1233 %1 = load <2 x i64>, <2 x i64>* %a
1234 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
1235 %2 = load <2 x i64>, <2 x i64>* %b
1236 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
1237 %3 = and <2 x i64> %1, <i64 3, i64 3>
1238 %4 = and <2 x i64> %2, <i64 18446744073709551612, i64 18446744073709551612>
1239 %5 = or <2 x i64> %3, %4
1240 ; CHECK-DAG: binsri.d [[R2]], [[R1]], 1
1241 store <2 x i64> %5, <2 x i64>* %c
1242 ; CHECK-DAG: st.d [[R2]], 0($4)
1245 ; CHECK: .size binsr_v2i64_i
; bclr_v16i8: per-lane variable bit clear on <16 x i8>, %a & ~(1 << %b).
; The xor with an all-ones (-1) vector is the bitwise NOT of the shifted one.
; The directives expect the shl/xor/and chain to become a single bclr.b taking
; the two loaded registers, with the result stored through %c.
1248 define void @bclr_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
1249 ; CHECK: bclr_v16i8:
1251 %1 = load <16 x i8>, <16 x i8>* %a
1252 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
1253 %2 = load <16 x i8>, <16 x i8>* %b
1254 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
1255 %3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2
1256 %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1257 %5 = and <16 x i8> %1, %4
1258 ; CHECK-DAG: bclr.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
1259 store <16 x i8> %5, <16 x i8>* %c
1260 ; CHECK-DAG: st.b [[R3]], 0($4)
1263 ; CHECK: .size bclr_v16i8
; bclr_v8i16: per-lane variable bit clear on <8 x i16>, %a & ~(1 << %b).
; The xor with an all-ones (-1) vector is the bitwise NOT of the shifted one.
; The directives expect the shl/xor/and chain to become a single bclr.h taking
; the two loaded registers, with the result stored through %c.
1266 define void @bclr_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
1267 ; CHECK: bclr_v8i16:
1269 %1 = load <8 x i16>, <8 x i16>* %a
1270 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
1271 %2 = load <8 x i16>, <8 x i16>* %b
1272 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
1273 %3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2
1274 %4 = xor <8 x i16> %3, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1275 %5 = and <8 x i16> %1, %4
1276 ; CHECK-DAG: bclr.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
1277 store <8 x i16> %5, <8 x i16>* %c
1278 ; CHECK-DAG: st.h [[R3]], 0($4)
1281 ; CHECK: .size bclr_v8i16
; bclr_v4i32: per-lane variable bit clear on <4 x i32>, %a & ~(1 << %b).
; The xor with an all-ones (-1) vector is the bitwise NOT of the shifted one.
; The directives expect the shl/xor/and chain to become a single bclr.w taking
; the two loaded registers, with the result stored through %c.
1284 define void @bclr_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
1285 ; CHECK: bclr_v4i32:
1287 %1 = load <4 x i32>, <4 x i32>* %a
1288 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
1289 %2 = load <4 x i32>, <4 x i32>* %b
1290 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
1291 %3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2
1292 %4 = xor <4 x i32> %3, <i32 -1, i32 -1, i32 -1, i32 -1>
1293 %5 = and <4 x i32> %1, %4
1294 ; CHECK-DAG: bclr.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
1295 store <4 x i32> %5, <4 x i32>* %c
1296 ; CHECK-DAG: st.w [[R3]], 0($4)
1299 ; CHECK: .size bclr_v4i32
; bclr_v2i64: per-lane variable bit clear on <2 x i64>, %a & ~(1 << %b).
; The xor with an all-ones (-1) vector is the bitwise NOT of the shifted one.
; The directives expect the shl/xor/and chain to become a single bclr.d taking
; the two loaded registers, with the result stored through %c.
1302 define void @bclr_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
1303 ; CHECK: bclr_v2i64:
1305 %1 = load <2 x i64>, <2 x i64>* %a
1306 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
1307 %2 = load <2 x i64>, <2 x i64>* %b
1308 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
1309 %3 = shl <2 x i64> <i64 1, i64 1>, %2
1310 %4 = xor <2 x i64> %3, <i64 -1, i64 -1>
1311 %5 = and <2 x i64> %1, %4
1312 ; CHECK-DAG: bclr.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
1313 store <2 x i64> %5, <2 x i64>* %c
1314 ; CHECK-DAG: st.d [[R3]], 0($4)
1317 ; CHECK: .size bclr_v2i64
; bset_v16i8: per-lane variable bit set on <16 x i8>, %a | (1 << %b).
; The directives expect the shl/or pair to become a single bset.b taking the
; two loaded registers, with the result stored through %c.
1320 define void @bset_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
1321 ; CHECK: bset_v16i8:
1323 %1 = load <16 x i8>, <16 x i8>* %a
1324 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
1325 %2 = load <16 x i8>, <16 x i8>* %b
1326 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
1327 %3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2
1328 %4 = or <16 x i8> %1, %3
1329 ; CHECK-DAG: bset.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
1330 store <16 x i8> %4, <16 x i8>* %c
1331 ; CHECK-DAG: st.b [[R3]], 0($4)
1334 ; CHECK: .size bset_v16i8
; bset_v8i16: per-lane variable bit set on <8 x i16>, %a | (1 << %b).
; The directives expect the shl/or pair to become a single bset.h taking the
; two loaded registers, with the result stored through %c.
1337 define void @bset_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
1338 ; CHECK: bset_v8i16:
1340 %1 = load <8 x i16>, <8 x i16>* %a
1341 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
1342 %2 = load <8 x i16>, <8 x i16>* %b
1343 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
1344 %3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2
1345 %4 = or <8 x i16> %1, %3
1346 ; CHECK-DAG: bset.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
1347 store <8 x i16> %4, <8 x i16>* %c
1348 ; CHECK-DAG: st.h [[R3]], 0($4)
1351 ; CHECK: .size bset_v8i16
; bset_v4i32: per-lane variable bit set on <4 x i32>, %a | (1 << %b).
; The directives expect the shl/or pair to become a single bset.w taking the
; two loaded registers, with the result stored through %c.
1354 define void @bset_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
1355 ; CHECK: bset_v4i32:
1357 %1 = load <4 x i32>, <4 x i32>* %a
1358 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
1359 %2 = load <4 x i32>, <4 x i32>* %b
1360 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
1361 %3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2
1362 %4 = or <4 x i32> %1, %3
1363 ; CHECK-DAG: bset.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
1364 store <4 x i32> %4, <4 x i32>* %c
1365 ; CHECK-DAG: st.w [[R3]], 0($4)
1368 ; CHECK: .size bset_v4i32
; bset_v2i64: per-lane variable bit set on <2 x i64>, %a | (1 << %b).
; The directives expect the shl/or pair to become a single bset.d taking the
; two loaded registers, with the result stored through %c.
1371 define void @bset_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
1372 ; CHECK: bset_v2i64:
1374 %1 = load <2 x i64>, <2 x i64>* %a
1375 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
1376 %2 = load <2 x i64>, <2 x i64>* %b
1377 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
1378 %3 = shl <2 x i64> <i64 1, i64 1>, %2
1379 %4 = or <2 x i64> %1, %3
1380 ; CHECK-DAG: bset.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
1381 store <2 x i64> %4, <2 x i64>* %c
1382 ; CHECK-DAG: st.d [[R3]], 0($4)
1385 ; CHECK: .size bset_v2i64
; bneg_v16i8: per-lane variable bit toggle on <16 x i8>, %a ^ (1 << %b).
; The directives expect the shl/xor pair to become a single bneg.b taking the
; two loaded registers, with the result stored through %c.
1388 define void @bneg_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
1389 ; CHECK: bneg_v16i8:
1391 %1 = load <16 x i8>, <16 x i8>* %a
1392 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
1393 %2 = load <16 x i8>, <16 x i8>* %b
1394 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
1395 %3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2
1396 %4 = xor <16 x i8> %1, %3
1397 ; CHECK-DAG: bneg.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
1398 store <16 x i8> %4, <16 x i8>* %c
1399 ; CHECK-DAG: st.b [[R3]], 0($4)
1402 ; CHECK: .size bneg_v16i8
; bneg_v8i16: per-lane variable bit toggle on <8 x i16>, %a ^ (1 << %b).
; The directives expect the shl/xor pair to become a single bneg.h taking the
; two loaded registers, with the result stored through %c.
1405 define void @bneg_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
1406 ; CHECK: bneg_v8i16:
1408 %1 = load <8 x i16>, <8 x i16>* %a
1409 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
1410 %2 = load <8 x i16>, <8 x i16>* %b
1411 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
1412 %3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2
1413 %4 = xor <8 x i16> %1, %3
1414 ; CHECK-DAG: bneg.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
1415 store <8 x i16> %4, <8 x i16>* %c
1416 ; CHECK-DAG: st.h [[R3]], 0($4)
1419 ; CHECK: .size bneg_v8i16
; bneg_v4i32: per-lane variable bit toggle on <4 x i32>, %a ^ (1 << %b).
; The directives expect the shl/xor pair to become a single bneg.w taking the
; two loaded registers, with the result stored through %c.
1422 define void @bneg_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
1423 ; CHECK: bneg_v4i32:
1425 %1 = load <4 x i32>, <4 x i32>* %a
1426 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
1427 %2 = load <4 x i32>, <4 x i32>* %b
1428 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
1429 %3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2
1430 %4 = xor <4 x i32> %1, %3
1431 ; CHECK-DAG: bneg.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
1432 store <4 x i32> %4, <4 x i32>* %c
1433 ; CHECK-DAG: st.w [[R3]], 0($4)
1436 ; CHECK: .size bneg_v4i32
; bneg_v2i64: per-lane variable bit toggle on <2 x i64>, %a ^ (1 << %b).
; The directives expect the shl/xor pair to become a single bneg.d taking the
; two loaded registers, with the result stored through %c.
1439 define void @bneg_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
1440 ; CHECK: bneg_v2i64:
1442 %1 = load <2 x i64>, <2 x i64>* %a
1443 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
1444 %2 = load <2 x i64>, <2 x i64>* %b
1445 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
1446 %3 = shl <2 x i64> <i64 1, i64 1>, %2
1447 %4 = xor <2 x i64> %1, %3
1448 ; CHECK-DAG: bneg.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
1449 store <2 x i64> %4, <2 x i64>* %c
1450 ; CHECK-DAG: st.d [[R3]], 0($4)
1453 ; CHECK: .size bneg_v2i64
; bclri_v16i8: constant bit clear on <16 x i8>, %a & (8 xor -1), i.e. %a & 247
; (bit 3 cleared in every byte). For the byte case the directives expect
; andi.b with immediate 247 rather than bclri.b; the in-body note records
; that the two encodings are equivalent.
1456 define void @bclri_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
1457 ; CHECK: bclri_v16i8:
1459 %1 = load <16 x i8>, <16 x i8>* %a
1460 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
1461 %2 = xor <16 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>,
1462 <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1463 %3 = and <16 x i8> %1, %2
1464 ; bclri.b and andi.b are exactly equivalent.
1465 ; CHECK-DAG: andi.b [[R3:\$w[0-9]+]], [[R1]], 247
1466 store <16 x i8> %3, <16 x i8>* %c
1467 ; CHECK-DAG: st.b [[R3]], 0($4)
1470 ; CHECK: .size bclri_v16i8
; bclri_v8i16: constant bit clear on <8 x i16>, %a & (8 xor -1).
; 8 is 1 << 3, so the constant xor yields a mask with only bit 3 clear; the
; directives expect a single bclri.h with bit index 3, stored through %c.
1473 define void @bclri_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
1474 ; CHECK: bclri_v8i16:
1476 %1 = load <8 x i16>, <8 x i16>* %a
1477 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
1478 %2 = xor <8 x i16> <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>,
1479 <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1480 %3 = and <8 x i16> %1, %2
1481 ; CHECK-DAG: bclri.h [[R3:\$w[0-9]+]], [[R1]], 3
1482 store <8 x i16> %3, <8 x i16>* %c
1483 ; CHECK-DAG: st.h [[R3]], 0($4)
1486 ; CHECK: .size bclri_v8i16
; bclri_v4i32: constant bit clear on <4 x i32>, %a & (8 xor -1).
; 8 is 1 << 3, so the constant xor yields a mask with only bit 3 clear; the
; directives expect a single bclri.w with bit index 3, stored through %c.
1489 define void @bclri_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
1490 ; CHECK: bclri_v4i32:
1492 %1 = load <4 x i32>, <4 x i32>* %a
1493 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
1494 %2 = xor <4 x i32> <i32 8, i32 8, i32 8, i32 8>,
1495 <i32 -1, i32 -1, i32 -1, i32 -1>
1496 %3 = and <4 x i32> %1, %2
1497 ; CHECK-DAG: bclri.w [[R3:\$w[0-9]+]], [[R1]], 3
1498 store <4 x i32> %3, <4 x i32>* %c
1499 ; CHECK-DAG: st.w [[R3]], 0($4)
1502 ; CHECK: .size bclri_v4i32
; bclri_v2i64: constant bit clear on <2 x i64>, %a & (8 xor -1).
; 8 is 1 << 3, so the constant xor yields a mask with only bit 3 clear; the
; directives expect a single bclri.d with bit index 3, stored through %c.
; The xor's second operand (the all-ones vector on line 1511) was missing from
; this listing, leaving a dangling comma; it is restored here to match the
; other bclri_* tests.
1505 define void @bclri_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
1506 ; CHECK: bclri_v2i64:
1508 %1 = load <2 x i64>, <2 x i64>* %a
1509 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
1510 %2 = xor <2 x i64> <i64 8, i64 8>,
1511 <i64 -1, i64 -1>
1512 %3 = and <2 x i64> %1, %2
1513 ; CHECK-DAG: bclri.d [[R3:\$w[0-9]+]], [[R1]], 3
1514 store <2 x i64> %3, <2 x i64>* %c
1515 ; CHECK-DAG: st.d [[R3]], 0($4)
1518 ; CHECK: .size bclri_v2i64
; bseti_v16i8: constant bit set on <16 x i8>, %a | 8 (8 == 1 << 3).
; The directives expect a single bseti.b with bit index 3, stored through %c.
1521 define void @bseti_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
1522 ; CHECK: bseti_v16i8:
1524 %1 = load <16 x i8>, <16 x i8>* %a
1525 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
1526 %2 = or <16 x i8> %1, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
1527 ; CHECK-DAG: bseti.b [[R3:\$w[0-9]+]], [[R1]], 3
1528 store <16 x i8> %2, <16 x i8>* %c
1529 ; CHECK-DAG: st.b [[R3]], 0($4)
1532 ; CHECK: .size bseti_v16i8
; bseti_v8i16: constant bit set on <8 x i16>, %a | 8 (8 == 1 << 3).
; The directives expect a single bseti.h with bit index 3, stored through %c.
1535 define void @bseti_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
1536 ; CHECK: bseti_v8i16:
1538 %1 = load <8 x i16>, <8 x i16>* %a
1539 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
1540 %2 = or <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
1541 ; CHECK-DAG: bseti.h [[R3:\$w[0-9]+]], [[R1]], 3
1542 store <8 x i16> %2, <8 x i16>* %c
1543 ; CHECK-DAG: st.h [[R3]], 0($4)
1546 ; CHECK: .size bseti_v8i16
; bseti_v4i32: constant bit set on <4 x i32>, %a | 8 (8 == 1 << 3).
; The directives expect a single bseti.w with bit index 3, stored through %c.
1549 define void @bseti_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
1550 ; CHECK: bseti_v4i32:
1552 %1 = load <4 x i32>, <4 x i32>* %a
1553 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
1554 %2 = or <4 x i32> %1, <i32 8, i32 8, i32 8, i32 8>
1555 ; CHECK-DAG: bseti.w [[R3:\$w[0-9]+]], [[R1]], 3
1556 store <4 x i32> %2, <4 x i32>* %c
1557 ; CHECK-DAG: st.w [[R3]], 0($4)
1560 ; CHECK: .size bseti_v4i32
; bseti_v2i64: constant bit set on <2 x i64>, %a | 8 (8 == 1 << 3).
; The directives expect a single bseti.d with bit index 3, stored through %c.
1563 define void @bseti_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
1564 ; CHECK: bseti_v2i64:
1566 %1 = load <2 x i64>, <2 x i64>* %a
1567 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
1568 %2 = or <2 x i64> %1, <i64 8, i64 8>
1569 ; CHECK-DAG: bseti.d [[R3:\$w[0-9]+]], [[R1]], 3
1570 store <2 x i64> %2, <2 x i64>* %c
1571 ; CHECK-DAG: st.d [[R3]], 0($4)
1574 ; CHECK: .size bseti_v2i64
; bnegi_v16i8: constant bit toggle on <16 x i8>, %a ^ 8 (8 == 1 << 3).
; The directives expect a single bnegi.b with bit index 3, stored through %c.
1577 define void @bnegi_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
1578 ; CHECK: bnegi_v16i8:
1580 %1 = load <16 x i8>, <16 x i8>* %a
1581 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
1582 %2 = xor <16 x i8> %1, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
1583 ; CHECK-DAG: bnegi.b [[R3:\$w[0-9]+]], [[R1]], 3
1584 store <16 x i8> %2, <16 x i8>* %c
1585 ; CHECK-DAG: st.b [[R3]], 0($4)
1588 ; CHECK: .size bnegi_v16i8
; bnegi_v8i16: constant bit toggle on <8 x i16>, %a ^ 8 (8 == 1 << 3).
; The directives expect a single bnegi.h with bit index 3, stored through %c.
1591 define void @bnegi_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
1592 ; CHECK: bnegi_v8i16:
1594 %1 = load <8 x i16>, <8 x i16>* %a
1595 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
1596 %2 = xor <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
1597 ; CHECK-DAG: bnegi.h [[R3:\$w[0-9]+]], [[R1]], 3
1598 store <8 x i16> %2, <8 x i16>* %c
1599 ; CHECK-DAG: st.h [[R3]], 0($4)
1602 ; CHECK: .size bnegi_v8i16
; bnegi_v4i32: constant bit toggle on <4 x i32>, %a ^ 8 (8 == 1 << 3).
; The directives expect a single bnegi.w with bit index 3, stored through %c.
1605 define void @bnegi_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
1606 ; CHECK: bnegi_v4i32:
1608 %1 = load <4 x i32>, <4 x i32>* %a
1609 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
1610 %2 = xor <4 x i32> %1, <i32 8, i32 8, i32 8, i32 8>
1611 ; CHECK-DAG: bnegi.w [[R3:\$w[0-9]+]], [[R1]], 3
1612 store <4 x i32> %2, <4 x i32>* %c
1613 ; CHECK-DAG: st.w [[R3]], 0($4)
1616 ; CHECK: .size bnegi_v4i32
; bnegi_v2i64: constant bit toggle on <2 x i64>, %a ^ 8 (8 == 1 << 3).
; The directives expect a single bnegi.d with bit index 3, stored through %c.
1619 define void @bnegi_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
1620 ; CHECK: bnegi_v2i64:
1622 %1 = load <2 x i64>, <2 x i64>* %a
1623 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
1624 %2 = xor <2 x i64> %1, <i64 8, i64 8>
1625 ; CHECK-DAG: bnegi.d [[R3:\$w[0-9]+]], [[R1]], 3
1626 store <2 x i64> %2, <2 x i64>* %c
1627 ; CHECK-DAG: st.d [[R3]], 0($4)
1630 ; CHECK: .size bnegi_v2i64
; Declarations of the population-count (ctpop) and count-leading-zeros (ctlz)
; intrinsics for the four 128-bit vector types exercised by this file.
; NOTE(review): no caller is visible in this part of the file; presumably the
; tests that use them live elsewhere in it - confirm before removing any.
1633 declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %val)
1634 declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %val)
1635 declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val)
1636 declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val)
1637 declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %val)
1638 declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %val)
1639 declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val)
1640 declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %val)