; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=hexagon -mattr=+hvxv60,+hvx-length128b,-packets < %s | FileCheck --check-prefix=V60 %s
; RUN: llc -march=hexagon -mattr=+hvxv62,+hvx-length128b,-packets < %s | FileCheck --check-prefix=V62 %s
; RUN: llc -march=hexagon -mattr=+hvxv66,+hvx-length128b,-packets < %s | FileCheck --check-prefix=V66 %s
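; f0: fshl of <128 x i8> by a uniform scalar amount. HVX has no byte
; shifts, so the amount is masked to 0..7 (splat of 0x07070707), the
; inputs are split into even/odd bytes (vshuffoe), shifted as halfwords,
; and recombined with vshuffo.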
define <128 x i8> @f0(<128 x i8> %a0, <128 x i8> %a1, i8 %a2) #0 {
; V60-LABEL: f0:
; V60:       // %bb.0:
; V60-NEXT:    r0 = vsplatb(r0)
; V60-NEXT:    r1 = ##117901063
; V60-NEXT:    v1:0.b = vshuffoe(v0.b,v1.b)
; V60-NEXT:    v2 = vsplat(r0)
; V60-NEXT:    v3 = vsplat(r1)
; V60-NEXT:    v2 = vand(v2,v3)
; V60-NEXT:    v3:2.uh = vzxt(v2.ub)
; V60-NEXT:    v0.h = vasl(v0.h,v2.h)
; V60-NEXT:    v1.h = vasl(v1.h,v3.h)
; V60-NEXT:    v0.b = vshuffo(v1.b,v0.b)
; V60-NEXT:    jumpr r31
;
; V62-LABEL: f0:
; V62:       // %bb.0:
; V62-NEXT:    v2.b = vsplat(r0)
; V62-NEXT:    v1:0.b = vshuffoe(v0.b,v1.b)
; V62-NEXT:    v3.b = vsplat(r2)
; V62-NEXT:    v2 = vand(v2,v3)
; V62-NEXT:    v3:2.uh = vzxt(v2.ub)
; V62-NEXT:    v0.h = vasl(v0.h,v2.h)
; V62-NEXT:    v1.h = vasl(v1.h,v3.h)
; V62-NEXT:    v0.b = vshuffo(v1.b,v0.b)
; V62-NEXT:    jumpr r31
;
; V66-LABEL: f0:
; V66:       // %bb.0:
; V66-NEXT:    v2.b = vsplat(r0)
; V66-NEXT:    v1:0.b = vshuffoe(v0.b,v1.b)
; V66-NEXT:    v3.b = vsplat(r2)
; V66-NEXT:    v2 = vand(v2,v3)
; V66-NEXT:    v3:2.uh = vzxt(v2.ub)
; V66-NEXT:    v0.h = vasl(v0.h,v2.h)
; V66-NEXT:    v1.h = vasl(v1.h,v3.h)
; V66-NEXT:    v0.b = vshuffo(v1.b,v0.b)
; V66-NEXT:    jumpr r31
  %v0 = insertelement <128 x i8> undef, i8 %a2, i32 0
  %v1 = shufflevector <128 x i8> %v0, <128 x i8> undef, <128 x i32> zeroinitializer
  %v2 = call <128 x i8> @llvm.fshl.v128i8(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %v1)
  ret <128 x i8> %v2
}
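; f1: fshl of <64 x i16> by a uniform scalar amount: %a0 shifted left by
; the amount, %a1 shifted right by 16-amount, ORed together; %a0 is kept
; unchanged when the amount is a multiple of 16 (bitsclr + predicated
; vector copy).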
define <64 x i16> @f1(<64 x i16> %a0, <64 x i16> %a1, i16 %a2) #0 {
; V60-LABEL: f1:
; V60:       // %bb.0:
; V60-NEXT:    r1 = and(r0,#15)
; V60-NEXT:    p0 = bitsclr(r0,#15)
; V60-NEXT:    v2.h = vasl(v0.h,r1)
; V60-NEXT:    r1 = sub(#16,r1)
; V60-NEXT:    v1.uh = vlsr(v1.uh,r1)
; V60-NEXT:    v1 = vor(v2,v1)
; V60-NEXT:    if (!p0) v0 = v1
; V60-NEXT:    jumpr r31
;
; V62-LABEL: f1:
; V62:       // %bb.0:
; V62-NEXT:    r1 = and(r0,#15)
; V62-NEXT:    p0 = bitsclr(r0,#15)
; V62-NEXT:    v2.h = vasl(v0.h,r1)
; V62-NEXT:    r1 = sub(#16,r1)
; V62-NEXT:    v1.uh = vlsr(v1.uh,r1)
; V62-NEXT:    v1 = vor(v2,v1)
; V62-NEXT:    if (!p0) v0 = v1
; V62-NEXT:    jumpr r31
;
; V66-LABEL: f1:
; V66:       // %bb.0:
; V66-NEXT:    r1 = and(r0,#15)
; V66-NEXT:    p0 = bitsclr(r0,#15)
; V66-NEXT:    v2.h = vasl(v0.h,r1)
; V66-NEXT:    r1 = sub(#16,r1)
; V66-NEXT:    v1.uh = vlsr(v1.uh,r1)
; V66-NEXT:    v1 = vor(v2,v1)
; V66-NEXT:    if (!p0) v0 = v1
; V66-NEXT:    jumpr r31
  %v0 = insertelement <64 x i16> undef, i16 %a2, i32 0
  %v1 = shufflevector <64 x i16> %v0, <64 x i16> undef, <64 x i32> zeroinitializer
  %v2 = call <64 x i16> @llvm.fshl.v64i16(<64 x i16> %a0, <64 x i16> %a1, <64 x i16> %v1)
  ret <64 x i16> %v2
}
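; f2: fshl of <32 x i32> by a uniform scalar amount. V60/V62 use the
; same shift/or/select sequence as f1; V66 splats the amount and uses
; vlsr together with vasrinto, avoiding the predicated copy.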
define <32 x i32> @f2(<32 x i32> %a0, <32 x i32> %a1, i32 %a2) #0 {
; V60-LABEL: f2:
; V60:       // %bb.0:
; V60-NEXT:    r1 = and(r0,#31)
; V60-NEXT:    p0 = bitsclr(r0,#31)
; V60-NEXT:    v2.w = vasl(v0.w,r1)
; V60-NEXT:    r1 = sub(#32,r1)
; V60-NEXT:    v1.uw = vlsr(v1.uw,r1)
; V60-NEXT:    v1 = vor(v2,v1)
; V60-NEXT:    if (!p0) v0 = v1
; V60-NEXT:    jumpr r31
;
; V62-LABEL: f2:
; V62:       // %bb.0:
; V62-NEXT:    r1 = and(r0,#31)
; V62-NEXT:    p0 = bitsclr(r0,#31)
; V62-NEXT:    v2.w = vasl(v0.w,r1)
; V62-NEXT:    r1 = sub(#32,r1)
; V62-NEXT:    v1.uw = vlsr(v1.uw,r1)
; V62-NEXT:    v1 = vor(v2,v1)
; V62-NEXT:    if (!p0) v0 = v1
; V62-NEXT:    jumpr r31
;
; V66-LABEL: f2:
; V66:       // %bb.0:
; V66-NEXT:    r0 = and(r0,#31)
; V66-NEXT:    v4 = vxor(v4,v4)
; V66-NEXT:    v2 = vsplat(r0)
; V66-NEXT:    v3 = vsplat(r1)
; V66-NEXT:    v3.w = vsub(v3.w,v2.w)
; V66-NEXT:    v2.w = vsub(v4.w,v2.w)
; V66-NEXT:    v4.w = vlsr(v1.w,v3.w)
; V66-NEXT:    v5:4.w = vasrinto(v0.w,v2.w)
; V66-NEXT:    jumpr r31
  %v0 = insertelement <32 x i32> undef, i32 %a2, i32 0
  %v1 = shufflevector <32 x i32> %v0, <32 x i32> undef, <32 x i32> zeroinitializer
  %v2 = call <32 x i32> @llvm.fshl.v32i32(<32 x i32> %a0, <32 x i32> %a1, <32 x i32> %v1)
  ret <32 x i32> %v2
}
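; f3: fshr of <128 x i8> by a uniform scalar amount: the same even/odd
; halfword expansion as f0, but with right shifts (vlsr) and the result
; recombined with vshuffe.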
define <128 x i8> @f3(<128 x i8> %a0, <128 x i8> %a1, i8 %a2) #0 {
; V60-LABEL: f3:
; V60:       // %bb.0:
; V60-NEXT:    r0 = vsplatb(r0)
; V60-NEXT:    r1 = ##117901063
; V60-NEXT:    v1:0.b = vshuffoe(v0.b,v1.b)
; V60-NEXT:    v2 = vsplat(r0)
; V60-NEXT:    v3 = vsplat(r1)
; V60-NEXT:    v2 = vand(v2,v3)
; V60-NEXT:    v3:2.uh = vzxt(v2.ub)
; V60-NEXT:    v0.h = vlsr(v0.h,v2.h)
; V60-NEXT:    v1.h = vlsr(v1.h,v3.h)
; V60-NEXT:    v0.b = vshuffe(v1.b,v0.b)
; V60-NEXT:    jumpr r31
;
; V62-LABEL: f3:
; V62:       // %bb.0:
; V62-NEXT:    v2.b = vsplat(r0)
; V62-NEXT:    v1:0.b = vshuffoe(v0.b,v1.b)
; V62-NEXT:    v3.b = vsplat(r2)
; V62-NEXT:    v2 = vand(v2,v3)
; V62-NEXT:    v3:2.uh = vzxt(v2.ub)
; V62-NEXT:    v0.h = vlsr(v0.h,v2.h)
; V62-NEXT:    v1.h = vlsr(v1.h,v3.h)
; V62-NEXT:    v0.b = vshuffe(v1.b,v0.b)
; V62-NEXT:    jumpr r31
;
; V66-LABEL: f3:
; V66:       // %bb.0:
; V66-NEXT:    v2.b = vsplat(r0)
; V66-NEXT:    v1:0.b = vshuffoe(v0.b,v1.b)
; V66-NEXT:    v3.b = vsplat(r2)
; V66-NEXT:    v2 = vand(v2,v3)
; V66-NEXT:    v3:2.uh = vzxt(v2.ub)
; V66-NEXT:    v0.h = vlsr(v0.h,v2.h)
; V66-NEXT:    v1.h = vlsr(v1.h,v3.h)
; V66-NEXT:    v0.b = vshuffe(v1.b,v0.b)
; V66-NEXT:    jumpr r31
  %v0 = insertelement <128 x i8> undef, i8 %a2, i32 0
  %v1 = shufflevector <128 x i8> %v0, <128 x i8> undef, <128 x i32> zeroinitializer
  %v2 = call <128 x i8> @llvm.fshr.v128i8(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %v1)
  ret <128 x i8> %v2
}
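; f4: fshr of <64 x i16> by a uniform scalar amount: %a1 shifted right
; by the amount, %a0 shifted left by 16-amount, ORed; the result is %a1
; itself when the amount is a multiple of 16.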
define <64 x i16> @f4(<64 x i16> %a0, <64 x i16> %a1, i16 %a2) #0 {
; V60-LABEL: f4:
; V60:       // %bb.0:
; V60-NEXT:    r1 = and(r0,#15)
; V60-NEXT:    p0 = bitsclr(r0,#15)
; V60-NEXT:    v2.uh = vlsr(v1.uh,r1)
; V60-NEXT:    r1 = sub(#16,r1)
; V60-NEXT:    v0.h = vasl(v0.h,r1)
; V60-NEXT:    v0 = vor(v0,v2)
; V60-NEXT:    if (p0) v0 = v1
; V60-NEXT:    jumpr r31
;
; V62-LABEL: f4:
; V62:       // %bb.0:
; V62-NEXT:    r1 = and(r0,#15)
; V62-NEXT:    p0 = bitsclr(r0,#15)
; V62-NEXT:    v2.uh = vlsr(v1.uh,r1)
; V62-NEXT:    r1 = sub(#16,r1)
; V62-NEXT:    v0.h = vasl(v0.h,r1)
; V62-NEXT:    v0 = vor(v0,v2)
; V62-NEXT:    if (p0) v0 = v1
; V62-NEXT:    jumpr r31
;
; V66-LABEL: f4:
; V66:       // %bb.0:
; V66-NEXT:    r1 = and(r0,#15)
; V66-NEXT:    p0 = bitsclr(r0,#15)
; V66-NEXT:    v2.uh = vlsr(v1.uh,r1)
; V66-NEXT:    r1 = sub(#16,r1)
; V66-NEXT:    v0.h = vasl(v0.h,r1)
; V66-NEXT:    v0 = vor(v0,v2)
; V66-NEXT:    if (p0) v0 = v1
; V66-NEXT:    jumpr r31
  %v0 = insertelement <64 x i16> undef, i16 %a2, i32 0
  %v1 = shufflevector <64 x i16> %v0, <64 x i16> undef, <64 x i32> zeroinitializer
  %v2 = call <64 x i16> @llvm.fshr.v64i16(<64 x i16> %a0, <64 x i16> %a1, <64 x i16> %v1)
  ret <64 x i16> %v2
}
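; f5: fshr of <32 x i32> by a uniform scalar amount; on V66 the splatted
; amount feeds vlsr and vasrinto directly, with no predicate needed.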
define <32 x i32> @f5(<32 x i32> %a0, <32 x i32> %a1, i32 %a2) #0 {
; V60-LABEL: f5:
; V60:       // %bb.0:
; V60-NEXT:    r1 = and(r0,#31)
; V60-NEXT:    p0 = bitsclr(r0,#31)
; V60-NEXT:    v2.uw = vlsr(v1.uw,r1)
; V60-NEXT:    r1 = sub(#32,r1)
; V60-NEXT:    v0.w = vasl(v0.w,r1)
; V60-NEXT:    v0 = vor(v0,v2)
; V60-NEXT:    if (p0) v0 = v1
; V60-NEXT:    jumpr r31
;
; V62-LABEL: f5:
; V62:       // %bb.0:
; V62-NEXT:    r1 = and(r0,#31)
; V62-NEXT:    p0 = bitsclr(r0,#31)
; V62-NEXT:    v2.uw = vlsr(v1.uw,r1)
; V62-NEXT:    r1 = sub(#32,r1)
; V62-NEXT:    v0.w = vasl(v0.w,r1)
; V62-NEXT:    v0 = vor(v0,v2)
; V62-NEXT:    if (p0) v0 = v1
; V62-NEXT:    jumpr r31
;
; V66-LABEL: f5:
; V66:       // %bb.0:
; V66-NEXT:    r0 = and(r0,#31)
; V66-NEXT:    v3 = vsplat(r0)
; V66-NEXT:    v2.w = vlsr(v1.w,v3.w)
; V66-NEXT:    v3:2.w = vasrinto(v0.w,v3.w)
; V66-NEXT:    jumpr r31
  %v0 = insertelement <32 x i32> undef, i32 %a2, i32 0
  %v1 = shufflevector <32 x i32> %v0, <32 x i32> undef, <32 x i32> zeroinitializer
  %v2 = call <32 x i32> @llvm.fshr.v32i32(<32 x i32> %a0, <32 x i32> %a1, <32 x i32> %v1)
  ret <32 x i32> %v2
}
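; f6: fshl of <128 x i8> with per-element shift amounts: the amounts are
; masked to 0..7 (vand with the 0x07070707 splat) and applied through
; the same even/odd halfword expansion as f0.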
define <128 x i8> @f6(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2) #0 {
; V60-LABEL: f6:
; V60:       // %bb.0:
; V60-NEXT:    r0 = ##117901063
; V60-NEXT:    v1:0.b = vshuffoe(v0.b,v1.b)
; V60-NEXT:    v3 = vsplat(r0)
; V60-NEXT:    v2 = vand(v2,v3)
; V60-NEXT:    v3:2.uh = vzxt(v2.ub)
; V60-NEXT:    v0.h = vasl(v0.h,v2.h)
; V60-NEXT:    v1.h = vasl(v1.h,v3.h)
; V60-NEXT:    v0.b = vshuffo(v1.b,v0.b)
; V60-NEXT:    jumpr r31
;
; V62-LABEL: f6:
; V62:       // %bb.0:
; V62-NEXT:    v1:0.b = vshuffoe(v0.b,v1.b)
; V62-NEXT:    v3.b = vsplat(r0)
; V62-NEXT:    v2 = vand(v2,v3)
; V62-NEXT:    v3:2.uh = vzxt(v2.ub)
; V62-NEXT:    v0.h = vasl(v0.h,v2.h)
; V62-NEXT:    v1.h = vasl(v1.h,v3.h)
; V62-NEXT:    v0.b = vshuffo(v1.b,v0.b)
; V62-NEXT:    jumpr r31
;
; V66-LABEL: f6:
; V66:       // %bb.0:
; V66-NEXT:    v1:0.b = vshuffoe(v0.b,v1.b)
; V66-NEXT:    v3.b = vsplat(r0)
; V66-NEXT:    v2 = vand(v2,v3)
; V66-NEXT:    v3:2.uh = vzxt(v2.ub)
; V66-NEXT:    v0.h = vasl(v0.h,v2.h)
; V66-NEXT:    v1.h = vasl(v1.h,v3.h)
; V66-NEXT:    v0.b = vshuffo(v1.b,v0.b)
; V66-NEXT:    jumpr r31
  %v0 = call <128 x i8> @llvm.fshl.v128i8(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2)
  ret <128 x i8> %v0
}
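; f7: fshl of <64 x i16> with per-element amounts: mask each amount to
; 0..15, shift %a0 left by it and %a1 right by 16-amount, and OR. V60
; additionally muxes in %a0 for lanes whose amount is zero.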
define <64 x i16> @f7(<64 x i16> %a0, <64 x i16> %a1, <64 x i16> %a2) #0 {
; V60-LABEL: f7:
; V60:       // %bb.0:
; V60-NEXT:    r0 = ##983055
; V60-NEXT:    r1 = ##1048592
; V60-NEXT:    v3 = vxor(v3,v3)
; V60-NEXT:    v4 = vsplat(r0)
; V60-NEXT:    v5 = vsplat(r1)
; V60-NEXT:    v2 = vand(v2,v4)
; V60-NEXT:    v4.h = vsub(v5.h,v2.h)
; V60-NEXT:    q0 = vcmp.eq(v2.h,v3.h)
; V60-NEXT:    v31.h = vasl(v0.h,v2.h)
; V60-NEXT:    v1.h = vlsr(v1.h,v4.h)
; V60-NEXT:    v1 = vor(v31,v1)
; V60-NEXT:    v0 = vmux(q0,v0,v1)
; V60-NEXT:    jumpr r31
;
; V62-LABEL: f7:
; V62:       // %bb.0:
; V62-NEXT:    r1:0 = combine(#16,#15)
; V62-NEXT:    v3.h = vsplat(r0)
; V62-NEXT:    v4.h = vsplat(r1)
; V62-NEXT:    v2 = vand(v2,v3)
; V62-NEXT:    v3.h = vsub(v4.h,v2.h)
; V62-NEXT:    v0.h = vasl(v0.h,v2.h)
; V62-NEXT:    v1.h = vlsr(v1.h,v3.h)
; V62-NEXT:    v0 = vor(v0,v1)
; V62-NEXT:    jumpr r31
;
; V66-LABEL: f7:
; V66:       // %bb.0:
; V66-NEXT:    r1:0 = combine(#16,#15)
; V66-NEXT:    v3.h = vsplat(r0)
; V66-NEXT:    v4.h = vsplat(r1)
; V66-NEXT:    v2 = vand(v2,v3)
; V66-NEXT:    v3.h = vsub(v4.h,v2.h)
; V66-NEXT:    v0.h = vasl(v0.h,v2.h)
; V66-NEXT:    v1.h = vlsr(v1.h,v3.h)
; V66-NEXT:    v0 = vor(v0,v1)
; V66-NEXT:    jumpr r31
  %v0 = call <64 x i16> @llvm.fshl.v64i16(<64 x i16> %a0, <64 x i16> %a1, <64 x i16> %a2)
  ret <64 x i16> %v0
}
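; f8: fshl of <32 x i32> with per-element amounts: the word-sized analog
; of f7 on V60/V62; V66 uses the vlsr/vasrinto pair on the masked and
; negated amounts.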
define <32 x i32> @f8(<32 x i32> %a0, <32 x i32> %a1, <32 x i32> %a2) #0 {
; V60-LABEL: f8:
; V60:       // %bb.0:
; V60-NEXT:    r1:0 = combine(#32,#31)
; V60-NEXT:    v3 = vxor(v3,v3)
; V60-NEXT:    v4 = vsplat(r0)
; V60-NEXT:    v5 = vsplat(r1)
; V60-NEXT:    v2 = vand(v2,v4)
; V60-NEXT:    v5.w = vsub(v5.w,v2.w)
; V60-NEXT:    v4 = vand(v2,v4)
; V60-NEXT:    v2.w = vasl(v0.w,v2.w)
; V60-NEXT:    q0 = vcmp.eq(v4.w,v3.w)
; V60-NEXT:    v1.w = vlsr(v1.w,v5.w)
; V60-NEXT:    v1 = vor(v2,v1)
; V60-NEXT:    v0 = vmux(q0,v0,v1)
; V60-NEXT:    jumpr r31
;
; V62-LABEL: f8:
; V62:       // %bb.0:
; V62-NEXT:    r1:0 = combine(#32,#31)
; V62-NEXT:    v3 = vsplat(r0)
; V62-NEXT:    v4 = vsplat(r1)
; V62-NEXT:    v2 = vand(v2,v3)
; V62-NEXT:    v3.w = vsub(v4.w,v2.w)
; V62-NEXT:    v0.w = vasl(v0.w,v2.w)
; V62-NEXT:    v1.w = vlsr(v1.w,v3.w)
; V62-NEXT:    v0 = vor(v0,v1)
; V62-NEXT:    jumpr r31
;
; V66-LABEL: f8:
; V66:       // %bb.0:
; V66-NEXT:    r1:0 = combine(#32,#31)
; V66-NEXT:    v31 = vxor(v31,v31)
; V66-NEXT:    v3 = vsplat(r0)
; V66-NEXT:    v4 = vsplat(r1)
; V66-NEXT:    v2 = vand(v2,v3)
; V66-NEXT:    v4.w = vsub(v4.w,v2.w)
; V66-NEXT:    v2.w = vsub(v31.w,v2.w)
; V66-NEXT:    v4.w = vlsr(v1.w,v4.w)
; V66-NEXT:    v5:4.w = vasrinto(v0.w,v2.w)
; V66-NEXT:    jumpr r31
  %v0 = call <32 x i32> @llvm.fshl.v32i32(<32 x i32> %a0, <32 x i32> %a1, <32 x i32> %a2)
  ret <32 x i32> %v0
}
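; f9: fshr of <128 x i8> with per-element amounts, lowered like f6 but
; with right shifts and vshuffe to recombine.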
define <128 x i8> @f9(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2) #0 {
; V60-LABEL: f9:
; V60:       // %bb.0:
; V60-NEXT:    r0 = ##117901063
; V60-NEXT:    v1:0.b = vshuffoe(v0.b,v1.b)
; V60-NEXT:    v3 = vsplat(r0)
; V60-NEXT:    v2 = vand(v2,v3)
; V60-NEXT:    v3:2.uh = vzxt(v2.ub)
; V60-NEXT:    v0.h = vlsr(v0.h,v2.h)
; V60-NEXT:    v1.h = vlsr(v1.h,v3.h)
; V60-NEXT:    v0.b = vshuffe(v1.b,v0.b)
; V60-NEXT:    jumpr r31
;
; V62-LABEL: f9:
; V62:       // %bb.0:
; V62-NEXT:    v1:0.b = vshuffoe(v0.b,v1.b)
; V62-NEXT:    v3.b = vsplat(r0)
; V62-NEXT:    v2 = vand(v2,v3)
; V62-NEXT:    v3:2.uh = vzxt(v2.ub)
; V62-NEXT:    v0.h = vlsr(v0.h,v2.h)
; V62-NEXT:    v1.h = vlsr(v1.h,v3.h)
; V62-NEXT:    v0.b = vshuffe(v1.b,v0.b)
; V62-NEXT:    jumpr r31
;
; V66-LABEL: f9:
; V66:       // %bb.0:
; V66-NEXT:    v1:0.b = vshuffoe(v0.b,v1.b)
; V66-NEXT:    v3.b = vsplat(r0)
; V66-NEXT:    v2 = vand(v2,v3)
; V66-NEXT:    v3:2.uh = vzxt(v2.ub)
; V66-NEXT:    v0.h = vlsr(v0.h,v2.h)
; V66-NEXT:    v1.h = vlsr(v1.h,v3.h)
; V66-NEXT:    v0.b = vshuffe(v1.b,v0.b)
; V66-NEXT:    jumpr r31
  %v0 = call <128 x i8> @llvm.fshr.v128i8(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2)
  ret <128 x i8> %v0
}
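; f10: fshr of <64 x i16> with per-element amounts. V60 shifts %a1 right
; by the amount and %a0 left by 16-amount, with a mux for zero amounts;
; V62/V66 instead shift %a0 right by amount-16, folding away the mux.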
define <64 x i16> @f10(<64 x i16> %a0, <64 x i16> %a1, <64 x i16> %a2) #0 {
; V60-LABEL: f10:
; V60:       // %bb.0:
; V60-NEXT:    r0 = ##983055
; V60-NEXT:    r1 = ##1048592
; V60-NEXT:    v3 = vxor(v3,v3)
; V60-NEXT:    v4 = vsplat(r0)
; V60-NEXT:    v5 = vsplat(r1)
; V60-NEXT:    v2 = vand(v2,v4)
; V60-NEXT:    v4.h = vsub(v5.h,v2.h)
; V60-NEXT:    q0 = vcmp.eq(v2.h,v3.h)
; V60-NEXT:    v31.h = vlsr(v1.h,v2.h)
; V60-NEXT:    v0.h = vasl(v0.h,v4.h)
; V60-NEXT:    v0 = vor(v0,v31)
; V60-NEXT:    v0 = vmux(q0,v1,v0)
; V60-NEXT:    jumpr r31
;
; V62-LABEL: f10:
; V62:       // %bb.0:
; V62-NEXT:    r1:0 = combine(#16,#15)
; V62-NEXT:    v3.h = vsplat(r0)
; V62-NEXT:    v4.h = vsplat(r1)
; V62-NEXT:    v2 = vand(v2,v3)
; V62-NEXT:    v3.h = vsub(v2.h,v4.h)
; V62-NEXT:    v1.h = vlsr(v1.h,v2.h)
; V62-NEXT:    v0.h = vlsr(v0.h,v3.h)
; V62-NEXT:    v0 = vor(v0,v1)
; V62-NEXT:    jumpr r31
;
; V66-LABEL: f10:
; V66:       // %bb.0:
; V66-NEXT:    r1:0 = combine(#16,#15)
; V66-NEXT:    v3.h = vsplat(r0)
; V66-NEXT:    v4.h = vsplat(r1)
; V66-NEXT:    v2 = vand(v2,v3)
; V66-NEXT:    v3.h = vsub(v2.h,v4.h)
; V66-NEXT:    v1.h = vlsr(v1.h,v2.h)
; V66-NEXT:    v0.h = vlsr(v0.h,v3.h)
; V66-NEXT:    v0 = vor(v0,v1)
; V66-NEXT:    jumpr r31
  %v0 = call <64 x i16> @llvm.fshr.v64i16(<64 x i16> %a0, <64 x i16> %a1, <64 x i16> %a2)
  ret <64 x i16> %v0
}
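; f11: fshr of <32 x i32> with per-element amounts; V66 masks the
; amounts to 0..31 and feeds them straight to vlsr and vasrinto.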
define <32 x i32> @f11(<32 x i32> %a0, <32 x i32> %a1, <32 x i32> %a2) #0 {
; V60-LABEL: f11:
; V60:       // %bb.0:
; V60-NEXT:    r1:0 = combine(#32,#31)
; V60-NEXT:    v3 = vxor(v3,v3)
; V60-NEXT:    v4 = vsplat(r0)
; V60-NEXT:    v5 = vsplat(r1)
; V60-NEXT:    v2 = vand(v2,v4)
; V60-NEXT:    v4.w = vsub(v5.w,v2.w)
; V60-NEXT:    q0 = vcmp.eq(v2.w,v3.w)
; V60-NEXT:    v31.w = vlsr(v1.w,v2.w)
; V60-NEXT:    v0.w = vasl(v0.w,v4.w)
; V60-NEXT:    v0 = vor(v0,v31)
; V60-NEXT:    v0 = vmux(q0,v1,v0)
; V60-NEXT:    jumpr r31
;
; V62-LABEL: f11:
; V62:       // %bb.0:
; V62-NEXT:    r1:0 = combine(#32,#31)
; V62-NEXT:    v3 = vsplat(r0)
; V62-NEXT:    v4 = vsplat(r1)
; V62-NEXT:    v2 = vand(v2,v3)
; V62-NEXT:    v3.w = vsub(v2.w,v4.w)
; V62-NEXT:    v1.w = vlsr(v1.w,v2.w)
; V62-NEXT:    v0.w = vlsr(v0.w,v3.w)
; V62-NEXT:    v0 = vor(v0,v1)
; V62-NEXT:    jumpr r31
;
; V66-LABEL: f11:
; V66:       // %bb.0:
; V66-NEXT:    r0 = #31
; V66-NEXT:    v3 = vsplat(r0)
; V66-NEXT:    v3 = vand(v2,v3)
; V66-NEXT:    v2.w = vlsr(v1.w,v3.w)
; V66-NEXT:    v3:2.w = vasrinto(v0.w,v3.w)
; V66-NEXT:    jumpr r31
  %v0 = call <32 x i32> @llvm.fshr.v32i32(<32 x i32> %a0, <32 x i32> %a1, <32 x i32> %a2)
  ret <32 x i32> %v0
}

declare <128 x i8> @llvm.fshl.v128i8(<128 x i8>, <128 x i8>, <128 x i8>)
declare <128 x i8> @llvm.fshr.v128i8(<128 x i8>, <128 x i8>, <128 x i8>)
declare <64 x i16> @llvm.fshl.v64i16(<64 x i16>, <64 x i16>, <64 x i16>)
declare <64 x i16> @llvm.fshr.v64i16(<64 x i16>, <64 x i16>, <64 x i16>)
declare <32 x i32> @llvm.fshl.v32i32(<32 x i32>, <32 x i32>, <32 x i32>)
declare <32 x i32> @llvm.fshr.v32i32(<32 x i32>, <32 x i32>, <32 x i32>)

attributes #0 = { nounwind }