1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
; or-of-(lshr x) with x should fold into a single USRA (unsigned shift
; right and accumulate), even across the <8 x i8> -> <4 x i16> bitcast.
4 define <4 x i16> @usra_v4i16(<8 x i8> %0) {
5 ; CHECK-LABEL: usra_v4i16:
7 ; CHECK-NEXT: ushr v0.8b, v0.8b, #7
8 ; CHECK-NEXT: usra v0.4h, v0.4h, #7
10 %2 = lshr <8 x i8> %0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
; reinterpret the shifted bytes as four i16 lanes
11 %3 = bitcast <8 x i8> %2 to <4 x i16>
12 %4 = lshr <4 x i16> %3, <i16 7, i16 7, i16 7, i16 7>
; %4 | %3 where %4 = %3 >> 7 — the usra accumulate pattern
13 %5 = or <4 x i16> %4, %3
; Same USRA fold as above, at i16 -> i32 lane width.
17 define <4 x i32> @usra_v4i32(<8 x i16> %0) {
18 ; CHECK-LABEL: usra_v4i32:
20 ; CHECK-NEXT: ushr v0.8h, v0.8h, #15
21 ; CHECK-NEXT: usra v0.4s, v0.4s, #15
23 %2 = lshr <8 x i16> %0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; reinterpret the shifted halfwords as four i32 lanes
24 %3 = bitcast <8 x i16> %2 to <4 x i32>
25 %4 = lshr <4 x i32> %3, <i32 15, i32 15, i32 15, i32 15>
; %4 | %3 where %4 = %3 >> 15 — folds to usra
26 %5 = or <4 x i32> %4, %3
; Same USRA fold as above, at i32 -> i64 lane width.
30 define <2 x i64> @usra_v2i64(<4 x i32> %0) {
31 ; CHECK-LABEL: usra_v2i64:
33 ; CHECK-NEXT: ushr v0.4s, v0.4s, #31
34 ; CHECK-NEXT: usra v0.2d, v0.2d, #31
36 %2 = lshr <4 x i32> %0, <i32 31, i32 31, i32 31, i32 31>
; reinterpret the shifted words as two i64 lanes
37 %3 = bitcast <4 x i32> %2 to <2 x i64>
38 %4 = lshr <2 x i64> %3, <i64 31, i64 31>
; %4 | %3 where %4 = %3 >> 31 — folds to usra
39 %5 = or <2 x i64> %4, %3
; Single-element vector: the fold should select the scalar-form
; "usra d0, d0, #31" rather than a vector arrangement.
43 define <1 x i64> @usra_v1i64(<2 x i32> %0) {
44 ; CHECK-LABEL: usra_v1i64:
46 ; CHECK-NEXT: ushr v0.2s, v0.2s, #31
47 ; CHECK-NEXT: usra d0, d0, #31
49 %2 = lshr <2 x i32> %0, <i32 31, i32 31>
; reinterpret the shifted words as a single i64 lane
50 %3 = bitcast <2 x i32> %2 to <1 x i64>
51 %4 = lshr <1 x i64> %3, <i64 31>
; %4 | %3 where %4 = %3 >> 31 — folds to usra
52 %5 = or <1 x i64> %4, %3
; or-of-(lshr x) with (ashr y) should fold the ashr/or pair into SSRA
; (signed shift right and accumulate); the masked value keeps bit 14
; clear so the two or operands have no overlapping set bits.
56 define <4 x i16> @ssra_v4i16(<4 x i16> %0) {
57 ; CHECK-LABEL: ssra_v4i16:
59 ; CHECK-NEXT: ushr v1.4h, v0.4h, #15
60 ; CHECK-NEXT: bic v0.4h, #64, lsl #8
61 ; CHECK-NEXT: ssra v1.4h, v0.4h, #14
62 ; CHECK-NEXT: fmov d0, d1
64 ; clear bit 14. e.g. 0b1111111111111111 to 0b1011111111111111
65 %2 = and <4 x i16> %0, <i16 49151, i16 49151,i16 49151,i16 49151>
66 ; the top 15 bits are zero; only the lowest bit can be zero or one. e.g. 0b1011111111111111 to 0b0000000000000001
67 %3 = lshr <4 x i16> %0, <i16 15, i16 15, i16 15, i16 15>
68 ; the top 15 bits may be 1 and the lowest bit is zero. e.g. 0b1011111111111111 to 0b1111111111111110
69 %4 = ashr <4 x i16> %2, <i16 14, i16 14, i16 14, i16 14>
; disjoint-bit or — %4 | accumulator folds to ssra
70 %5 = or <4 x i16> %3, %4
; Same SSRA fold as above, at i32 lane width.
74 define <4 x i32> @ssra_v4i32(<4 x i32> %0) {
75 ; CHECK-LABEL: ssra_v4i32:
77 ; CHECK-NEXT: ushr v1.4s, v0.4s, #31
78 ; CHECK-NEXT: bic v0.4s, #64, lsl #24
79 ; CHECK-NEXT: ssra v1.4s, v0.4s, #30
80 ; CHECK-NEXT: mov v0.16b, v1.16b
82 ; clear bit 30 (3221225471 = 0xBFFFFFFF).
83 %2 = and <4 x i32> %0, <i32 3221225471, i32 3221225471,i32 3221225471,i32 3221225471>
84 ; the top 31 bits are zero; only the lowest bit can be zero or one.
85 %3 = lshr <4 x i32> %0, <i32 31, i32 31, i32 31, i32 31>
86 ; the top 31 bits may be 1 and the lowest bit is zero.
87 %4 = ashr <4 x i32> %2, <i32 30, i32 30, i32 30, i32 30>
; disjoint-bit or — %4 | accumulator folds to ssra
88 %5 = or <4 x i32> %3, %4
; SSRA fold on a single-element vector: expects the scalar form
; "ssra d1, d0, #62". Mask clears bit 30 of each i32 half, so after
; the bitcast bit 62 of the i64 lane is clear.
92 define <1 x i64> @ssra_v1i64(<2 x i32> %0) {
93 ; CHECK-LABEL: ssra_v1i64:
95 ; CHECK-NEXT: ushr d1, d0, #63
96 ; CHECK-NEXT: bic v0.2s, #64, lsl #24
97 ; CHECK-NEXT: ssra d1, d0, #62
98 ; CHECK-NEXT: fmov d0, d1
; clear bit 30 of each word (3221225471 = 0xBFFFFFFF)
100 %2 = and <2 x i32> %0, <i32 3221225471, i32 3221225471>
101 %3 = bitcast <2 x i32> %2 to <1 x i64>
102 %4 = lshr <1 x i64> %3, <i64 63>
103 %5 = ashr <1 x i64> %3, <i64 62>
; disjoint-bit or — ashr/or pair folds to ssra
104 %6 = or <1 x i64> %4, %5
; Same SSRA fold at i64 lane width; the bic still operates on the
; i32 halves (clearing bit 30 of each, i.e. bit 62/30 of each i64).
108 define <2 x i64> @ssra_v2i64(<4 x i32> %0) {
109 ; CHECK-LABEL: ssra_v2i64:
111 ; CHECK-NEXT: ushr v1.2d, v0.2d, #63
112 ; CHECK-NEXT: bic v0.4s, #64, lsl #24
113 ; CHECK-NEXT: ssra v1.2d, v0.2d, #62
114 ; CHECK-NEXT: mov v0.16b, v1.16b
; clear bit 30 of each word (3221225471 = 0xBFFFFFFF)
116 %2 = and <4 x i32> %0, <i32 3221225471, i32 3221225471,i32 3221225471,i32 3221225471>
117 %3 = bitcast <4 x i32> %2 to <2 x i64>
118 %4 = lshr <2 x i64> %3, <i64 63, i64 63>
119 %5 = ashr <2 x i64> %3, <i64 62, i64 62>
; disjoint-bit or — ashr/or pair folds to ssra
120 %6 = or <2 x i64> %4, %5
124 ; Expected to deduce that movi produces a vector of known integers
125 ; and turn USHR+ORR into USRA.
126 define <8 x i16> @usra_with_movi_v8i16(<16 x i8> %0, <16 x i8> %1) {
127 ; CHECK-LABEL: usra_with_movi_v8i16:
129 ; CHECK-NEXT: movi v2.16b, #1
130 ; CHECK-NEXT: cmeq v0.16b, v0.16b, v1.16b
131 ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
132 ; CHECK-NEXT: usra v0.8h, v0.8h, #7
; each byte lane is 0 or 1 after the zext
134 %3 = icmp eq <16 x i8> %0, %1
135 %4 = zext <16 x i1> %3 to <16 x i8>
136 %5 = bitcast <16 x i8> %4 to <8 x i16>
137 %6 = lshr <8 x i16> %5, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
; %6 | %5 where %6 = %5 >> 7 — folds to usra
138 %7 = or <8 x i16> %6, %5
142 ; Expected to deduce that movi produces a vector of known integers
143 ; and turn USHR+ORR into USRA.
144 define <4 x i32> @usra_with_movi_v4i32(<16 x i8> %0, <16 x i8> %1) {
145 ; CHECK-LABEL: usra_with_movi_v4i32:
147 ; CHECK-NEXT: movi v2.16b, #1
148 ; CHECK-NEXT: cmeq v0.16b, v0.16b, v1.16b
149 ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
150 ; CHECK-NEXT: usra v0.4s, v0.4s, #15
; each byte lane is 0 or 1 after the zext
152 %3 = icmp eq <16 x i8> %0, %1
153 %4 = zext <16 x i1> %3 to <16 x i8>
154 %5 = bitcast <16 x i8> %4 to <4 x i32>
155 %6 = lshr <4 x i32> %5, <i32 15, i32 15, i32 15, i32 15>
; %6 | %5 where %6 = %5 >> 15 — folds to usra
156 %7 = or <4 x i32> %6, %5
160 ; Expected to deduce that movi produces a vector of known integers
161 ; and turn USHR+ORR into USRA.
162 define <2 x i64> @usra_with_movi_v2i64(<16 x i8> %0, <16 x i8> %1) {
163 ; CHECK-LABEL: usra_with_movi_v2i64:
165 ; CHECK-NEXT: movi v2.16b, #1
166 ; CHECK-NEXT: cmeq v0.16b, v0.16b, v1.16b
167 ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
168 ; CHECK-NEXT: usra v0.2d, v0.2d, #31
; each byte lane is 0 or 1 after the zext
170 %3 = icmp eq <16 x i8> %0, %1
171 %4 = zext <16 x i1> %3 to <16 x i8>
172 %5 = bitcast <16 x i8> %4 to <2 x i64>
173 %6 = lshr <2 x i64> %5, <i64 31, i64 31>
; %6 | %5 where %6 = %5 >> 31 — folds to usra
174 %7 = or <2 x i64> %6, %5