; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=mips64el-linux-gnu -mcpu=mips64r6 -mattr=+msa,+fp64 < %s | FileCheck %s --check-prefixes=MIPSEL64R6
; RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64 < %s | FileCheck %s --check-prefixes=MIPSEL32R5

declare <2 x i64> @llvm.mips.slli.d(<2 x i64>, i32)
declare <2 x i64> @llvm.mips.srli.d(<2 x i64>, i32)

declare <4 x i32> @llvm.mips.slli.w(<4 x i32>, i32)
declare <4 x i32> @llvm.mips.srli.w(<4 x i32>, i32)

; do not fold (shl (srl x, c1), c2) -> (and (srl x, (sub c1, c2)), MASK) if C1 >= C2
; MASK_TYPE1 = C1-C2 0s | 1s | ends with C2 0s
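; Illustration (not part of the FileCheck output): in the test below C1 = 52 and C2 = 51,
; so the avoided combine would be (and (srl x, 1), 0x7FF8000000000000) per i64 element,
; i.e. one leading 0, twelve 1s, then 51 trailing 0s.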
define void @avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64(<2 x i64>* %a, <2 x i64>* %b) {
; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64:
; MIPSEL64R6: # %bb.0: # %entry
; MIPSEL64R6-NEXT: ld.d $w0, 0($4)
; MIPSEL64R6-NEXT: srli.d $w0, $w0, 52
; MIPSEL64R6-NEXT: slli.d $w0, $w0, 51
; MIPSEL64R6-NEXT: jr $ra
; MIPSEL64R6-NEXT: st.d $w0, 0($5)
;
; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64:
; MIPSEL32R5: # %bb.0: # %entry
; MIPSEL32R5-NEXT: ld.d $w0, 0($4)
; MIPSEL32R5-NEXT: srli.d $w0, $w0, 52
; MIPSEL32R5-NEXT: slli.d $w0, $w0, 51
; MIPSEL32R5-NEXT: jr $ra
; MIPSEL32R5-NEXT: st.d $w0, 0($5)
entry:
  %0 = load <2 x i64>, <2 x i64>* %a
  %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 52)
  %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 51)
  store <2 x i64> %2, <2 x i64>* %b
  ret void
}

; do not fold (shl (srl x, c1), c2) -> (and (srl x, (sub c1, c2)), MASK) if C1 >= C2
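; Illustration (not checked): here C1 = 6 and C2 = 4, so the avoided combine would be
; (and (srl x, 2), 0x3FFFFFFFFFFFFFF0): two leading 0s, then 1s, then 4 trailing 0s.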
define void @avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64_long(<2 x i64>* %a, <2 x i64>* %b) {
; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64_long:
; MIPSEL64R6: # %bb.0: # %entry
; MIPSEL64R6-NEXT: ld.d $w0, 0($4)
; MIPSEL64R6-NEXT: srli.d $w0, $w0, 6
; MIPSEL64R6-NEXT: slli.d $w0, $w0, 4
; MIPSEL64R6-NEXT: jr $ra
; MIPSEL64R6-NEXT: st.d $w0, 0($5)
;
; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64_long:
; MIPSEL32R5: # %bb.0: # %entry
; MIPSEL32R5-NEXT: ld.d $w0, 0($4)
; MIPSEL32R5-NEXT: srli.d $w0, $w0, 6
; MIPSEL32R5-NEXT: slli.d $w0, $w0, 4
; MIPSEL32R5-NEXT: jr $ra
; MIPSEL32R5-NEXT: st.d $w0, 0($5)
entry:
  %0 = load <2 x i64>, <2 x i64>* %a
  %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 6)
  %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 4)
  store <2 x i64> %2, <2 x i64>* %b
  ret void
}

; do not fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) if C1 < C2
; MASK_TYPE2 = 1s | C2 zeros
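; Illustration (not checked): here C1 = 4 and C2 = 6, so the avoided combine would be
; (and (shl x, 2), 0xFFFFFFFFFFFFFFC0) per i64 element, i.e. all 1s then 6 trailing 0s.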
define void @avoid_to_combine_shifts_to_shift_plus_and_mask_type2_i32(<2 x i64>* %a, <2 x i64>* %b) {
; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type2_i32:
; MIPSEL64R6: # %bb.0: # %entry
; MIPSEL64R6-NEXT: ld.d $w0, 0($4)
; MIPSEL64R6-NEXT: srli.d $w0, $w0, 4
; MIPSEL64R6-NEXT: slli.d $w0, $w0, 6
; MIPSEL64R6-NEXT: jr $ra
; MIPSEL64R6-NEXT: st.d $w0, 0($5)
;
; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type2_i32:
; MIPSEL32R5: # %bb.0: # %entry
; MIPSEL32R5-NEXT: ld.d $w0, 0($4)
; MIPSEL32R5-NEXT: srli.d $w0, $w0, 4
; MIPSEL32R5-NEXT: slli.d $w0, $w0, 6
; MIPSEL32R5-NEXT: jr $ra
; MIPSEL32R5-NEXT: st.d $w0, 0($5)
entry:
  %0 = load <2 x i64>, <2 x i64>* %a
  %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 4)
  %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 6)
  store <2 x i64> %2, <2 x i64>* %b
  ret void
}

; do not fold (shl (srl x, c1), c2) -> (and (srl x, (sub c1, c2)), MASK) if C1 >= C2
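; Illustration (not checked): for the i32 elements below, C1 = 7 and C2 = 3, so the avoided
; combine would be (and (srl x, 4), 0x0FFFFFF8): four leading 0s, then 1s, then 3 trailing 0s.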
define void @avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i32_long(<4 x i32>* %a, <4 x i32>* %b) {
; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i32_long:
; MIPSEL64R6: # %bb.0: # %entry
; MIPSEL64R6-NEXT: ld.w $w0, 0($4)
; MIPSEL64R6-NEXT: srli.w $w0, $w0, 7
; MIPSEL64R6-NEXT: slli.w $w0, $w0, 3
; MIPSEL64R6-NEXT: jr $ra
; MIPSEL64R6-NEXT: st.w $w0, 0($5)
;
; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i32_long:
; MIPSEL32R5: # %bb.0: # %entry
; MIPSEL32R5-NEXT: ld.w $w0, 0($4)
; MIPSEL32R5-NEXT: srli.w $w0, $w0, 7
; MIPSEL32R5-NEXT: slli.w $w0, $w0, 3
; MIPSEL32R5-NEXT: jr $ra
; MIPSEL32R5-NEXT: st.w $w0, 0($5)
entry:
  %0 = load <4 x i32>, <4 x i32>* %a
  %1 = tail call <4 x i32> @llvm.mips.srli.w(<4 x i32> %0, i32 7)
  %2 = tail call <4 x i32> @llvm.mips.slli.w(<4 x i32> %1, i32 3)
  store <4 x i32> %2, <4 x i32>* %b
  ret void
}

; do not fold (shl (srl x, c1), c1) -> (and x, (shl -1, c1))
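; Illustration (not checked): with C1 = 38 the avoided combine would be
; (and x, 0xFFFFFFC000000000), i.e. the low 38 bits of each i64 element cleared.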
define void @avoid_to_combine_shifts_to_and_mask_type2_i64_long(<2 x i64>* %a, <2 x i64>* %b) {
; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_and_mask_type2_i64_long:
; MIPSEL64R6: # %bb.0: # %entry
; MIPSEL64R6-NEXT: ld.d $w0, 0($4)
; MIPSEL64R6-NEXT: srli.d $w0, $w0, 38
; MIPSEL64R6-NEXT: slli.d $w0, $w0, 38
; MIPSEL64R6-NEXT: jr $ra
; MIPSEL64R6-NEXT: st.d $w0, 0($5)
;
; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_and_mask_type2_i64_long:
; MIPSEL32R5: # %bb.0: # %entry
; MIPSEL32R5-NEXT: ld.d $w0, 0($4)
; MIPSEL32R5-NEXT: srli.d $w0, $w0, 38
; MIPSEL32R5-NEXT: slli.d $w0, $w0, 38
; MIPSEL32R5-NEXT: jr $ra
; MIPSEL32R5-NEXT: st.d $w0, 0($5)
entry:
  %0 = load <2 x i64>, <2 x i64>* %a
  %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 38)
  %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 38)
  store <2 x i64> %2, <2 x i64>* %b
  ret void
}

; do not fold (shl (srl x, c1), c1) -> (and x, (shl -1, c1))
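; Illustration (not checked): with C1 = 3 the avoided combine would be
; (and x, 0xFFFFFFFFFFFFFFF8), i.e. the low 3 bits of each i64 element cleared.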
define void @avoid_to_combine_shifts_to_and_mask_type2_i64(<2 x i64>* %a, <2 x i64>* %b) {
; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_and_mask_type2_i64:
; MIPSEL64R6: # %bb.0: # %entry
; MIPSEL64R6-NEXT: ld.d $w0, 0($4)
; MIPSEL64R6-NEXT: srli.d $w0, $w0, 3
; MIPSEL64R6-NEXT: slli.d $w0, $w0, 3
; MIPSEL64R6-NEXT: jr $ra
; MIPSEL64R6-NEXT: st.d $w0, 0($5)
;
; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_and_mask_type2_i64:
; MIPSEL32R5: # %bb.0: # %entry
; MIPSEL32R5-NEXT: ld.d $w0, 0($4)
; MIPSEL32R5-NEXT: srli.d $w0, $w0, 3
; MIPSEL32R5-NEXT: slli.d $w0, $w0, 3
; MIPSEL32R5-NEXT: jr $ra
; MIPSEL32R5-NEXT: st.d $w0, 0($5)
entry:
  %0 = load <2 x i64>, <2 x i64>* %a
  %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 3)
  %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 3)
  store <2 x i64> %2, <2 x i64>* %b
  ret void
}

; do not fold (shl (srl x, c1), c1) -> (and x, (shl -1, c1))
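; Illustration (not checked): with C1 = 5 the avoided combine would be
; (and x, 0xFFFFFFE0) per i32 element, i.e. the low 5 bits cleared.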
define void @avoid_to_combine_shifts_to_and_mask_type1_long_i32_a(<4 x i32>* %a, <4 x i32>* %b) {
; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_and_mask_type1_long_i32_a:
; MIPSEL64R6: # %bb.0: # %entry
; MIPSEL64R6-NEXT: ld.w $w0, 0($4)
; MIPSEL64R6-NEXT: srli.w $w0, $w0, 5
; MIPSEL64R6-NEXT: slli.w $w0, $w0, 5
; MIPSEL64R6-NEXT: jr $ra
; MIPSEL64R6-NEXT: st.w $w0, 0($5)
;
; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_and_mask_type1_long_i32_a:
; MIPSEL32R5: # %bb.0: # %entry
; MIPSEL32R5-NEXT: ld.w $w0, 0($4)
; MIPSEL32R5-NEXT: srli.w $w0, $w0, 5
; MIPSEL32R5-NEXT: slli.w $w0, $w0, 5
; MIPSEL32R5-NEXT: jr $ra
; MIPSEL32R5-NEXT: st.w $w0, 0($5)
entry:
  %0 = load <4 x i32>, <4 x i32>* %a
  %1 = tail call <4 x i32> @llvm.mips.srli.w(<4 x i32> %0, i32 5)
  %2 = tail call <4 x i32> @llvm.mips.slli.w(<4 x i32> %1, i32 5)
  store <4 x i32> %2, <4 x i32>* %b
  ret void
}

; do not fold (shl (srl x, c1), c1) -> (and x, (shl -1, c1))
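; Illustration (not checked): with C1 = 30 the avoided combine would be
; (and x, 0xC0000000) per i32 element, keeping only the top two bits.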
define void @avoid_to_combine_shifts_to_and_mask_type1_long_i32_b(<4 x i32>* %a, <4 x i32>* %b) {
; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_and_mask_type1_long_i32_b:
; MIPSEL64R6: # %bb.0: # %entry
; MIPSEL64R6-NEXT: ld.w $w0, 0($4)
; MIPSEL64R6-NEXT: srli.w $w0, $w0, 30
; MIPSEL64R6-NEXT: slli.w $w0, $w0, 30
; MIPSEL64R6-NEXT: jr $ra
; MIPSEL64R6-NEXT: st.w $w0, 0($5)
;
; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_and_mask_type1_long_i32_b:
; MIPSEL32R5: # %bb.0: # %entry
; MIPSEL32R5-NEXT: ld.w $w0, 0($4)
; MIPSEL32R5-NEXT: srli.w $w0, $w0, 30
; MIPSEL32R5-NEXT: slli.w $w0, $w0, 30
; MIPSEL32R5-NEXT: jr $ra
; MIPSEL32R5-NEXT: st.w $w0, 0($5)
entry:
  %0 = load <4 x i32>, <4 x i32>* %a
  %1 = tail call <4 x i32> @llvm.mips.srli.w(<4 x i32> %0, i32 30)
  %2 = tail call <4 x i32> @llvm.mips.slli.w(<4 x i32> %1, i32 30)
  store <4 x i32> %2, <4 x i32>* %b
  ret void
}