; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN:   FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN:   FileCheck %s

; This test case checks spilling of the CR EQ bit on Power10. On Power10, this
; is achieved by setb %reg, %CRREG (eq bit) -> stw %reg, $FI instead of:
; mfocrf %reg, %CRREG -> rlwinm %reg1, %reg, $SH, 0, 0 -> stw %reg1, $FI.
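;
; As a concrete sketch (illustrative only: the GPR, the CR field, and the
; frame offset below are hypothetical, and the bit-set form follows the setbc
; instructions that appear in the CHECK lines), spilling cr2.eq on Power10
; would be:
;   setbc r7, 4*cr2+eq        # r7 = 1 if cr2.eq is set, 0 otherwise
;   stw r7, -4(r1)            # store the materialized bit to the frame slot
; whereas the older sequence for the same bit would be:
;   mfocrf r7, 32             # copy cr2 into bits 8-11 of r7
;   rlwinm r7, r7, 10, 0, 0   # rotate cr2.eq (CR bit 10) into bit 0, mask it
;   stw r7, -4(r1)            # store to the frame slot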

; Without fine-grained control over clobbering individual CR bits,
; it is difficult to produce a concise test case that will ensure a specific
; bit of any CR field is spilled. We need to test the spilling of a CR bit
; other than the LT bit. Hence this test case is rather complex.
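;
; For reference, the closest tool available in the IR is an inline-asm
; clobber of whole CR fields (a hypothetical sketch, not used by this test):
;   call void asm sideeffect "", "~{cr1},~{cr2},~{cr3},~{cr4}"()
; Such a clobber kills entire 4-bit fields, so it cannot force one specific
; bit (for example, an EQ bit) to be spilled; hence the convoluted CFG below.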

%0 = type { i32, ptr, ptr, [1 x i8], ptr, ptr, ptr, ptr, i64, i32, [20 x i8] }
%1 = type { ptr, ptr, i32 }
%2 = type { [200 x i8], [200 x i8], ptr, ptr, ptr, ptr, ptr, ptr, ptr, i64 }
%3 = type { i64, i32, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, i64, i32, i32 }
%4 = type { i32, i64, ptr, ptr, i16, ptr, ptr, i64, i64 }

define dso_local double @P10_Spill_CR_EQ(ptr %arg) local_unnamed_addr #0 {
; CHECK-LABEL: P10_Spill_CR_EQ:
; CHECK:       # %bb.0: # %bb
; CHECK-NEXT:    mfcr r12
; CHECK-NEXT:    stw r12, 8(r1)
; CHECK-NEXT:    ld r3, 0(r3)
; CHECK-NEXT:    ld r4, 0(0)
; CHECK-NEXT:    ld r5, 56(0)
; CHECK-NEXT:    cmpdi r3, 0
; CHECK-NEXT:    cmpdi cr1, r4, 0
; CHECK-NEXT:    cmpdi cr5, r5, 0
; CHECK-NEXT:    cmpldi cr6, r3, 0
; CHECK-NEXT:    beq cr6, .LBB0_3
; CHECK-NEXT:  # %bb.1: # %bb10
; CHECK-NEXT:    lwz r3, 0(r3)
; CHECK-NEXT:    bc 12, 4*cr1+eq, .LBB0_4
; CHECK-NEXT:  .LBB0_2: # %bb14
; CHECK-NEXT:    lwz r5, 0(r3)
; CHECK-NEXT:    b .LBB0_5
; CHECK-NEXT:  .LBB0_3:
; CHECK-NEXT:    # implicit-def: $r3
; CHECK-NEXT:    bc 4, 4*cr1+eq, .LBB0_2
; CHECK-NEXT:  .LBB0_4:
; CHECK-NEXT:    # implicit-def: $r5
; CHECK-NEXT:  .LBB0_5: # %bb16
; CHECK-NEXT:    crnot 4*cr1+lt, eq
; CHECK-NEXT:    crnot 4*cr5+un, 4*cr5+eq
; CHECK-NEXT:    bc 12, 4*cr5+eq, .LBB0_7
; CHECK-NEXT:  # %bb.6: # %bb18
; CHECK-NEXT:    lwz r4, 0(r3)
; CHECK-NEXT:    b .LBB0_8
; CHECK-NEXT:  .LBB0_7:
; CHECK-NEXT:    # implicit-def: $r4
; CHECK-NEXT:  .LBB0_8: # %bb20
; CHECK-NEXT:    cmpwi cr2, r3, -1
; CHECK-NEXT:    cmpwi cr3, r4, -1
; CHECK-NEXT:    cmpwi cr7, r3, 0
; CHECK-NEXT:    cmpwi cr6, r4, 0
; CHECK-NEXT:    # implicit-def: $x3
; CHECK-NEXT:    crand 4*cr5+gt, 4*cr2+gt, 4*cr1+lt
; CHECK-NEXT:    crand 4*cr5+lt, 4*cr3+gt, 4*cr5+un
; CHECK-NEXT:    bc 4, 4*cr5+gt, .LBB0_10
; CHECK-NEXT:  # %bb.9: # %bb34
; CHECK-NEXT:    ld r3, 0(r3)
; CHECK-NEXT:  .LBB0_10: # %bb36
; CHECK-NEXT:    cmpwi cr2, r5, 0
; CHECK-NEXT:    # implicit-def: $x4
; CHECK-NEXT:    bc 4, 4*cr5+lt, .LBB0_12
; CHECK-NEXT:  # %bb.11: # %bb38
; CHECK-NEXT:    ld r4, 0(r3)
; CHECK-NEXT:  .LBB0_12: # %bb40
; CHECK-NEXT:    crand 4*cr6+gt, 4*cr7+lt, 4*cr1+lt
; CHECK-NEXT:    crand 4*cr6+lt, 4*cr6+lt, 4*cr5+un
; CHECK-NEXT:    crnot 4*cr6+un, 4*cr1+eq
; CHECK-NEXT:    # implicit-def: $x6
; CHECK-NEXT:    bc 4, 4*cr6+lt, .LBB0_14
; CHECK-NEXT:  # %bb.13: # %bb48
; CHECK-NEXT:    ld r6, 0(r3)
; CHECK-NEXT:  .LBB0_14: # %bb50
; CHECK-NEXT:    cmpwi cr3, r5, -1
; CHECK-NEXT:    crand 4*cr7+lt, 4*cr2+lt, 4*cr6+un
; CHECK-NEXT:    # implicit-def: $r5
; CHECK-NEXT:    bc 4, 4*cr6+gt, .LBB0_16
; CHECK-NEXT:  # %bb.15: # %bb52
; CHECK-NEXT:    lwz r5, 0(r3)
; CHECK-NEXT:  .LBB0_16: # %bb54
; CHECK-NEXT:    mfocrf r7, 128
; CHECK-NEXT:    stw r7, -4(r1)
; CHECK-NEXT:    # implicit-def: $r7
; CHECK-NEXT:    bc 4, 4*cr7+lt, .LBB0_18
; CHECK-NEXT:  # %bb.17: # %bb56
; CHECK-NEXT:    lwz r7, 0(r3)
; CHECK-NEXT:  .LBB0_18: # %bb58
; CHECK-NEXT:    lwz r6, 92(r6)
; CHECK-NEXT:    crand 4*cr7+un, 4*cr3+gt, 4*cr6+un
; CHECK-NEXT:    cmpwi cr3, r5, 1
; CHECK-NEXT:    cmpwi cr4, r7, 1
; CHECK-NEXT:    crand 4*cr7+gt, 4*cr7+eq, 4*cr1+lt
; CHECK-NEXT:    # implicit-def: $x5
; CHECK-NEXT:    crand 4*cr6+un, 4*cr2+eq, 4*cr6+un
; CHECK-NEXT:    crand 4*cr5+un, 4*cr6+eq, 4*cr5+un
; CHECK-NEXT:    crand 4*cr6+gt, 4*cr3+lt, 4*cr6+gt
; CHECK-NEXT:    crand 4*cr7+lt, 4*cr4+lt, 4*cr7+lt
; CHECK-NEXT:    cmpwi r6, 1
; CHECK-NEXT:    crand 4*cr6+lt, lt, 4*cr6+lt
; CHECK-NEXT:    bc 4, 4*cr6+gt, .LBB0_20
; CHECK-NEXT:  # %bb.19: # %bb68
; CHECK-NEXT:    ld r5, 0(r3)
; CHECK-NEXT:  .LBB0_20: # %bb70
; CHECK-NEXT:    ld r6, 0(r3)
; CHECK-NEXT:    lwz r9, -4(r1)
; CHECK-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, 4*cr7+eq
; CHECK-NEXT:    crandc 4*cr7+eq, 4*cr7+un, 4*cr2+eq
; CHECK-NEXT:    crandc 4*cr5+lt, 4*cr5+lt, 4*cr6+eq
; CHECK-NEXT:    setbc r7, 4*cr6+un
; CHECK-NEXT:    setbc r8, 4*cr5+un
; CHECK-NEXT:    lwz r12, 8(r1)
; CHECK-NEXT:    xxlxor f2, f2, f2
; CHECK-NEXT:    isel r3, r3, r5, 4*cr5+gt
; CHECK-NEXT:    setbc r5, 4*cr7+gt
; CHECK-NEXT:    crnor 4*cr5+gt, 4*cr6+gt, 4*cr5+gt
; CHECK-NEXT:    crnor 4*cr6+gt, 4*cr7+lt, 4*cr7+eq
; CHECK-NEXT:    crnor 4*cr5+lt, 4*cr6+lt, 4*cr5+lt
; CHECK-NEXT:    add r5, r7, r5
; CHECK-NEXT:    add r5, r8, r5
; CHECK-NEXT:    isel r3, 0, r3, 4*cr5+gt
; CHECK-NEXT:    isel r4, 0, r4, 4*cr5+lt
; CHECK-NEXT:    isel r6, 0, r6, 4*cr6+gt
; CHECK-NEXT:    mtocrf 128, r9
; CHECK-NEXT:    mtfprd f0, r5
; CHECK-NEXT:    isel r4, 0, r4, 4*cr5+eq
; CHECK-NEXT:    mtocrf 32, r12
; CHECK-NEXT:    mtocrf 16, r12
; CHECK-NEXT:    mtocrf 8, r12
; CHECK-NEXT:    iseleq r3, 0, r3
; CHECK-NEXT:    isel r6, 0, r6, 4*cr1+eq
; CHECK-NEXT:    xscvsxddp f0, f0
; CHECK-NEXT:    add r3, r6, r3
; CHECK-NEXT:    add r3, r4, r3
; CHECK-NEXT:    mtfprd f1, r3
; CHECK-NEXT:    xsmuldp f0, f0, f2
; CHECK-NEXT:    xscvsxddp f1, f1
; CHECK-NEXT:    xsadddp f1, f0, f1
; CHECK-NEXT:    blr
bb:
  %tmp = getelementptr inbounds %4, ptr null, i64 undef, i32 7
  %tmp1 = load i64, ptr undef, align 8
  %tmp2 = load i64, ptr null, align 8
  %tmp3 = load i64, ptr %tmp, align 8
  %tmp4 = icmp eq i64 %tmp1, 0
  %tmp5 = icmp eq i64 %tmp2, 0
  %tmp6 = icmp eq i64 %tmp3, 0
  %tmp7 = xor i1 %tmp4, true
  %tmp8 = xor i1 %tmp5, true
  %tmp9 = xor i1 %tmp6, true
  br i1 %tmp4, label %bb12, label %bb10

bb10:                                             ; preds = %bb
  %tmp11 = load i32, ptr undef, align 8
  br label %bb12

bb12:                                             ; preds = %bb10, %bb
  %tmp13 = phi i32 [ undef, %bb ], [ %tmp11, %bb10 ]
  br i1 %tmp5, label %bb16, label %bb14

bb14:                                             ; preds = %bb12
  %tmp15 = load i32, ptr undef, align 8
  br label %bb16

bb16:                                             ; preds = %bb14, %bb12
  %tmp17 = phi i32 [ undef, %bb12 ], [ %tmp15, %bb14 ]
  br i1 %tmp6, label %bb20, label %bb18

bb18:                                             ; preds = %bb16
  %tmp19 = load i32, ptr undef, align 8
  br label %bb20

bb20:                                             ; preds = %bb18, %bb16
  %tmp21 = phi i32 [ undef, %bb16 ], [ %tmp19, %bb18 ]
  %tmp22 = icmp slt i32 %tmp13, 0
  %tmp23 = icmp slt i32 %tmp17, 0
  %tmp24 = icmp slt i32 %tmp21, 0
  %tmp25 = icmp eq i32 %tmp13, 0
  %tmp26 = icmp eq i32 %tmp17, 0
  %tmp27 = icmp eq i32 %tmp21, 0
  %tmp28 = xor i1 %tmp22, true
  %tmp29 = xor i1 %tmp23, true
  %tmp30 = xor i1 %tmp24, true
  %tmp31 = and i1 %tmp28, %tmp7
  %tmp32 = and i1 %tmp29, %tmp8
  %tmp33 = and i1 %tmp30, %tmp9
  br i1 %tmp31, label %bb34, label %bb36

bb34:                                             ; preds = %bb20
  %tmp35 = load i64, ptr undef, align 8
  br label %bb36

bb36:                                             ; preds = %bb34, %bb20
  %tmp37 = phi i64 [ undef, %bb20 ], [ %tmp35, %bb34 ]
  br i1 %tmp33, label %bb38, label %bb40

bb38:                                             ; preds = %bb36
  %tmp39 = load i64, ptr undef, align 8
  br label %bb40

bb40:                                             ; preds = %bb38, %bb36
  %tmp41 = phi i64 [ undef, %bb36 ], [ %tmp39, %bb38 ]
  %tmp42 = and i1 %tmp25, %tmp7
  %tmp43 = and i1 %tmp26, %tmp8
  %tmp44 = and i1 %tmp27, %tmp9
  %tmp45 = and i1 %tmp22, %tmp7
  %tmp46 = and i1 %tmp23, %tmp8
  %tmp47 = and i1 %tmp24, %tmp9
  br i1 %tmp47, label %bb48, label %bb50

bb48:                                             ; preds = %bb40
  %tmp49 = load ptr, ptr undef, align 8
  br label %bb50

bb50:                                             ; preds = %bb48, %bb40
  %tmp51 = phi ptr [ undef, %bb40 ], [ %tmp49, %bb48 ]
  br i1 %tmp45, label %bb52, label %bb54

bb52:                                             ; preds = %bb50
  %tmp53 = load i32, ptr undef, align 8
  br label %bb54

bb54:                                             ; preds = %bb52, %bb50
  %tmp55 = phi i32 [ undef, %bb50 ], [ %tmp53, %bb52 ]
  br i1 %tmp46, label %bb56, label %bb58

bb56:                                             ; preds = %bb54
  %tmp57 = load i32, ptr undef, align 8
  br label %bb58

bb58:                                             ; preds = %bb56, %bb54
  %tmp59 = phi i32 [ undef, %bb54 ], [ %tmp57, %bb56 ]
  %tmp60 = getelementptr inbounds %3, ptr %tmp51, i64 0, i32 12
  %tmp61 = load i32, ptr %tmp60, align 8
  %tmp62 = icmp slt i32 %tmp55, 1
  %tmp63 = icmp slt i32 %tmp59, 1
  %tmp64 = icmp slt i32 %tmp61, 1
  %tmp65 = and i1 %tmp62, %tmp45
  %tmp66 = and i1 %tmp63, %tmp46
  %tmp67 = and i1 %tmp64, %tmp47
  br i1 %tmp65, label %bb68, label %bb70

bb68:                                             ; preds = %bb58
  %tmp69 = load i64, ptr undef, align 8
  br label %bb70

bb70:                                             ; preds = %bb68, %bb58
  %tmp71 = phi i64 [ undef, %bb58 ], [ %tmp69, %bb68 ]
  %tmp72 = load i64, ptr undef, align 8
  %tmp73 = xor i1 %tmp25, true
  %tmp74 = xor i1 %tmp26, true
  %tmp75 = xor i1 %tmp27, true
  %tmp76 = and i1 %tmp31, %tmp73
  %tmp77 = and i1 %tmp32, %tmp74
  %tmp78 = and i1 %tmp33, %tmp75
  %tmp79 = select i1 %tmp76, i64 %tmp37, i64 %tmp71
  %tmp80 = select i1 %tmp77, i64 undef, i64 %tmp72
  %tmp81 = select i1 %tmp78, i64 %tmp41, i64 undef
  %tmp82 = or i1 %tmp65, %tmp76
  %tmp83 = or i1 %tmp66, %tmp77
  %tmp84 = or i1 %tmp67, %tmp78
  %tmp85 = zext i1 %tmp42 to i64
  %tmp86 = add i64 0, %tmp85
  %tmp87 = zext i1 %tmp43 to i64
  %tmp88 = add i64 0, %tmp87
  %tmp89 = zext i1 %tmp44 to i64
  %tmp90 = add i64 0, %tmp89
  %tmp91 = select i1 %tmp82, i64 %tmp79, i64 0
  %tmp92 = add i64 0, %tmp91
  %tmp93 = select i1 %tmp83, i64 %tmp80, i64 0
  %tmp94 = add i64 0, %tmp93
  %tmp95 = select i1 %tmp84, i64 %tmp81, i64 0
  %tmp96 = add i64 0, %tmp95
  %tmp97 = select i1 %tmp42, i64 undef, i64 %tmp92
  %tmp98 = select i1 %tmp43, i64 undef, i64 %tmp94
  %tmp99 = select i1 %tmp44, i64 undef, i64 %tmp96
  %tmp100 = select i1 %tmp4, i64 0, i64 %tmp97
  %tmp101 = select i1 %tmp5, i64 0, i64 %tmp98
  %tmp102 = select i1 %tmp6, i64 0, i64 %tmp99
  %tmp103 = add i64 %tmp88, %tmp86
  %tmp104 = add i64 %tmp90, %tmp103
  %tmp105 = add i64 0, %tmp104
  %tmp106 = add i64 %tmp101, %tmp100
  %tmp107 = add i64 %tmp102, %tmp106
  %tmp108 = add i64 0, %tmp107
  %tmp109 = sitofp i64 %tmp105 to double
  %tmp110 = sitofp i64 %tmp108 to double
  %tmp111 = fmul double %tmp109, 0.000000e+00
  %tmp112 = fadd double %tmp111, %tmp110
  ret double %tmp112
}

attributes #0 = { nounwind }