1 # RUN: llc -mtriple=aarch64-linux-gnu -mcpu=falkor -run-pass aarch64-falkor-hwpf-fix-late -o - %s | FileCheck %s
3 # Verify that the tag collision between the loads is resolved for various load opcodes.
# hwpf1: an LDRWui carrying the "aarch64-strided-access" hint loads through
# $x1. The directives below expect $x1 to be copied into a scratch register
# with ORRXrs and the load to use that scratch register as its base.
5 # CHECK-LABEL: name: hwpf1
6 # CHECK: $[[BASE:[a-z0-9]+]] = ORRXrs $xzr, $x1, 0
7 # CHECK: LDRWui $[[BASE]], 0
10 tracksRegLiveness: true
15 $w2 = LDRWui $x1, 0 :: ("aarch64-strided-access" load (s32))
18 $w0 = SUBWri $w0, 1, 0
19 $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv
20 Bcc 9, %bb.0, implicit $nzcv
# hwpf2: same scenario with an LD1i64 lane load as the strided access. The
# LD1i64 is expected to be rebased onto a scratch copy of $x1, while a later
# LDRWui is expected to keep $x1 as its base.
26 # CHECK-LABEL: name: hwpf2
27 # CHECK: $[[BASE:[a-z0-9]+]] = ORRXrs $xzr, $x1, 0
28 # CHECK: LD1i64 $q2, 0, $[[BASE]]
29 # CHECK: LDRWui $x1, 0
31 tracksRegLiveness: true
34 liveins: $w0, $x1, $q2
36 $q2 = LD1i64 $q2, 0, $x1 :: ("aarch64-strided-access" load (s32))
39 $w0 = SUBWri $w0, 1, 0
40 $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv
41 Bcc 9, %bb.0, implicit $nzcv
# hwpf3: same scenario with an LD1i8 lane load as the strided access. The
# LD1i8 is expected to be rebased onto a scratch copy of $x1, while a later
# LDRWui is expected to keep $x1 as its base.
47 # CHECK-LABEL: name: hwpf3
48 # CHECK: $[[BASE:[a-z0-9]+]] = ORRXrs $xzr, $x1, 0
49 # CHECK: LD1i8 $q2, 0, $[[BASE]]
50 # CHECK: LDRWui $x1, 0
52 tracksRegLiveness: true
55 liveins: $w0, $x1, $q2
57 $q2 = LD1i8 $q2, 0, $x1 :: ("aarch64-strided-access" load (s32))
60 $w0 = SUBWri $w0, 1, 0
61 $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv
62 Bcc 9, %bb.0, implicit $nzcv
# hwpf4: same scenario with LD1Onev1d as the strided access. The LD1Onev1d is
# expected to be rebased onto a scratch copy of $x1, while a later LDRWui is
# expected to keep $x1 as its base.
68 # CHECK-LABEL: name: hwpf4
69 # CHECK: $[[BASE:[a-z0-9]+]] = ORRXrs $xzr, $x1, 0
70 # CHECK: LD1Onev1d $[[BASE]]
71 # CHECK: LDRWui $x1, 0
73 tracksRegLiveness: true
78 $d2 = LD1Onev1d $x1 :: ("aarch64-strided-access" load (s32))
81 $w0 = SUBWri $w0, 1, 0
82 $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv
83 Bcc 9, %bb.0, implicit $nzcv
# hwpf5: same scenario with LD1Twov1d (defining the $d2_d3 register pair) as
# the strided access. The LD1Twov1d is expected to be rebased onto a scratch
# copy of $x1, while a later LDRWui is expected to keep $x1 as its base.
89 # CHECK-LABEL: name: hwpf5
90 # CHECK: $[[BASE:[a-z0-9]+]] = ORRXrs $xzr, $x1, 0
91 # CHECK: LD1Twov1d $[[BASE]]
92 # CHECK: LDRWui $x1, 0
94 tracksRegLiveness: true
99 $d2_d3 = LD1Twov1d $x1 :: ("aarch64-strided-access" load (s32))
102 $w0 = SUBWri $w0, 1, 0
103 $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv
104 Bcc 9, %bb.0, implicit $nzcv
# hwpf6: same scenario with a paired load, LDPQi, as the strided access. The
# LDPQi is expected to be rebased onto a scratch copy of $x1, while a later
# LDRWui (immediate 3) is expected to keep $x1 as its base.
110 # CHECK-LABEL: name: hwpf6
111 # CHECK: $[[BASE:[a-z0-9]+]] = ORRXrs $xzr, $x1, 0
112 # CHECK: LDPQi $[[BASE]]
113 # CHECK: LDRWui $x1, 3
115 tracksRegLiveness: true
120 $q2, $q3 = LDPQi $x1, 3 :: ("aarch64-strided-access" load (s32))
123 $w0 = SUBWri $w0, 1, 0
124 $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv
125 Bcc 9, %bb.0, implicit $nzcv
# hwpf7: same scenario with a paired load, LDPXi, as the strided access. The
# LDPXi is expected to be rebased onto a scratch copy of $x1, while a later
# LDRWui (immediate 2) is expected to keep $x1 as its base.
131 # CHECK-LABEL: name: hwpf7
132 # CHECK: $[[BASE:[a-z0-9]+]] = ORRXrs $xzr, $x1, 0
133 # CHECK: LDPXi $[[BASE]]
134 # CHECK: LDRWui $x1, 2
136 tracksRegLiveness: true
141 $x2, $x3 = LDPXi $x1, 3 :: ("aarch64-strided-access" load (s32))
144 $w0 = SUBWri $w0, 1, 0
145 $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv
146 Bcc 9, %bb.0, implicit $nzcv
152 # Verify that the tag collision between the loads is resolved and written back
153 # for post increment addressing for various load opcodes.
# hwpfinc1: post-increment LDRWpost with the strided-access hint; it both reads
# and redefines $x1. The directives expect the load to run on a scratch copy of
# $x1, followed by an ORRXrs that copies the scratch register back into $x1
# before the next LDRWui uses $x1.
155 # CHECK-LABEL: name: hwpfinc1
156 # CHECK: $[[BASE:[a-z0-9]+]] = ORRXrs $xzr, $x1, 0
157 # CHECK: LDRWpost $[[BASE]], 0
158 # CHECK: $x1 = ORRXrs $xzr, $[[BASE]], 0
159 # CHECK: LDRWui $x1, 1
161 tracksRegLiveness: true
166 $x1, $w2 = LDRWpost $x1, 0 :: ("aarch64-strided-access" load (s32))
169 $w0 = SUBWri $w0, 1, 0
170 $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv
171 Bcc 9, %bb.0, implicit $nzcv
# hwpfinc2: post-increment LD1i64_POST with the strided-access hint; it both
# reads and redefines $x1. The directives expect the load to run on a scratch
# copy of $x1, followed by an ORRXrs that copies the scratch register back into
# $x1 before the next LDRWui uses $x1.
# The last directive spells out the full immediate (132): FileCheck matches
# substrings, so a shorter immediate would also accept other offsets that
# merely start with the same digit.
177 # CHECK-LABEL: name: hwpfinc2
178 # CHECK: $[[BASE:[a-z0-9]+]] = ORRXrs $xzr, $x1, 0
179 # CHECK: LD1i64_POST $q2, 0, $[[BASE]]
180 # CHECK: $x1 = ORRXrs $xzr, $[[BASE]], 0
181 # CHECK: LDRWui $x1, 132
183 tracksRegLiveness: true
186 liveins: $w0, $x1, $q2
188 $x1, $q2 = LD1i64_POST $q2, 0, $x1, $x1 :: ("aarch64-strided-access" load (s32))
189 $w2 = LDRWui $x1, 132
191 $w0 = SUBWri $w0, 1, 0
192 $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv
193 Bcc 9, %bb.0, implicit $nzcv
# hwpfinc3: post-increment LD1i8_POST with the strided-access hint; it both
# reads and redefines $x1. The directives expect the load to run on a scratch
# copy of $x1, followed by an ORRXrs that copies the scratch register back into
# $x1 before the following LDRWui (immediate 132) uses $x1.
199 # CHECK-LABEL: name: hwpfinc3
200 # CHECK: $[[BASE:[a-z0-9]+]] = ORRXrs $xzr, $x1, 0
201 # CHECK: LD1i8_POST $q2, 0, $[[BASE]]
202 # CHECK: $x1 = ORRXrs $xzr, $[[BASE]], 0
203 # CHECK: LDRWui $x1, 132
205 tracksRegLiveness: true
208 liveins: $w0, $x1, $q2
210 $x1, $q2 = LD1i8_POST $q2, 0, $x1, $x1 :: ("aarch64-strided-access" load (s32))
211 $w0 = LDRWui $x1, 132
213 $w0 = SUBWri $w0, 1, 0
214 $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv
215 Bcc 9, %bb.0, implicit $nzcv
# hwpfinc4: post-increment LD1Rv1d_POST (with $xzr as its second operand) with
# the strided-access hint; it both reads and redefines $x1. The directives
# expect the load to run on a scratch copy of $x1, followed by an ORRXrs that
# copies the scratch register back into $x1 before the following LDRWui
# (immediate 252) uses $x1.
221 # CHECK-LABEL: name: hwpfinc4
222 # CHECK: $[[BASE:[a-z0-9]+]] = ORRXrs $xzr, $x1, 0
223 # CHECK: LD1Rv1d_POST $[[BASE]]
224 # CHECK: $x1 = ORRXrs $xzr, $[[BASE]], 0
225 # CHECK: LDRWui $x1, 252
227 tracksRegLiveness: true
230 liveins: $w0, $x1, $q2
232 $x1, $d2 = LD1Rv1d_POST $x1, $xzr :: ("aarch64-strided-access" load (s32))
233 $w2 = LDRWui $x1, 252
235 $w0 = SUBWri $w0, 1, 0
236 $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv
237 Bcc 9, %bb.0, implicit $nzcv
# hwpfinc5: post-increment LD3Threev2s_POST (with $x0 as its second operand)
# with the strided-access hint; it both reads and redefines $x1. The directives
# expect the load to run on a scratch copy of $x1, followed by an ORRXrs that
# copies the scratch register back into $x1. The second load here is a
# register-offset LDRWroX based on $x17, and it is expected to stay unchanged.
243 # CHECK-LABEL: name: hwpfinc5
244 # CHECK: $[[BASE:[a-z0-9]+]] = ORRXrs $xzr, $x1, 0
245 # CHECK: LD3Threev2s_POST $[[BASE]]
246 # CHECK: $x1 = ORRXrs $xzr, $[[BASE]], 0
247 # CHECK: LDRWroX $x17, $x0
249 tracksRegLiveness: true
252 liveins: $w0, $x1, $x17, $q2
254 $x1, $d2_d3_d4 = LD3Threev2s_POST $x1, $x0 :: ("aarch64-strided-access" load (s32))
255 $w0 = LDRWroX $x17, $x0, 0, 0
257 $w0 = SUBWri $w0, 1, 0
258 $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv
259 Bcc 9, %bb.0, implicit $nzcv
# hwpfinc6: post-increment paired load LDPDpost with the strided-access hint;
# it both reads and redefines $x1. The directives expect the load to run on a
# scratch copy of $x1, followed by an ORRXrs that copies the scratch register
# back into $x1. The second load is an LDRWui based on $x17, and it is expected
# to stay unchanged.
265 # CHECK-LABEL: name: hwpfinc6
266 # CHECK: $[[BASE:[a-z0-9]+]] = ORRXrs $xzr, $x1, 0
267 # CHECK: LDPDpost $[[BASE]]
268 # CHECK: $x1 = ORRXrs $xzr, $[[BASE]], 0
269 # CHECK: LDRWui $x17, 2
271 tracksRegLiveness: true
274 liveins: $w0, $x1, $x17, $q2
276 $x1, $d2, $d3 = LDPDpost $x1, 3 :: ("aarch64-strided-access" load (s32))
277 $w16 = LDRWui $x17, 2
279 $w0 = SUBWri $w0, 1, 0
280 $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv
281 Bcc 9, %bb.0, implicit $nzcv
# hwpfinc7: post-increment paired load LDPXpost with the strided-access hint;
# it both reads and redefines $x1. The directives expect the load to run on a
# scratch copy of $x1, followed by an ORRXrs that copies the scratch register
# back into $x1. The second load is an LDRWui based on $x17, and it is expected
# to stay unchanged.
287 # CHECK-LABEL: name: hwpfinc7
288 # CHECK: $[[BASE:[a-z0-9]+]] = ORRXrs $xzr, $x1, 0
289 # CHECK: LDPXpost $[[BASE]]
290 # CHECK: $x1 = ORRXrs $xzr, $[[BASE]], 0
291 # CHECK: LDRWui $x17, 2
293 tracksRegLiveness: true
296 liveins: $w0, $x1, $x17, $q2
298 $x1, $x2, $x3 = LDPXpost $x1, 3 :: ("aarch64-strided-access" load (s32))
299 $w18 = LDRWui $x17, 2
301 $w0 = SUBWri $w0, 1, 0
302 $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv
303 Bcc 9, %bb.0, implicit $nzcv
309 # Check that we correctly handle the case of a strided load with no HW prefetcher tag.
# hwpf_notagbug: three loads carry the strided-access hint: an LDARW on $x1, an
# LDRWui on $x1 and an LDRWui on $x17. The negative checks below require that
# no ORRXrs copy from $xzr shows up in the regions of the output they cover.
311 # CHECK-LABEL: name: hwpf_notagbug
312 # CHECK-NOT: ORRXrs $xzr
314 # CHECK-NOT: ORRXrs $xzr
317 tracksRegLiveness: true
320 liveins: $w0, $x1, $x17
322 $w1 = LDARW $x1 :: ("aarch64-strided-access" load (s32))
323 $w1 = LDRWui $x1, 0 :: ("aarch64-strided-access" load (s32))
324 $w17 = LDRWui $x17, 0 :: ("aarch64-strided-access" load (s32))
326 $w0 = SUBWri $w0, 1, 0
327 $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv
328 Bcc 9, %bb.0, implicit $nzcv
334 # Check that we treat sp-based loads as non-prefetching.
# hwpf_spbase: an LDRWui on $x15 carries the strided-access hint. The negative
# check below requires that no ORRXrs copy from $xzr is inserted in the region
# of the output it covers.
336 # CHECK-LABEL: name: hwpf_spbase
337 # CHECK-NOT: ORRXrs $xzr
341 tracksRegLiveness: true
346 $w1 = LDRWui $x15, 0 :: ("aarch64-strided-access" load (s32))
349 $w0 = SUBWri $w0, 1, 0
350 $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv
351 Bcc 9, %bb.0, implicit $nzcv
357 # Check that non-base registers are considered live when finding a
358 # scratch register by making sure we don't use $x2 for the scratch
359 # register for the inserted ORRXrs.
# Here $x2 is the offset register of the strided LDRWroX, and it is redefined
# only after that load. The directives therefore pin the scratch copy of the
# base $x1 to $x3 and expect the load to keep $x2 as its offset.
360 # CHECK-LABEL: name: hwpf_offreg
361 # CHECK: $x3 = ORRXrs $xzr, $x1, 0
362 # CHECK: $w10 = LDRWroX $x3, $x2, 0, 0
364 tracksRegLiveness: true
367 liveins: $w0, $x1, $x2, $x17, $x18
369 $w10 = LDRWroX $x1, $x2, 0, 0 :: ("aarch64-strided-access" load (s32))
371 $x2 = ORRXrs $xzr, $x10, 0
372 $w26 = LDRWroX $x1, $x2, 0, 0
374 $w0 = SUBWri $w0, 1, 0
375 $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv
376 Bcc 9, %bb.0, implicit $nzcv