1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mcpu cortex-a53 -mtriple=aarch64-eabi | FileCheck %s --check-prefix=A53
4 ; PR26827 - Merge stores causes wrong dependency.
; Record type that @test fills in through its %fde argument: two pointers,
; two i32 fields, two i16 fields, then two more pointers.
5 %struct1 = type { ptr, ptr, i32, i32, i16, i16, ptr, ptr }
; i32 global that @test loads and whose address it compares against the
; address of field 2 of the %struct1 object.
6 @gv0 = internal unnamed_addr global i32 0, align 4
; Pointer global that @test overwrites with the value returned by @foo.
7 @gv1 = internal unnamed_addr global ptr null, align 8
; Store-merging dependency test. The IR zeroes the first 40 bytes of %fde with
; memset, overwrites individual %struct1 fields with scalar stores, hands %fde
; to fcntl, and afterwards reloads field 2. In the IR every field store comes
; before the fcntl call and the reload comes after it. The expected assembly
; below lists two 16-byte zero stores (q0) alongside the individual field
; stores.
9 define void @test(ptr %fde, i32 %fd, ptr %func, ptr %arg) uwtable {
12 ; A53: // %bb.0: // %entry
13 ; A53-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
14 ; A53-NEXT: .cfi_def_cfa_offset 16
15 ; A53-NEXT: .cfi_offset w19, -8
16 ; A53-NEXT: .cfi_offset w30, -16
17 ; A53-NEXT: .cfi_remember_state
18 ; A53-NEXT: movi v0.2d, #0000000000000000
19 ; A53-NEXT: mov x8, x0
20 ; A53-NEXT: mov x19, x8
21 ; A53-NEXT: mov w0, w1
22 ; A53-NEXT: mov w9, #256
23 ; A53-NEXT: stp x2, x3, [x8, #32]
24 ; A53-NEXT: mov x2, x8
25 ; A53-NEXT: str q0, [x19, #16]!
26 ; A53-NEXT: str w1, [x19]
27 ; A53-NEXT: mov w1, #4
28 ; A53-NEXT: str q0, [x8]
29 ; A53-NEXT: strh w9, [x8, #24]
30 ; A53-NEXT: str wzr, [x8, #20]
32 ; A53-NEXT: adrp x9, gv0
33 ; A53-NEXT: add x9, x9, :lo12:gv0
34 ; A53-NEXT: cmp x19, x9
35 ; A53-NEXT: b.eq .LBB0_4
37 ; A53-NEXT: ldr w8, [x19]
38 ; A53-NEXT: ldr w9, [x9]
39 ; A53-NEXT: .p2align 4, , 8
40 ; A53-NEXT: .LBB0_2: // %while.body.i.split.ver.us
41 ; A53-NEXT: // =>This Inner Loop Header: Depth=1
42 ; A53-NEXT: lsl w9, w9, #1
43 ; A53-NEXT: cmp w9, w8
44 ; A53-NEXT: b.le .LBB0_2
45 ; A53-NEXT: // %bb.3: // %while.end.i
47 ; A53-NEXT: adrp x8, gv1
48 ; A53-NEXT: str x0, [x8, :lo12:gv1]
49 ; A53-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
50 ; A53-NEXT: .cfi_def_cfa_offset 0
51 ; A53-NEXT: .cfi_restore w19
52 ; A53-NEXT: .cfi_restore w30
54 ; A53-NEXT: .p2align 4, , 8
55 ; A53-NEXT: .LBB0_4: // %while.body.i.split
56 ; A53-NEXT: // =>This Inner Loop Header: Depth=1
57 ; A53-NEXT: .cfi_restore_state
; Zero bytes 0..39 of the object pointed to by %fde.
60 tail call void @llvm.memset.p0.i64(ptr align 8 %fde, i8 0, i64 40, i1 false)
; Field 4 (i16) = 256.
61 %state = getelementptr inbounds %struct1, ptr %fde, i64 0, i32 4
62 store i16 256, ptr %state, align 8
; Field 2 (i32) = %fd. %fd1 is reused below for the reload and for the
; address comparison against @gv0.
63 %fd1 = getelementptr inbounds %struct1, ptr %fde, i64 0, i32 2
64 store i32 %fd, ptr %fd1, align 8
; Field 3 (i32) = 0.
65 %force_eof = getelementptr inbounds %struct1, ptr %fde, i64 0, i32 3
66 store i32 0, ptr %force_eof, align 4
; Field 6 (ptr) = %func.
67 %func2 = getelementptr inbounds %struct1, ptr %fde, i64 0, i32 6
68 store ptr %func, ptr %func2, align 8
; Field 7 (ptr) = %arg.
69 %arg3 = getelementptr inbounds %struct1, ptr %fde, i64 0, i32 7
70 store ptr %arg, ptr %arg3, align 8
; The call receives %fde itself, so the callee is handed the object that the
; stores above have just initialized.
71 %call = tail call i32 (i32, i32, ...) @fcntl(i32 %fd, i32 4, ptr %fde) #6
; Reload field 2 after the call and branch on whether it is negative.
72 %0 = load i32, ptr %fd1, align 8
73 %cmp.i = icmp slt i32 %0, 0
74 br i1 %cmp.i, label %if.then.i, label %while.body.i.preheader
78 while.body.i.preheader:
; Read @gv0, then choose a loop depending on whether %fd1 is the address of
; @gv0 itself.
79 %1 = load i32, ptr @gv0, align 4
80 %2 = icmp eq ptr %fd1, @gv0
81 br i1 %2, label %while.body.i.split, label %while.body.i.split.ver.us.preheader
83 while.body.i.split.ver.us.preheader:
84 br label %while.body.i.split.ver.us
86 while.body.i.split.ver.us:
; Starting from the value loaded from @gv0, keep doubling until the result is
; signed-greater than the reloaded field value %0.
87 %.reg2mem21.0 = phi i32 [ %mul.i.ver.us, %while.body.i.split.ver.us ], [ %1, %while.body.i.split.ver.us.preheader ]
88 %mul.i.ver.us = shl nsw i32 %.reg2mem21.0, 1
89 %3 = icmp sgt i32 %mul.i.ver.us, %0
90 br i1 %3, label %while.end.i, label %while.body.i.split.ver.us
; NOTE(review): the label lines for the branch targets if.then.i,
; while.end.i and while.body.i.split are not visible in this listing;
; confirm against the complete file.
93 br label %while.body.i.split
; Call @foo and publish its result in @gv1.
96 %call.i = tail call ptr @foo()
97 store ptr %call.i, ptr @gv1, align 8
; Swap the two adjacent bytes at %p in place. The expected code uses two byte
; loads followed by two byte stores.
106 define void @rotate16_in_place(ptr %p) {
107 ; A53-LABEL: rotate16_in_place:
109 ; A53-NEXT: ldrb w8, [x0, #1]
110 ; A53-NEXT: ldrb w9, [x0]
111 ; A53-NEXT: strb w8, [x0]
112 ; A53-NEXT: strb w9, [x0, #1]
; %p1 addresses the byte immediately after %p.
114 %p1 = getelementptr i8, ptr %p, i64 1
; Both bytes are loaded before either one is overwritten.
115 %i0 = load i8, ptr %p, align 1
116 %i1 = load i8, ptr %p1, align 1
; Write the bytes back in exchanged positions.
117 store i8 %i1, ptr %p, align 1
118 store i8 %i0, ptr %p1, align 1
; Read two adjacent bytes from %p and write them, exchanged, to the two
; adjacent bytes at %q. The expected code uses two byte loads from x0 and two
; byte stores to x1.
124 define void @rotate16(ptr %p, ptr %q) {
125 ; A53-LABEL: rotate16:
127 ; A53-NEXT: ldrb w8, [x0, #1]
128 ; A53-NEXT: ldrb w9, [x0]
129 ; A53-NEXT: strb w8, [x1]
130 ; A53-NEXT: strb w9, [x1, #1]
; Addresses of the second source byte and the second destination byte.
132 %p1 = getelementptr i8, ptr %p, i64 1
133 %q1 = getelementptr i8, ptr %q, i64 1
; Load source bytes 0 and 1.
134 %i0 = load i8, ptr %p, align 1
135 %i1 = load i8, ptr %p1, align 1
; Source byte 1 goes to destination byte 0 and vice versa.
136 store i8 %i1, ptr %q, align 1
137 store i8 %i0, ptr %q1, align 1
; Swap the two adjacent i16 values at %p in place. The expected code is one
; 32-bit load, a rotate right by 16, and one 32-bit store to the same address.
141 define void @rotate32_in_place(ptr %p) {
142 ; A53-LABEL: rotate32_in_place:
144 ; A53-NEXT: ldr w8, [x0]
145 ; A53-NEXT: ror w8, w8, #16
146 ; A53-NEXT: str w8, [x0]
; %p1 addresses i16 element 1.
148 %p1 = getelementptr i16, ptr %p, i64 1
; Load both halfwords before storing.
149 %i0 = load i16, ptr %p, align 2
150 %i1 = load i16, ptr %p1, align 2
; Write the halfwords back in exchanged positions.
151 store i16 %i1, ptr %p, align 2
152 store i16 %i0, ptr %p1, align 2
; Read i16 elements 0 and 1 of %p and write them, exchanged, to elements 42
; and 43 of the same pointer. The expected code is one 32-bit load, a rotate
; right by 16, and one 32-bit store at byte offset 84.
156 define void @rotate32(ptr %p) {
157 ; A53-LABEL: rotate32:
159 ; A53-NEXT: ldr w8, [x0]
160 ; A53-NEXT: ror w8, w8, #16
161 ; A53-NEXT: str w8, [x0, #84]
; Source element 1 and destination elements 42 and 43.
163 %p1 = getelementptr i16, ptr %p, i64 1
164 %p42 = getelementptr i16, ptr %p, i64 42
165 %p43 = getelementptr i16, ptr %p, i64 43
; Load the two source halfwords.
166 %i0 = load i16, ptr %p, align 2
167 %i1 = load i16, ptr %p1, align 2
; Element 1 goes to slot 42 and element 0 goes to slot 43.
168 store i16 %i1, ptr %p42, align 2
169 store i16 %i0, ptr %p43, align 2
173 ; Prefer paired memops over rotate.
; Swap the two adjacent i32 values at %p in place. The expected code is a
; load-pair followed by a store-pair with the two registers exchanged.
175 define void @rotate64_in_place(ptr %p) {
176 ; A53-LABEL: rotate64_in_place:
178 ; A53-NEXT: ldp w9, w8, [x0]
179 ; A53-NEXT: stp w8, w9, [x0]
; %p1 addresses i32 element 1.
181 %p1 = getelementptr i32, ptr %p, i64 1
; Load both words before storing.
182 %i0 = load i32, ptr %p, align 4
183 %i1 = load i32, ptr %p1, align 4
; Write the words back in exchanged positions.
184 store i32 %i1, ptr %p, align 4
185 store i32 %i0, ptr %p1, align 4
189 ; Prefer paired memops over rotate.
; Read i32 elements 0 and 1 of %p and write them, exchanged, to elements 2 and
; 3 of the same pointer. The expected code is a load-pair from x0 followed by
; a store-pair at byte offset 8 with the two registers exchanged.
191 define void @rotate64(ptr %p) {
192 ; A53-LABEL: rotate64:
194 ; A53-NEXT: ldp w9, w8, [x0]
195 ; A53-NEXT: stp w8, w9, [x0, #8]
; Source element 1 and destination elements 2 and 3.
197 %p1 = getelementptr i32, ptr %p, i64 1
198 %p2 = getelementptr i32, ptr %p, i64 2
199 %p3 = getelementptr i32, ptr %p, i64 3
; Load the two source words.
200 %i0 = load i32, ptr %p, align 4
201 %i1 = load i32, ptr %p1, align 4
; Element 1 goes to slot 2 and element 0 goes to slot 3.
202 store i32 %i1, ptr %p2, align 4
203 store i32 %i0, ptr %p3, align 4
; memset intrinsic; @test uses it to zero the first 40 bytes of its %fde object.
207 declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1)
; Variadic external; @test calls it as fcntl(%fd, 4, %fde).
208 declare i32 @fcntl(i32, i32, ...)
; External returning a noalias pointer; @test stores its result into @gv1.
209 declare noalias ptr @foo()