1 ; Test memory-to-memory ANDs.
3 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
5 @g1src = dso_local global i8 1  ; i8 source operand, loaded by @f22
6 @g1dst = dso_local global i8 1  ; i8 destination, loaded and stored by @f22
7 @g2src = dso_local global i16 2  ; i16 source operand, loaded by @f23
8 @g2dst = dso_local global i16 2  ; i16 destination, loaded and stored by @f23
10 ; Test the simple i8 case: AND the byte at %ptr1 into the byte that follows it.
11 define dso_local void @f1(i8 *%ptr1) {
13 ; CHECK: nc 1(1,%r2), 0(%r2)
15 %ptr2 = getelementptr i8, i8 *%ptr1, i64 1  ; destination is the next byte
16 %val = load i8, i8 *%ptr1  ; source operand
17 %old = load i8, i8 *%ptr2  ; current destination contents
18 %and = and i8 %val, %old  ; source value is the left-hand operand here
19 store i8 %and, i8 *%ptr2  ; written back to the address %old was loaded from
23 ; ...and again in reverse: the AND operands are swapped relative to @f1.
24 define dso_local void @f2(i8 *%ptr1) {
26 ; CHECK: nc 1(1,%r2), 0(%r2)
28 %ptr2 = getelementptr i8, i8 *%ptr1, i64 1  ; destination is the next byte
29 %val = load i8, i8 *%ptr1  ; source operand
30 %old = load i8, i8 *%ptr2  ; current destination contents
31 %and = and i8 %old, %val  ; destination value is the left-hand operand here
32 store i8 %and, i8 *%ptr2  ; written back to the address %old was loaded from
36 ; Test i8 cases where one value is zero-extended to 32 bits and the other is sign-extended.
38 define dso_local void @f3(i8 *%ptr1) {
40 ; CHECK: nc 1(1,%r2), 0(%r2)
42 %ptr2 = getelementptr i8, i8 *%ptr1, i64 1  ; destination is the next byte
43 %val = load i8, i8 *%ptr1  ; source operand
44 %extval = zext i8 %val to i32  ; source is zero-extended
45 %old = load i8, i8 *%ptr2  ; current destination contents
46 %extold = sext i8 %old to i32  ; destination value is sign-extended
47 %and = and i32 %extval, %extold  ; AND performed at 32 bits
48 %trunc = trunc i32 %and to i8  ; only the low 8 bits are kept
49 store i8 %trunc, i8 *%ptr2  ; written back to the address %old was loaded from
53 ; ...and again with the extension types reversed (source sign-extended, destination zero-extended).
54 define dso_local void @f4(i8 *%ptr1) {
56 ; CHECK: nc 1(1,%r2), 0(%r2)
58 %ptr2 = getelementptr i8, i8 *%ptr1, i64 1  ; destination is the next byte
59 %val = load i8, i8 *%ptr1  ; source operand
60 %extval = sext i8 %val to i32  ; source is sign-extended
61 %old = load i8, i8 *%ptr2  ; current destination contents
62 %extold = zext i8 %old to i32  ; destination value is zero-extended
63 %and = and i32 %extval, %extold  ; AND performed at 32 bits
64 %trunc = trunc i32 %and to i8  ; only the low 8 bits are kept
65 store i8 %trunc, i8 *%ptr2  ; written back to the address %old was loaded from
69 ; ...and again with two sign extensions.
70 define dso_local void @f5(i8 *%ptr1) {
72 ; CHECK: nc 1(1,%r2), 0(%r2)
74 %ptr2 = getelementptr i8, i8 *%ptr1, i64 1  ; destination is the next byte
75 %val = load i8, i8 *%ptr1  ; source operand
76 %extval = sext i8 %val to i32  ; source is sign-extended
77 %old = load i8, i8 *%ptr2  ; current destination contents
78 %extold = sext i8 %old to i32  ; destination value is sign-extended as well
79 %and = and i32 %extval, %extold  ; AND performed at 32 bits
80 %trunc = trunc i32 %and to i8  ; only the low 8 bits are kept
81 store i8 %trunc, i8 *%ptr2  ; written back to the address %old was loaded from
85 ; ...and again with two zero extensions.
86 define dso_local void @f6(i8 *%ptr1) {
88 ; CHECK: nc 1(1,%r2), 0(%r2)
90 %ptr2 = getelementptr i8, i8 *%ptr1, i64 1  ; destination is the next byte
91 %val = load i8, i8 *%ptr1  ; source operand
92 %extval = zext i8 %val to i32  ; source is zero-extended
93 %old = load i8, i8 *%ptr2  ; current destination contents
94 %extold = zext i8 %old to i32  ; destination value is zero-extended as well
95 %and = and i32 %extval, %extold  ; AND performed at 32 bits
96 %trunc = trunc i32 %and to i8  ; only the low 8 bits are kept
97 store i8 %trunc, i8 *%ptr2  ; written back to the address %old was loaded from
101 ; Test i8 cases where the value is extended to 64 bits (just one case here).
103 define dso_local void @f7(i8 *%ptr1) {
105 ; CHECK: nc 1(1,%r2), 0(%r2)
107 %ptr2 = getelementptr i8, i8 *%ptr1, i64 1  ; destination is the next byte
108 %val = load i8, i8 *%ptr1  ; source operand
109 %extval = sext i8 %val to i64  ; source is sign-extended to 64 bits
110 %old = load i8, i8 *%ptr2  ; current destination contents
111 %extold = zext i8 %old to i64  ; destination value is zero-extended to 64 bits
112 %and = and i64 %extval, %extold  ; AND performed at 64 bits
113 %trunc = trunc i64 %and to i8  ; only the low 8 bits are kept
114 store i8 %trunc, i8 *%ptr2  ; written back to the address %old was loaded from
118 ; Test the simple i16 case: AND the halfword at %ptr1 into the halfword that follows it.
119 define dso_local void @f8(i16 *%ptr1) {
121 ; CHECK: nc 2(2,%r2), 0(%r2)
123 %ptr2 = getelementptr i16, i16 *%ptr1, i64 1  ; destination is the next i16 element
124 %val = load i16, i16 *%ptr1  ; source operand
125 %old = load i16, i16 *%ptr2  ; current destination contents
126 %and = and i16 %val, %old  ; AND performed directly at 16 bits
127 store i16 %and, i16 *%ptr2  ; written back to the address %old was loaded from
131 ; Test i16 cases where the value is extended to 32 bits.
132 define dso_local void @f9(i16 *%ptr1) {
134 ; CHECK: nc 2(2,%r2), 0(%r2)
136 %ptr2 = getelementptr i16, i16 *%ptr1, i64 1  ; destination is the next i16 element
137 %val = load i16, i16 *%ptr1  ; source operand
138 %extval = zext i16 %val to i32  ; source is zero-extended
139 %old = load i16, i16 *%ptr2  ; current destination contents
140 %extold = sext i16 %old to i32  ; destination value is sign-extended
141 %and = and i32 %extval, %extold  ; AND performed at 32 bits
142 %trunc = trunc i32 %and to i16  ; only the low 16 bits are kept
143 store i16 %trunc, i16 *%ptr2  ; written back to the address %old was loaded from
147 ; Test i16 cases where the value is extended to 64 bits.
148 define dso_local void @f10(i16 *%ptr1) {
150 ; CHECK: nc 2(2,%r2), 0(%r2)
152 %ptr2 = getelementptr i16, i16 *%ptr1, i64 1  ; destination is the next i16 element
153 %val = load i16, i16 *%ptr1  ; source operand
154 %extval = sext i16 %val to i64  ; source is sign-extended to 64 bits
155 %old = load i16, i16 *%ptr2  ; current destination contents
156 %extold = zext i16 %old to i64  ; destination value is zero-extended to 64 bits
157 %and = and i64 %extval, %extold  ; AND performed at 64 bits
158 %trunc = trunc i64 %and to i16  ; only the low 16 bits are kept
159 store i16 %trunc, i16 *%ptr2  ; written back to the address %old was loaded from
163 ; Test the simple i32 case: AND the word at %ptr1 into the word that follows it.
164 define dso_local void @f11(i32 *%ptr1) {
166 ; CHECK: nc 4(4,%r2), 0(%r2)
168 %ptr2 = getelementptr i32, i32 *%ptr1, i64 1  ; destination is the next i32 element
169 %val = load i32, i32 *%ptr1  ; source operand
170 %old = load i32, i32 *%ptr2  ; current destination contents
171 %and = and i32 %old, %val  ; destination value is the left-hand operand
172 store i32 %and, i32 *%ptr2  ; written back to the address %old was loaded from
176 ; Test i32 cases where the value is extended to 64 bits.
177 define dso_local void @f12(i32 *%ptr1) {
179 ; CHECK: nc 4(4,%r2), 0(%r2)
181 %ptr2 = getelementptr i32, i32 *%ptr1, i64 1  ; destination is the next i32 element
182 %val = load i32, i32 *%ptr1  ; source operand
183 %extval = sext i32 %val to i64  ; source is sign-extended to 64 bits
184 %old = load i32, i32 *%ptr2  ; current destination contents
185 %extold = zext i32 %old to i64  ; destination value is zero-extended to 64 bits
186 %and = and i64 %extval, %extold  ; AND performed at 64 bits
187 %trunc = trunc i64 %and to i32  ; only the low 32 bits are kept
188 store i32 %trunc, i32 *%ptr2  ; written back to the address %old was loaded from
193 define dso_local void @f13(i64 *%ptr1) {  ; plain i64 case: no extensions, no volatile accesses
195 ; CHECK: nc 8(8,%r2), 0(%r2)
197 %ptr2 = getelementptr i64, i64 *%ptr1, i64 1  ; destination is the next i64 element
198 %val = load i64, i64 *%ptr1  ; source operand
199 %old = load i64, i64 *%ptr2  ; current destination contents
200 %and = and i64 %old, %val  ; destination value is the left-hand operand
201 store i64 %and, i64 *%ptr2  ; written back to the address %old was loaded from
205 ; Make sure that we don't use NC if the first load is volatile.
206 define dso_local void @f14(i64 *%ptr1) {
210 %ptr2 = getelementptr i64, i64 *%ptr1, i64 1  ; destination is the next i64 element
211 %val = load volatile i64, i64 *%ptr1  ; the source load is the volatile access in this variant
212 %old = load i64, i64 *%ptr2  ; ordinary load of the destination
213 %and = and i64 %old, %val
214 store i64 %and, i64 *%ptr2  ; ordinary store back to the destination
218 ; ...likewise the second (the load of the destination is volatile).
219 define dso_local void @f15(i64 *%ptr1) {
223 %ptr2 = getelementptr i64, i64 *%ptr1, i64 1  ; destination is the next i64 element
224 %val = load i64, i64 *%ptr1  ; ordinary load of the source
225 %old = load volatile i64, i64 *%ptr2  ; the destination load is the volatile access in this variant
226 %and = and i64 %old, %val
227 store i64 %and, i64 *%ptr2  ; ordinary store back to the destination
231 ; ...likewise the store (both loads are ordinary, the store is volatile).
232 define dso_local void @f16(i64 *%ptr1) {
236 %ptr2 = getelementptr i64, i64 *%ptr1, i64 1  ; destination is the next i64 element
237 %val = load i64, i64 *%ptr1  ; ordinary load of the source
238 %old = load i64, i64 *%ptr2  ; ordinary load of the destination
239 %and = and i64 %old, %val
240 store volatile i64 %and, i64 *%ptr2  ; the store is the volatile access in this variant
244 ; Test that NC is not used for aligned loads and stores if there is
245 ; no way of telling whether they alias. We don't want to use NC in
246 ; cases where the addresses could be equal.
247 define dso_local void @f17(i64 *%ptr1, i64 *%ptr2) {  ; two independent pointer arguments, no noalias attribute
251 %val = load i64, i64 *%ptr1  ; no explicit alignment given
252 %old = load i64, i64 *%ptr2  ; no explicit alignment given
253 %and = and i64 %old, %val
254 store i64 %and, i64 *%ptr2  ; written back to the address %old was loaded from
258 ; ...and if one of the loads isn't aligned, we still can't be sure.
259 define dso_local void @f18(i64 *%ptr1, i64 *%ptr2) {  ; two independent pointer arguments, no noalias attribute
263 %val = load i64, i64 *%ptr1, align 2  ; source load is under-aligned (align 2 for an i64)
264 %old = load i64, i64 *%ptr2  ; no explicit alignment given
265 %and = and i64 %old, %val  ; destination value is the left-hand operand
266 store i64 %and, i64 *%ptr2  ; written back to the address %old was loaded from
270 ; Repeat the previous test with the operands in the opposite order.
271 define dso_local void @f19(i64 *%ptr1, i64 *%ptr2) {  ; two independent pointer arguments, no noalias attribute
275 %val = load i64, i64 *%ptr1, align 2  ; source load is under-aligned (align 2 for an i64)
276 %old = load i64, i64 *%ptr2  ; no explicit alignment given
277 %and = and i64 %val, %old  ; source value is the left-hand operand this time
278 store i64 %and, i64 *%ptr2  ; written back to the address %old was loaded from
282 ; ...and again with the other operand being unaligned.
283 define dso_local void @f20(i64 *%ptr1, i64 *%ptr2) {  ; two independent pointer arguments, no noalias attribute
287 %val = load i64, i64 *%ptr1  ; no explicit alignment given
288 %old = load i64, i64 *%ptr2, align 2  ; destination load is under-aligned (align 2 for an i64)
289 %and = and i64 %val, %old  ; source value is the left-hand operand
290 store i64 %and, i64 *%ptr2, align 2  ; store uses the same reduced alignment as the load of %old
294 ; Test a case where there is definite overlap.
295 define dso_local void @f21(i64 %base) {  ; takes an integer address rather than a pointer
299 %add = add i64 %base, 1  ; second address is one byte past the first
300 %ptr1 = inttoptr i64 %base to i64 *  ; source pointer
301 %ptr2 = inttoptr i64 %add to i64 *  ; destination pointer, %base + 1
302 %val = load i64, i64 *%ptr1  ; i64 read starting at %base
303 %old = load i64, i64 *%ptr2, align 1  ; i64 read starting at %base + 1, so it overlaps the read above
304 %and = and i64 %old, %val
305 store i64 %and, i64 *%ptr2, align 1  ; overwrites part of the region %val was read from
309 ; Test that we can use NC for global addresses for i8.
310 define dso_local void @f22(i8 *%ptr) {  ; %ptr is not referenced by any visible instruction
312 ; CHECK-DAG: larl [[SRC:%r[0-5]]], g1src
313 ; CHECK-DAG: larl [[DST:%r[0-5]]], g1dst
314 ; CHECK: nc 0(1,[[DST]]), 0([[SRC]])
316 %val = load i8, i8 *@g1src  ; source operand comes from the global @g1src
317 %old = load i8, i8 *@g1dst  ; current contents of the global @g1dst
318 %and = and i8 %val, %old
319 store i8 %and, i8 *@g1dst  ; result written back to @g1dst
323 ; Test that we use NC even where LHRL and STHRL are available.
324 define dso_local void @f23(i16 *%ptr) {  ; %ptr is not referenced by any visible instruction
326 ; CHECK-DAG: larl [[SRC:%r[0-5]]], g2src
327 ; CHECK-DAG: larl [[DST:%r[0-5]]], g2dst
328 ; CHECK: nc 0(2,[[DST]]), 0([[SRC]])
330 %val = load i16, i16 *@g2src  ; source operand comes from the global @g2src
331 %old = load i16, i16 *@g2dst  ; current contents of the global @g2dst
332 %and = and i16 %val, %old
333 store i16 %and, i16 *@g2dst  ; result written back to @g2dst
337 ; Test a case where offset disambiguation is enough.
338 define dso_local void @f24(i64 *%ptr1) {
340 ; CHECK: nc 8(8,%r2), 0(%r2)
342 %ptr2 = getelementptr i64, i64 *%ptr1, i64 1  ; destination is exactly one i64 element past the source
343 %val = load i64, i64 *%ptr1, align 1  ; alignment is only 1, so it cannot be what separates the accesses
344 %old = load i64, i64 *%ptr2, align 1  ; same base pointer, different constant offset
345 %and = and i64 %old, %val
346 store i64 %and, i64 *%ptr2, align 1  ; written back to the address %old was loaded from
350 ; Test a case where TBAA tells us there is no alias.
351 define dso_local void @f25(i64 *%ptr1, i64 *%ptr2) {  ; two independent pointer arguments, no noalias attribute
353 ; CHECK: nc 0(8,%r3), 0(%r2)
355 %val = load i64, i64 *%ptr1, align 2, !tbaa !3  ; source access is tagged !3 ("set1")
356 %old = load i64, i64 *%ptr2, align 2, !tbaa !4  ; destination access is tagged !4 ("set2")
357 %and = and i64 %old, %val
358 store i64 %and, i64 *%ptr2, align 2, !tbaa !4  ; store carries the same tag as the load of %old
362 ; Test a case where TBAA information is present but doesn't help.
363 define dso_local void @f26(i64 *%ptr1, i64 *%ptr2) {  ; two independent pointer arguments, no noalias attribute
367 %val = load i64, i64 *%ptr1, align 2, !tbaa !3  ; source access is tagged !3 ("set1")
368 %old = load i64, i64 *%ptr2, align 2, !tbaa !3  ; destination access carries the same tag as the source
369 %and = and i64 %old, %val
370 store i64 %and, i64 *%ptr2, align 2, !tbaa !3  ; all three accesses share one tag
374 ; Test a case where one of the loads is optimized by the DAGCombiner to a
375 ; zero-extending load of half the original size.
376 define dso_local void @f27(i16* noalias %ptr1, i16* noalias %ptr2) {  ; both pointers are marked noalias
381 %0 = load i16, i16 *%ptr1, align 2  ; value read from %ptr1, which is also the store address below
383 %2 = load i16, i16 *%ptr2, align 2  ; value read from %ptr2
384 %and7 = and i16 %1, %2  ; NOTE(review): %1 has no definition in the visible lines - confirm its defining instruction was not lost
385 store i16 %and7, i16 *%ptr1, align 2  ; unlike the earlier tests, the result goes to %ptr1
390 !1 = !{ !"set1", !0 }  ; node named "set1"; NOTE(review): its parent !0 is not defined in the visible lines - confirm
391 !2 = !{ !"set2", !0 }  ; node named "set2", with the same parent as !1
392 !3 = !{ !1, !1, i64 0}  ; tag built on "set1" at offset 0; attached via !tbaa in @f25 and @f26
393 !4 = !{ !2, !2, i64 0}  ; tag built on "set2" at offset 0; attached via !tbaa in @f25