; Test 16-bit conditional stores that are presented as selects. The volatile
; tests require z10, on which the compiler uses a branch instead of a LOCR.
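;
; z10 predates the load/store-on-condition facility, so the compiled code has
; to branch around the store. One plausible shape for f1 below (the compare
; and branch mnemonics are illustrative, not checked by this test):
;
;       clfi    %r4, 420        # compare %limit with 420
;       blr     %r14            # %limit < 420: %orig wins, memory is unchanged
;       sth     %r3, 0(%r2)     # otherwise store %alt over the loaded value
;       br      %r14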
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s

declare void @foo(i16 *)

; Test the simple case, with the loaded value first.
define void @f1(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK-LABEL: f1:
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16, i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}
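
; The select collapses to a conditional store: writing %orig back to %ptr is
; a no-op, so the store only needs to execute on the path where %alt wins.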

; ...and with the loaded value second.
define void @f2(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK-LABEL: f2:
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16, i16 *%ptr
  %res = select i1 %cond, i16 %alt, i16 %orig
  store i16 %res, i16 *%ptr
  ret void
}

; Test cases where the value is explicitly sign-extended to 32 bits, with the
; loaded value first.
define void @f3(i16 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f3:
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16, i16 *%ptr
  %ext = sext i16 %orig to i32
  %res = select i1 %cond, i32 %ext, i32 %alt
  %trunc = trunc i32 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}
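
; In f3-f10 the extension is dead once the result is truncated back to i16:
; trunc(select(cond, ext %orig, %alt)) stores the same halfword as
; select(cond, %orig, trunc %alt), so neither the extension kind (sext/zext)
; nor its width (i32/i64) should block the conditional-store pattern.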

; ...and with the loaded value second.
define void @f4(i16 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f4:
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16, i16 *%ptr
  %ext = sext i16 %orig to i32
  %res = select i1 %cond, i32 %alt, i32 %ext
  %trunc = trunc i32 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; Test cases where the value is explicitly zero-extended to 32 bits, with the
; loaded value first.
define void @f5(i16 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f5:
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16, i16 *%ptr
  %ext = zext i16 %orig to i32
  %res = select i1 %cond, i32 %ext, i32 %alt
  %trunc = trunc i32 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; ...and with the loaded value second.
define void @f6(i16 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f6:
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16, i16 *%ptr
  %ext = zext i16 %orig to i32
  %res = select i1 %cond, i32 %alt, i32 %ext
  %trunc = trunc i32 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; Test cases where the value is explicitly sign-extended to 64 bits, with the
; loaded value first.
define void @f7(i16 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f7:
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16, i16 *%ptr
  %ext = sext i16 %orig to i64
  %res = select i1 %cond, i64 %ext, i64 %alt
  %trunc = trunc i64 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; ...and with the loaded value second.
define void @f8(i16 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f8:
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16, i16 *%ptr
  %ext = sext i16 %orig to i64
  %res = select i1 %cond, i64 %alt, i64 %ext
  %trunc = trunc i64 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; Test cases where the value is explicitly zero-extended to 64 bits, with the
; loaded value first.
define void @f9(i16 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f9:
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16, i16 *%ptr
  %ext = zext i16 %orig to i64
  %res = select i1 %cond, i64 %ext, i64 %alt
  %trunc = trunc i64 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; ...and with the loaded value second.
define void @f10(i16 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f10:
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16, i16 *%ptr
  %ext = zext i16 %orig to i64
  %res = select i1 %cond, i64 %alt, i64 %ext
  %trunc = trunc i64 %res to i16
  store i16 %trunc, i16 *%ptr
  ret void
}

; Check the high end of the aligned STH range.
define void @f11(i16 *%base, i16 %alt, i32 %limit) {
; CHECK-LABEL: f11:
; CHECK: sth %r3, 4094(%r2)
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%base, i64 2047
  %cond = icmp ult i32 %limit, 420
  %orig = load i16, i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; Check the next halfword up, which should use STHY instead of STH.
define void @f12(i16 *%base, i16 %alt, i32 %limit) {
; CHECK-LABEL: f12:
; CHECK: sthy %r3, 4096(%r2)
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%base, i64 2048
  %cond = icmp ult i32 %limit, 420
  %orig = load i16, i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}
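
; STH encodes a 12-bit unsigned displacement, so the largest halfword-aligned
; offset it can reach is 4094 (index 2047 * 2 bytes); index 2048 gives byte
; offset 4096, which needs the long-displacement STHY instead.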

; Check the high end of the aligned STHY range.
define void @f13(i16 *%base, i16 %alt, i32 %limit) {
; CHECK-LABEL: f13:
; CHECK: sthy %r3, 524286(%r2)
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%base, i64 262143
  %cond = icmp ult i32 %limit, 420
  %orig = load i16, i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; Check the next halfword up, which needs separate address logic.
; Other sequences besides this one would be OK.
define void @f14(i16 *%base, i16 %alt, i32 %limit) {
; CHECK-LABEL: f14:
; CHECK: agfi %r2, 524288
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%base, i64 262144
  %cond = icmp ult i32 %limit, 420
  %orig = load i16, i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}
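
; STHY's 20-bit signed displacement tops out at 524287, so offset 524286
; (index 262143 * 2) still fits, while 524288 does not and the base must be
; adjusted with AGFI first.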

; Check the low end of the STHY range.
define void @f15(i16 *%base, i16 %alt, i32 %limit) {
; CHECK-LABEL: f15:
; CHECK: sthy %r3, -524288(%r2)
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%base, i64 -262144
  %cond = icmp ult i32 %limit, 420
  %orig = load i16, i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; Check the next halfword down, which needs separate address logic.
; Other sequences besides this one would be OK.
define void @f16(i16 *%base, i16 %alt, i32 %limit) {
; CHECK-LABEL: f16:
; CHECK: agfi %r2, -524290
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%base, i64 -262145
  %cond = icmp ult i32 %limit, 420
  %orig = load i16, i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}
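
; The low end works the same way: -524288 (index -262144 * 2) is the most
; negative displacement STHY can encode, so index -262145 (byte offset
; -524290) again falls back to AGFI plus a plain STH.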

; Check that STHY allows an index.
define void @f17(i64 %base, i64 %index, i16 %alt, i32 %limit) {
; CHECK-LABEL: f17:
; CHECK: sthy %r4, 4096(%r3,%r2)
; CHECK: br %r14
  %add1 = add i64 %base, %index
  %add2 = add i64 %add1, 4096
  %ptr = inttoptr i64 %add2 to i16 *
  %cond = icmp ult i32 %limit, 420
  %orig = load i16, i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}
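
; STHY is an RXY-form instruction, so base + index + displacement all fold
; into the single memory operand shown in the sthy check above.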

; Check that volatile loads are not matched.
define void @f18(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK-LABEL: f18:
; CHECK: lh {{%r[0-5]}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: sth {{%r[0-5]}}, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load volatile i16, i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}
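
; A volatile load must execute exactly once, so it cannot be deleted by the
; conditional-store rewrite; the value is loaded unconditionally and a branch
; selects between it and %alt.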

; ...likewise stores. In this case we should have a conditional load into %r3.
define void @f19(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK-LABEL: f19:
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK: lh %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16, i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store volatile i16 %res, i16 *%ptr
  ret void
}
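
; Here the volatile store must happen exactly once, so the load is made
; conditional instead: %r3 already holds %alt and is only overwritten with
; the loaded value when the comparison says %orig wins.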

; Check that atomic loads are not matched. The transformation is OK for
; the "unordered" case tested here, but since we don't try to handle atomic
; operations at all in this context, it seems better to assert that than
; to restrict the test to a stronger ordering.
define void @f20(i16 *%ptr, i16 %alt, i32 %limit) {
; FIXME: should use a normal load instead of CS.
; CHECK-LABEL: f20:
; CHECK: lh {{%r[0-9]+}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: sth {{%r[0-9]+}}, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load atomic i16, i16 *%ptr unordered, align 2
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  ret void
}

; ...likewise stores.
define void @f21(i16 *%ptr, i16 %alt, i32 %limit) {
; FIXME: should use a normal store instead of CS.
; CHECK-LABEL: f21:
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK: lh %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i16, i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store atomic i16 %res, i16 *%ptr unordered, align 2
  ret void
}

; Try a frame index base.
define void @f22(i16 %alt, i32 %limit) {
; CHECK-LABEL: f22:
; CHECK: brasl %r14, foo@PLT
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK: sth {{%r[0-9]+}}, {{[0-9]+}}(%r15)
; CHECK: [[LABEL]]:
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
  %ptr = alloca i16
  call void @foo(i16 *%ptr)
  %cond = icmp ult i32 %limit, 420
  %orig = load i16, i16 *%ptr
  %res = select i1 %cond, i16 %orig, i16 %alt
  store i16 %res, i16 *%ptr
  call void @foo(i16 *%ptr)
  ret void
}
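
; With %ptr in an alloca, the conditional store addresses the slot relative
; to the stack pointer (%r15); the calls to @foo make the pointer escape,
; forcing a real stack slot rather than a register.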