; Strict floating-point test: each function below calls the constrained f32
; fma intrinsic with a negated accumulator, and the expected SystemZ assembly
; is matched for two CPUs. The z10 run uses prefixes CHECK and CHECK-SCALAR;
; the z14 run uses prefixes CHECK and CHECK-VECTOR.
1 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
2 ; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
3 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \
4 ; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
; Constrained fma intrinsic: three float operands followed by two metadata
; operands (the functions below pass "round.dynamic" and "fpexcept.strict").
6 declare float @llvm.experimental.constrained.fma.f32(float %f1, float %f2, float %f3, metadata, metadata)
; f1: register-register form. Computes fma(%f1, %f2, -0.0 - %acc) with all
; three inputs passed as float arguments. The z10 run expects msebr into %f4
; followed by a copy to %f0; the z14 run expects a single wfmssb that writes
; %f0 directly.
8 define float @f1(float %f1, float %f2, float %acc) {
10 ; CHECK-SCALAR: msebr %f4, %f0, %f2
11 ; CHECK-SCALAR: ler %f0, %f4
12 ; CHECK-VECTOR: wfmssb %f0, %f0, %f2, %f4
; The accumulator is negated by subtracting it from -0.0 before the fma call.
14 %negacc = fsub float -0.0, %acc
15 %res = call float @llvm.experimental.constrained.fma.f32 (
16 float %f1, float %f2, float %negacc,
17 metadata !"round.dynamic",
18 metadata !"fpexcept.strict")
; f2: memory-operand form. The second multiplicand is loaded from %ptr, and
; both runs expect the load to be folded into mseb with displacement 0 off
; %r2. The result lands in %f2 and is then copied to %f0 (ler on z10, ldr on
; z14).
22 define float @f2(float %f1, float *%ptr, float %acc) {
24 ; CHECK: mseb %f2, %f0, 0(%r2)
25 ; CHECK-SCALAR: ler %f0, %f2
26 ; CHECK-VECTOR: ldr %f0, %f2
28 %f2 = load float, float *%ptr
; The accumulator is negated by subtracting it from -0.0 before the fma call.
29 %negacc = fsub float -0.0, %acc
30 %res = call float @llvm.experimental.constrained.fma.f32 (
31 float %f1, float %f2, float %negacc,
32 metadata !"round.dynamic",
33 metadata !"fpexcept.strict")
; f3: memory operand at element index 1023 from %base. Both runs expect the
; offset to be folded into mseb as displacement 4092 (1023 floats of 4 bytes
; each), with no separate address computation matched.
37 define float @f3(float %f1, float *%base, float %acc) {
39 ; CHECK: mseb %f2, %f0, 4092(%r2)
40 ; CHECK-SCALAR: ler %f0, %f2
41 ; CHECK-VECTOR: ldr %f0, %f2
43 %ptr = getelementptr float, float *%base, i64 1023
44 %f2 = load float, float *%ptr
; The accumulator is negated by subtracting it from -0.0 before the fma call.
45 %negacc = fsub float -0.0, %acc
46 %res = call float @llvm.experimental.constrained.fma.f32 (
47 float %f1, float %f2, float %negacc,
48 metadata !"round.dynamic",
49 metadata !"fpexcept.strict")
; f4: memory operand at element index 1024 from %base, one element beyond the
; index used in f3. The expected sequence first adds 4096 to %r2 with aghi and
; then uses mseb with displacement 0.
53 define float @f4(float %f1, float *%base, float %acc) {
54 ; The important thing here is that we don't generate an out-of-range
55 ; displacement. Other sequences besides this one would be OK.
58 ; CHECK: aghi %r2, 4096
59 ; CHECK: mseb %f2, %f0, 0(%r2)
60 ; CHECK-SCALAR: ler %f0, %f2
61 ; CHECK-VECTOR: ldr %f0, %f2
63 %ptr = getelementptr float, float *%base, i64 1024
64 %f2 = load float, float *%ptr
; The accumulator is negated by subtracting it from -0.0 before the fma call.
65 %negacc = fsub float -0.0, %acc
66 %res = call float @llvm.experimental.constrained.fma.f32 (
67 float %f1, float %f2, float %negacc,
68 metadata !"round.dynamic",
69 metadata !"fpexcept.strict")
; f5: memory operand at element index -1 from %base (a negative offset). The
; expected mseb uses displacement 0 off %r2.
; NOTE(review): unlike f4, no address-adjustment instruction is matched in the
; lines visible here before the mseb; confirm whether one is intended.
73 define float @f5(float %f1, float *%base, float %acc) {
74 ; Here too the important thing is that we don't generate an out-of-range
75 ; displacement. Other sequences besides this one would be OK.
79 ; CHECK: mseb %f2, %f0, 0(%r2)
80 ; CHECK-SCALAR: ler %f0, %f2
81 ; CHECK-VECTOR: ldr %f0, %f2
83 %ptr = getelementptr float, float *%base, i64 -1
84 %f2 = load float, float *%ptr
; The accumulator is negated by subtracting it from -0.0 before the fma call.
85 %negacc = fsub float -0.0, %acc
86 %res = call float @llvm.experimental.constrained.fma.f32 (
87 float %f1, float %f2, float %negacc,
88 metadata !"round.dynamic",
89 metadata !"fpexcept.strict")
; f6: indexed addressing. The element index arrives as an i64 argument; the
; expected code shifts it left by 2 with sllg (scaling by the 4-byte float
; size) and then uses mseb with a base+index address and displacement 0.
93 define float @f6(float %f1, float *%base, i64 %index, float %acc) {
95 ; CHECK: sllg %r1, %r3, 2
96 ; CHECK: mseb %f2, %f0, 0(%r1,%r2)
97 ; CHECK-SCALAR: ler %f0, %f2
98 ; CHECK-VECTOR: ldr %f0, %f2
100 %ptr = getelementptr float, float *%base, i64 %index
101 %f2 = load float, float *%ptr
; The accumulator is negated by subtracting it from -0.0 before the fma call.
102 %negacc = fsub float -0.0, %acc
103 %res = call float @llvm.experimental.constrained.fma.f32 (
104 float %f1, float %f2, float %negacc,
105 metadata !"round.dynamic",
106 metadata !"fpexcept.strict")
; f7: indexed addressing plus a constant offset. The index has 1023 added
; before the getelementptr; the expected code scales the index with sllg and
; folds the constant into mseb as displacement 4092. The pattern accepts the
; base and index registers in either order.
110 define float @f7(float %f1, float *%base, i64 %index, float %acc) {
112 ; CHECK: sllg %r1, %r3, 2
113 ; CHECK: mseb %f2, %f0, 4092({{%r1,%r2|%r2,%r1}})
114 ; CHECK-SCALAR: ler %f0, %f2
115 ; CHECK-VECTOR: ldr %f0, %f2
117 %index2 = add i64 %index, 1023
118 %ptr = getelementptr float, float *%base, i64 %index2
119 %f2 = load float, float *%ptr
; The accumulator is negated by subtracting it from -0.0 before the fma call.
120 %negacc = fsub float -0.0, %acc
121 %res = call float @llvm.experimental.constrained.fma.f32 (
122 float %f1, float %f2, float %negacc,
123 metadata !"round.dynamic",
124 metadata !"fpexcept.strict")
; f8: indexed addressing with the constant offset raised to 1024 elements.
; Instead of a displacement on mseb, the expected code scales the index with
; sllg, forms the full address with lay (displacement 4096, base and index in
; either order), and then uses mseb with displacement 0 off %r1.
128 define float @f8(float %f1, float *%base, i64 %index, float %acc) {
130 ; CHECK: sllg %r1, %r3, 2
131 ; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}})
132 ; CHECK: mseb %f2, %f0, 0(%r1)
133 ; CHECK-SCALAR: ler %f0, %f2
134 ; CHECK-VECTOR: ldr %f0, %f2
136 %index2 = add i64 %index, 1024
137 %ptr = getelementptr float, float *%base, i64 %index2
138 %f2 = load float, float *%ptr
; The accumulator is negated by subtracting it from -0.0 before the fma call.
139 %negacc = fsub float -0.0, %acc
140 %res = call float @llvm.experimental.constrained.fma.f32 (
141 float %f1, float %f2, float %negacc,
142 metadata !"round.dynamic",
143 metadata !"fpexcept.strict")