1 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
2 ; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
3 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \
4 ; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
; Fused multiply-add intrinsic called by the test functions below:
; three float operands in, one float result out.
6 declare float @llvm.fma.f32(float %f1, float %f2, float %f3)
; f1: all three fma operands arrive as float arguments (no memory operand).
; Expected code: the z10 configuration uses maebr and then copies the result
; from register %f4 into %f0 with ler; the z14 configuration uses a single
; wfmasb that writes %f0 directly.
8 define float @f1(float %f1, float %f2, float %acc) {
10 ; CHECK-SCALAR: maebr %f4, %f0, %f2
11 ; CHECK-SCALAR: ler %f0, %f4
12 ; CHECK-VECTOR: wfmasb %f0, %f0, %f2, %f4
14 %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
; f2: the second multiplicand is loaded from %ptr, so the expected maeb takes
; it straight from memory at 0(%r2) in both configurations.  The result in
; register %f2 is then copied to %f0 (ler on z10, ldr on z14).
18 define float @f2(float %f1, ptr %ptr, float %acc) {
20 ; CHECK: maeb %f2, %f0, 0(%r2)
21 ; CHECK-SCALAR: ler %f0, %f2
22 ; CHECK-VECTOR: ldr %f0, %f2
24 %f2 = load float, ptr %ptr
25 %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
; f3: the load address is %base plus 1023 floats, i.e. 1023 * 4 = 4092 bytes.
; The expected maeb encodes that offset directly as its displacement.
; NOTE(review): 4092 is presumably the largest float-aligned displacement maeb
; accepts (f4 treats 4096 as out of range) -- confirm against the ISA manual.
29 define float @f3(float %f1, ptr %base, float %acc) {
31 ; CHECK: maeb %f2, %f0, 4092(%r2)
32 ; CHECK-SCALAR: ler %f0, %f2
33 ; CHECK-VECTOR: ldr %f0, %f2
35 %ptr = getelementptr float, ptr %base, i64 1023
36 %f2 = load float, ptr %ptr
37 %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
; f4: the load address is %base plus 1024 floats, i.e. 1024 * 4 = 4096 bytes.
; The expected sequence first adds 4096 to the base register with aghi and
; then issues maeb with a zero displacement.
41 define float @f4(float %f1, ptr %base, float %acc) {
42 ; The important thing here is that we don't generate an out-of-range
43 ; displacement. Other sequences besides this one would be OK.
46 ; CHECK: aghi %r2, 4096
47 ; CHECK: maeb %f2, %f0, 0(%r2)
48 ; CHECK-SCALAR: ler %f0, %f2
49 ; CHECK-VECTOR: ldr %f0, %f2
51 %ptr = getelementptr float, ptr %base, i64 1024
52 %f2 = load float, ptr %ptr
53 %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
; f5: negative offset -- element -1 lies 4 bytes below %base.  The expected
; maeb uses a zero displacement rather than folding the -4 into the
; instruction's displacement field.
57 define float @f5(float %f1, ptr %base, float %acc) {
58 ; Here too the important thing is that we don't generate an out-of-range
59 ; displacement. Other sequences besides this one would be OK.
63 ; CHECK: maeb %f2, %f0, 0(%r2)
64 ; CHECK-SCALAR: ler %f0, %f2
65 ; CHECK-VECTOR: ldr %f0, %f2
67 %ptr = getelementptr float, ptr %base, i64 -1
68 %f2 = load float, ptr %ptr
69 %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
; f6: variable element index.  The expected code scales the index (in %r3) by
; the float size with a left shift of 2 into %r1, then uses %r1 and the base
; pointer (in %r2) as the two address registers of maeb, with displacement 0.
73 define float @f6(float %f1, ptr %base, i64 %index, float %acc) {
75 ; CHECK: sllg %r1, %r3, 2
76 ; CHECK: maeb %f2, %f0, 0(%r1,%r2)
77 ; CHECK-SCALAR: ler %f0, %f2
78 ; CHECK-VECTOR: ldr %f0, %f2
80 %ptr = getelementptr float, ptr %base, i64 %index
81 %f2 = load float, ptr %ptr
82 %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
; f7: variable element index plus a constant 1023 elements.  The constant part
; (1023 * 4 = 4092 bytes) is expected in the maeb displacement field, while
; the scaled index (%r1) and the base (%r2) fill the two address registers;
; the pattern accepts them in either order.
86 define float @f7(float %f1, ptr %base, i64 %index, float %acc) {
88 ; CHECK: sllg %r1, %r3, 2
89 ; CHECK: maeb %f2, %f0, 4092({{%r1,%r2|%r2,%r1}})
90 ; CHECK-SCALAR: ler %f0, %f2
91 ; CHECK-VECTOR: ldr %f0, %f2
93 %index2 = add i64 %index, 1023
94 %ptr = getelementptr float, ptr %base, i64 %index2
95 %f2 = load float, ptr %ptr
96 %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
; f8: variable element index plus a constant 1024 elements (4096 bytes).  The
; expected code forms the complete address in %r1 with lay (scaled index +
; base + 4096) and then issues maeb with a zero displacement and only %r1 as
; the address register.
100 define float @f8(float %f1, ptr %base, i64 %index, float %acc) {
102 ; CHECK: sllg %r1, %r3, 2
103 ; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}})
104 ; CHECK: maeb %f2, %f0, 0(%r1)
105 ; CHECK-SCALAR: ler %f0, %f2
106 ; CHECK-VECTOR: ldr %f0, %f2
108 %index2 = add i64 %index, 1024
109 %ptr = getelementptr float, ptr %base, i64 %index2
110 %f2 = load float, ptr %ptr
111 %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)