1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zfbfmin -verify-machineinstrs \
3 ; RUN: -target-abi ilp32f < %s | FileCheck -check-prefixes=CHECK,RV32IZFBFMIN %s
4 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfbfmin -verify-machineinstrs \
5 ; RUN: -target-abi lp64f < %s | FileCheck -check-prefixes=CHECK,RV64IZFBFMIN %s
; Loads a bfloat from %a and another from element 3 of %a, then adds them.
; The assertions expect both loads to be selected as flh (the second at byte
; offset 6), each value widened with fcvt.s.bf16, the sum computed by fadd.s,
; and the result narrowed into fa0 with fcvt.bf16.s.
7 define bfloat @flh(ptr %a) nounwind {
10 ; CHECK-NEXT: flh fa5, 6(a0)
11 ; CHECK-NEXT: flh fa4, 0(a0)
12 ; CHECK-NEXT: fcvt.s.bf16 fa5, fa5
13 ; CHECK-NEXT: fcvt.s.bf16 fa4, fa4
14 ; CHECK-NEXT: fadd.s fa5, fa4, fa5
15 ; CHECK-NEXT: fcvt.bf16.s fa0, fa5
17 %1 = load bfloat, ptr %a
18 %2 = getelementptr bfloat, ptr %a, i32 3
19 %3 = load bfloat, ptr %2
20 ; Use both loaded values in an FP op to ensure an flh is used, even for the
; soft bfloat ABI
22 %4 = fadd bfloat %1, %3
; Adds %b and %c, then stores the sum twice: to %a and to element 8 of %a.
; The assertions expect the widened operands to be added with fadd.s, the
; result narrowed with fcvt.bf16.s, and the same register stored with fsh at
; byte offsets 0 and 16 from a0.
26 define dso_local void @fsh(ptr %a, bfloat %b, bfloat %c) nounwind {
29 ; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
30 ; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
31 ; CHECK-NEXT: fadd.s fa5, fa4, fa5
32 ; CHECK-NEXT: fcvt.bf16.s fa5, fa5
33 ; CHECK-NEXT: fsh fa5, 0(a0)
34 ; CHECK-NEXT: fsh fa5, 16(a0)
36 %1 = fadd bfloat %b, %c
37 store bfloat %1, ptr %a
; Element 8 of a bfloat array; the expected assembly uses byte offset 16.
38 %2 = getelementptr bfloat, ptr %a, i32 8
39 store bfloat %1, ptr %2
43 ; Check load and store to a global
; @G is a bfloat global initialised to 0.0; it is the target of the loads and
; stores in @flh_fsh_global.
44 @G = dso_local global bfloat 0.0
; Adds %a and %b, then performs a volatile load from and a store to @G, and
; again from/to element 9 of @G. The assertions expect the address to be
; formed with lui %hi(G), the first pair of accesses to use the %lo(G)(a0)
; addressing form, and the second pair to use byte offset 18 from a1, which
; holds the full address of G computed by addi.
46 define bfloat @flh_fsh_global(bfloat %a, bfloat %b) nounwind {
47 ; Use %a and %b in an FP op to ensure bfloat precision floating point registers
48 ; are used, even for the soft bfloat ABI
49 ; CHECK-LABEL: flh_fsh_global:
51 ; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
52 ; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
53 ; CHECK-NEXT: fadd.s fa5, fa4, fa5
54 ; CHECK-NEXT: fcvt.bf16.s fa0, fa5
55 ; CHECK-NEXT: lui a0, %hi(G)
56 ; CHECK-NEXT: flh fa5, %lo(G)(a0)
57 ; CHECK-NEXT: addi a1, a0, %lo(G)
58 ; CHECK-NEXT: fsh fa0, %lo(G)(a0)
59 ; CHECK-NEXT: flh fa5, 18(a1)
60 ; CHECK-NEXT: fsh fa0, 18(a1)
62 %1 = fadd bfloat %a, %b
; The loaded values %2 and %4 are not used by any visible instruction; the
; loads are marked volatile and the expected assembly still contains an flh
; for each of them.
63 %2 = load volatile bfloat, ptr @G
64 store bfloat %1, ptr @G
; Element 9 of a bfloat array; the expected assembly uses byte offset 18.
65 %3 = getelementptr bfloat, ptr @G, i32 9
66 %4 = load volatile bfloat, ptr %3
67 store bfloat %1, ptr %3
71 ; Ensure that 1 is added to the high 20 bits if bit 11 of the low part is 1
; The address used below is 3735928559 (0xDEADBEEF). Its low 12 bits are 0xEEF,
; which has bit 11 set and is -273 as a signed 12-bit offset, so the upper part
; must be 0xDEADB + 1 = 0xDEADC (912092) for "upper + offset" to give back the
; original address.
72 define bfloat @flh_fsh_constant(bfloat %a) nounwind {
; 32-bit target: a single lui materialises the adjusted upper 20 bits.
73 ; RV32IZFBFMIN-LABEL: flh_fsh_constant:
74 ; RV32IZFBFMIN: # %bb.0:
75 ; RV32IZFBFMIN-NEXT: lui a0, 912092
76 ; RV32IZFBFMIN-NEXT: flh fa5, -273(a0)
77 ; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0
78 ; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5
79 ; RV32IZFBFMIN-NEXT: fadd.s fa5, fa4, fa5
80 ; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa0, fa5
81 ; RV32IZFBFMIN-NEXT: fsh fa0, -273(a0)
82 ; RV32IZFBFMIN-NEXT: ret
; 64-bit target: the upper part is built as lui 228023 followed by a left
; shift by 2 (228023 * 4 = 912092), giving the same base value.
; NOTE(review): presumably the two-instruction form avoids the sign extension
; a direct lui of 912092 would produce on a 64-bit register - confirm.
84 ; RV64IZFBFMIN-LABEL: flh_fsh_constant:
85 ; RV64IZFBFMIN: # %bb.0:
86 ; RV64IZFBFMIN-NEXT: lui a0, 228023
87 ; RV64IZFBFMIN-NEXT: slli a0, a0, 2
88 ; RV64IZFBFMIN-NEXT: flh fa5, -273(a0)
89 ; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0
90 ; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5
91 ; RV64IZFBFMIN-NEXT: fadd.s fa5, fa4, fa5
92 ; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa0, fa5
93 ; RV64IZFBFMIN-NEXT: fsh fa0, -273(a0)
94 ; RV64IZFBFMIN-NEXT: ret
; Load from the fixed address, add %a, and store the sum back to the same
; address.
95 %1 = inttoptr i32 3735928559 to ptr
96 %2 = load volatile bfloat, ptr %1
97 %3 = fadd bfloat %a, %2
98 store bfloat %3, ptr %1
; External function taking a pointer. @flh_stack and @fsh_stack pass the
; address of their alloca to it.
; NOTE(review): presumably this escape is what keeps the stack slot from being
; optimised away, as the name suggests - confirm.
102 declare void @notdead(ptr)
; Allocates a bfloat stack slot, passes its address to @notdead, then loads
; the slot and adds %a to the loaded value. The assertions expect %a to be
; kept in fs0 across the call (with fs0 itself spilled and reloaded) and the
; slot to be read back with flh directly off sp.
104 define bfloat @flh_stack(bfloat %a) nounwind {
; 32-bit target: the bfloat slot is at 4(sp).
105 ; RV32IZFBFMIN-LABEL: flh_stack:
106 ; RV32IZFBFMIN: # %bb.0:
107 ; RV32IZFBFMIN-NEXT: addi sp, sp, -16
108 ; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
109 ; RV32IZFBFMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
110 ; RV32IZFBFMIN-NEXT: fmv.s fs0, fa0
111 ; RV32IZFBFMIN-NEXT: addi a0, sp, 4
112 ; RV32IZFBFMIN-NEXT: call notdead
113 ; RV32IZFBFMIN-NEXT: flh fa5, 4(sp)
114 ; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fs0
115 ; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5
116 ; RV32IZFBFMIN-NEXT: fadd.s fa5, fa5, fa4
117 ; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa0, fa5
118 ; RV32IZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
119 ; RV32IZFBFMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
120 ; RV32IZFBFMIN-NEXT: addi sp, sp, 16
121 ; RV32IZFBFMIN-NEXT: ret
; 64-bit target: the bfloat slot is at 0(sp), so its address is a plain copy
; of sp.
123 ; RV64IZFBFMIN-LABEL: flh_stack:
124 ; RV64IZFBFMIN: # %bb.0:
125 ; RV64IZFBFMIN-NEXT: addi sp, sp, -16
126 ; RV64IZFBFMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
127 ; RV64IZFBFMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
128 ; RV64IZFBFMIN-NEXT: fmv.s fs0, fa0
129 ; RV64IZFBFMIN-NEXT: mv a0, sp
130 ; RV64IZFBFMIN-NEXT: call notdead
131 ; RV64IZFBFMIN-NEXT: flh fa5, 0(sp)
132 ; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fs0
133 ; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5
134 ; RV64IZFBFMIN-NEXT: fadd.s fa5, fa5, fa4
135 ; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa0, fa5
136 ; RV64IZFBFMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
137 ; RV64IZFBFMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
138 ; RV64IZFBFMIN-NEXT: addi sp, sp, 16
139 ; RV64IZFBFMIN-NEXT: ret
140 %1 = alloca bfloat, align 4
141 call void @notdead(ptr %1)
142 %2 = load bfloat, ptr %1
143 %3 = fadd bfloat %2, %a ; force load into FPR16
; Adds %a and %b, stores the sum into a bfloat stack slot, then passes the
; slot's address to @notdead. The assertions expect the sum to be written
; with fsh directly off sp before the address is materialised in a0 for the
; call.
147 define dso_local void @fsh_stack(bfloat %a, bfloat %b) nounwind {
; 32-bit target: the bfloat slot is at 8(sp).
148 ; RV32IZFBFMIN-LABEL: fsh_stack:
149 ; RV32IZFBFMIN: # %bb.0:
150 ; RV32IZFBFMIN-NEXT: addi sp, sp, -16
151 ; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
152 ; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1
153 ; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0
154 ; RV32IZFBFMIN-NEXT: fadd.s fa5, fa4, fa5
155 ; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5
156 ; RV32IZFBFMIN-NEXT: fsh fa5, 8(sp)
157 ; RV32IZFBFMIN-NEXT: addi a0, sp, 8
158 ; RV32IZFBFMIN-NEXT: call notdead
159 ; RV32IZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
160 ; RV32IZFBFMIN-NEXT: addi sp, sp, 16
161 ; RV32IZFBFMIN-NEXT: ret
; 64-bit target: the bfloat slot is at 4(sp).
163 ; RV64IZFBFMIN-LABEL: fsh_stack:
164 ; RV64IZFBFMIN: # %bb.0:
165 ; RV64IZFBFMIN-NEXT: addi sp, sp, -16
166 ; RV64IZFBFMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
167 ; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1
168 ; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0
169 ; RV64IZFBFMIN-NEXT: fadd.s fa5, fa4, fa5
170 ; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5
171 ; RV64IZFBFMIN-NEXT: fsh fa5, 4(sp)
172 ; RV64IZFBFMIN-NEXT: addi a0, sp, 4
173 ; RV64IZFBFMIN-NEXT: call notdead
174 ; RV64IZFBFMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
175 ; RV64IZFBFMIN-NEXT: addi sp, sp, 16
176 ; RV64IZFBFMIN-NEXT: ret
177 %1 = fadd bfloat %a, %b ; force store from FPR16
178 %2 = alloca bfloat, align 4
179 store bfloat %1, ptr %2
180 call void @notdead(ptr %2)