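1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; NOTE(review): several check blocks below use hand-written FileCheck variables
; and regexes (for example the SREG/VREG/MUL/EXP captures and v{{[0-9]+}}).
; Re-running the update script is expected to replace them with concrete
; register names, so review the diff carefully after any regeneration.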
2 ;RUN: llc -mtriple=amdgcn-- < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
3 ;RUN: llc -mtriple=amdgcn-- -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
4 ;RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s
; Scalar float case: checks the code generated for a call to @llvm.exp.f32.
; The expected sequence is the same under the SI, VI and GFX9 prefixes:
; multiply v0 by the literal 0x3fb8aa3b, then apply v_exp_f32 to the product.
; 0x3fb8aa3b is the IEEE-754 single-precision bit pattern for ~1.442695,
; i.e. log2(e). Presumably v_exp_f32 is a base-2 exponential, so the pair
; computes exp(x) as exp2(x * log2(e)) -- confirm against the ISA manual.
6 define float @v_exp_f32(float %arg0) {
9 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10 ; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
11 ; SI-NEXT: v_exp_f32_e32 v0, v0
12 ; SI-NEXT: s_setpc_b64 s[30:31]
14 ; VI-LABEL: v_exp_f32:
16 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17 ; VI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
18 ; VI-NEXT: v_exp_f32_e32 v0, v0
19 ; VI-NEXT: s_setpc_b64 s[30:31]
21 ; GFX9-LABEL: v_exp_f32:
23 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
24 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
25 ; GFX9-NEXT: v_exp_f32_e32 v0, v0
26 ; GFX9-NEXT: s_setpc_b64 s[30:31]
27 %result = call float @llvm.exp.f32(float %arg0)
; <2 x float> case: checks the code generated for @llvm.exp.v2f32.
; A single set of checks under the common GCN prefix covers all three RUN
; lines. The constant 0x3fb8aa3b is expected to be materialized once into a
; scalar register (captured as SREG) and reused by both multiplies. The
; multiply operands are matched with register regexes, while the v_exp_f32
; lines pin the concrete registers v0 and v1.
31 define <2 x float> @v_exp_v2f32(<2 x float> %arg0) {
32 ; GCN-LABEL: v_exp_v2f32:
34 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35 ; GCN-NEXT: s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b
36 ; GCN-NEXT: v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
37 ; GCN-NEXT: v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
38 ; GCN-NEXT: v_exp_f32_e32 v0, v0
39 ; GCN-NEXT: v_exp_f32_e32 v1, v1
40 ; GCN-NEXT: s_setpc_b64 s[30:31]
41 %result = call <2 x float> @llvm.exp.v2f32(<2 x float> %arg0)
42 ret <2 x float> %result
; <3 x float> case: checks the code generated for @llvm.exp.v3f32.
; Same shape as the two-element test, with one more lane: the constant
; 0x3fb8aa3b is loaded once into a scalar register (SREG), followed by three
; multiplies that read SREG and three v_exp_f32 instructions on v0, v1 and v2.
; All checks use the common GCN prefix, so they apply to every RUN line.
45 define <3 x float> @v_exp_v3f32(<3 x float> %arg0) {
46 ; GCN-LABEL: v_exp_v3f32:
48 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
49 ; GCN-NEXT: s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b
50 ; GCN-NEXT: v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
51 ; GCN-NEXT: v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
52 ; GCN-NEXT: v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
53 ; GCN-NEXT: v_exp_f32_e32 v0, v0
54 ; GCN-NEXT: v_exp_f32_e32 v1, v1
55 ; GCN-NEXT: v_exp_f32_e32 v2, v2
56 ; GCN-NEXT: s_setpc_b64 s[30:31]
58 %result = call <3 x float> @llvm.exp.v3f32(<3 x float> %arg0)
59 ret <3 x float> %result
; <4 x float> case: checks the code generated for @llvm.exp.v4f32.
; Expected sequence: load 0x3fb8aa3b into a scalar register (SREG), multiply
; v0..v3 by it in place, then v_exp_f32 on v0..v3. Unlike the two- and
; three-element tests, the multiplies pin concrete vector registers.
; NOTE(review): the checks here exist only under the SI prefix, which is
; enabled solely by the first RUN line. The fiji and gfx900 runs therefore
; verify nothing for this function -- confirm whether that is intended or
; whether the common prefix was meant to be used.
62 define <4 x float> @v_exp_v4f32(<4 x float> %arg0) {
63 ; SI-LABEL: v_exp_v4f32:
65 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
66 ; SI-NEXT: s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b
67 ; SI-NEXT: v_mul_f32_e32 v0, [[SREG]], v0
68 ; SI-NEXT: v_mul_f32_e32 v1, [[SREG]], v1
69 ; SI-NEXT: v_mul_f32_e32 v2, [[SREG]], v2
70 ; SI-NEXT: v_mul_f32_e32 v3, [[SREG]], v3
71 ; SI-NEXT: v_exp_f32_e32 v0, v0
72 ; SI-NEXT: v_exp_f32_e32 v1, v1
73 ; SI-NEXT: v_exp_f32_e32 v2, v2
74 ; SI-NEXT: v_exp_f32_e32 v3, v3
75 ; SI-NEXT: s_setpc_b64 s[30:31]
76 %result = call <4 x float> @llvm.exp.v4f32(<4 x float> %arg0)
77 ret <4 x float> %result
; Scalar half case: checks the code generated for @llvm.exp.f16.
; Under the SI prefix the value is round-tripped through v_cvt_f16_f32 and
; v_cvt_f32_f16 and then handled with the f32 sequence (multiply by
; 0x3fb8aa3b, v_exp_f32); no conversion after the v_exp_f32 is expected.
; Under the VI and GFX9 prefixes the work is done directly in half precision:
; v_mul_f16 by the literal 0x3dc5 followed by v_exp_f16. 0x3dc5 is the
; IEEE-754 half-precision bit pattern for ~1.4424, the nearest f16 to log2(e).
80 define half @v_exp_f16(half %arg0) {
81 ; SI-LABEL: v_exp_f16:
83 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
84 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
85 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
86 ; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
87 ; SI-NEXT: v_exp_f32_e32 v0, v0
88 ; SI-NEXT: s_setpc_b64 s[30:31]
90 ; VI-LABEL: v_exp_f16:
92 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
93 ; VI-NEXT: v_mul_f16_e32 v0, 0x3dc5, v0
94 ; VI-NEXT: v_exp_f16_e32 v0, v0
95 ; VI-NEXT: s_setpc_b64 s[30:31]
97 ; GFX9-LABEL: v_exp_f16:
99 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
100 ; GFX9-NEXT: v_mul_f16_e32 v0, 0x3dc5, v0
101 ; GFX9-NEXT: v_exp_f16_e32 v0, v0
102 ; GFX9-NEXT: s_setpc_b64 s[30:31]
103 %result = call half @llvm.exp.f16(half %arg0)
; <2 x half> case: checks the code generated for @llvm.exp.v2f16.
; Expected output differs per target prefix:
;  * SI prefix: each element in v0/v1 goes through v_cvt_f16_f32 and
;    v_cvt_f32_f16, then the f32 sequence (multiply by SREG = 0x3fb8aa3b,
;    v_exp_f32).
;  * VI prefix: 0x3dc5 is loaded into a scalar register (SREG) and copied to a
;    vector register (VREG). The SDWA multiply reads WORD_1 of its source and
;    takes VREG; the plain v_mul_f16 takes SREG. The SDWA exp writes its
;    result to WORD_1, and the two halves are merged with v_or_b32.
;  * GFX9 prefix: one v_pk_mul_f16 with op_sel_hi:[1,0] multiplies both halves
;    by SREG; the halves are then exponentiated separately (the SDWA form reads
;    WORD_1) and repacked with v_and_b32 0xffff plus v_lshl_or_b32 by 16.
107 define <2 x half> @v_exp_v2f16(<2 x half> %arg0) {
108 ; SI-LABEL: v_exp_v2f16:
110 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
111 ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
112 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
113 ; SI-NEXT: s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b
114 ; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
115 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
116 ; SI-NEXT: v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
117 ; SI-NEXT: v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
118 ; SI-NEXT: v_exp_f32_e32 v0, v0
119 ; SI-NEXT: v_exp_f32_e32 v1, v1
120 ; SI-NEXT: s_setpc_b64 s[30:31]
122 ; VI-LABEL: v_exp_v2f16:
124 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
125 ; VI-NEXT: s_movk_i32 [[SREG:s[0-9]+]], 0x3dc5
126 ; VI-NEXT: v_mov_b32_e32 [[VREG:v[0-9]+]], [[SREG]]
127 ; VI-NEXT: v_mul_f16_sdwa [[MUL1:v[0-9]+]], v{{[0-9]+}}, [[VREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
128 ; VI-NEXT: v_mul_f16_e32 [[MUL2:v[0-9]+]], [[SREG]], v{{[0-9]+}}
129 ; VI-NEXT: v_exp_f16_sdwa [[MUL1]], [[MUL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
130 ; VI-NEXT: v_exp_f16_e32 [[MUL2]], [[MUL2]]
131 ; VI-NEXT: v_or_b32_e32 v{{[0-9]+}}, [[MUL2]], [[MUL1]]
132 ; VI-NEXT: s_setpc_b64 s[30:31]
134 ; GFX9-LABEL: v_exp_v2f16:
136 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
137 ; GFX9-NEXT: s_movk_i32 [[SREG:s[0-9]+]], 0x3dc5
138 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, [[SREG]] op_sel_hi:[1,0]
139 ; GFX9-NEXT: v_exp_f16_e32 v1, v0
140 ; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
141 ; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
142 ; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v1
143 ; GFX9-NEXT: s_setpc_b64 s[30:31]
144 %result = call <2 x half> @llvm.exp.v2f16(<2 x half> %arg0)
145 ret <2 x half> %result
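; TODO(review): the <3 x half> variant below is commented out and no reason is
; recorded here. Document why it is disabled, or re-enable it with checks.
148 ; define <3 x half> @v_exp_v3f16(<3 x half> %arg0) {
149 ; %result = call <3 x half> @llvm.exp.v3f16(<3 x half> %arg0)
150 ; ret <3 x half> %result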
; <4 x half> case: checks the code generated for @llvm.exp.v4f16.
; Expected output differs per target prefix:
;  * SI prefix: all four elements (v0..v3) go through v_cvt_f16_f32 and
;    v_cvt_f32_f16, then the f32 sequence with SREG = 0x3fb8aa3b.
;  * VI prefix: 0x3dc5 is loaded into SREG and copied to VREG. The low halves
;    of v1/v0 use v_mul_f16 with SREG; the high halves use SDWA multiplies that
;    read WORD_1 and take VREG. Four exps follow (the SDWA forms write WORD_1),
;    and each register pair is merged with v_or_b32 into v1 and v0.
;    NOTE(review): MUL3 and MUL4 are captured but never referenced; the SDWA
;    exp lines name v1 and v0 literally as their sources. Confirm that this is
;    the intended strictness.
;  * GFX9 prefix: both the plain and the SDWA multiplies take SREG directly
;    (no vector copy of the constant, and no v_pk_mul_f16 as in the
;    two-element test). Four plain v_exp_f16 follow; the results are repacked
;    with a 0xffff mask held in a vector register (VCONST) and v_lshl_or_b32
;    by 16 into v0 and v1.
153 define <4 x half> @v_exp_v4f16(<4 x half> %arg0) {
154 ; SI-LABEL: v_exp_v4f16:
156 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157 ; SI-NEXT: v_cvt_f16_f32_e32 v3, v3
158 ; SI-NEXT: v_cvt_f16_f32_e32 v2, v2
159 ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
160 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
161 ; SI-NEXT: s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b
162 ; SI-NEXT: v_cvt_f32_f16_e32 v3, v3
163 ; SI-NEXT: v_cvt_f32_f16_e32 v2, v2
164 ; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
165 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
166 ; SI-NEXT: v_mul_f32_e32 v0, [[SREG]], v0
167 ; SI-NEXT: v_mul_f32_e32 v1, [[SREG]], v1
168 ; SI-NEXT: v_mul_f32_e32 v2, [[SREG]], v2
169 ; SI-NEXT: v_mul_f32_e32 v3, [[SREG]], v3
170 ; SI-NEXT: v_exp_f32_e32 v0, v0
171 ; SI-NEXT: v_exp_f32_e32 v1, v1
172 ; SI-NEXT: v_exp_f32_e32 v2, v2
173 ; SI-NEXT: v_exp_f32_e32 v3, v3
174 ; SI-NEXT: s_setpc_b64 s[30:31]
176 ; VI-LABEL: v_exp_v4f16:
178 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
179 ; VI-NEXT: s_movk_i32 [[SREG:s[0-9]+]], 0x3dc5
180 ; VI-NEXT: v_mov_b32_e32 [[VREG:v[0-9]+]], [[SREG]]
181 ; VI-NEXT: v_mul_f16_e32 [[MUL1:v[0-9]+]], [[SREG]], v1
182 ; VI-NEXT: v_mul_f16_e32 [[MUL2:v[0-9]+]], [[SREG]], v0
183 ; VI-NEXT: v_mul_f16_sdwa [[MUL3:v[0-9]+]], v1, [[VREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
184 ; VI-NEXT: v_mul_f16_sdwa [[MUL4:v[0-9]+]], v0, [[VREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
185 ; VI-NEXT: v_exp_f16_e32 [[EXP1:v[0-9]+]], [[MUL1]]
186 ; VI-NEXT: v_exp_f16_sdwa [[EXP2:v[0-9]+]], v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
187 ; VI-NEXT: v_exp_f16_e32 [[EXP3:v[0-9]+]], [[MUL2]]
188 ; VI-NEXT: v_exp_f16_sdwa [[EXP4:v[0-9]+]], v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
189 ; VI-NEXT: v_or_b32_e32 v1, [[EXP1]], [[EXP2]]
190 ; VI-NEXT: v_or_b32_e32 v0, [[EXP3]], [[EXP4]]
191 ; VI-NEXT: s_setpc_b64 s[30:31]
193 ; GFX9-LABEL: v_exp_v4f16:
195 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
196 ; GFX9-NEXT: s_movk_i32 [[SREG:s[0-9]+]], 0x3dc5
197 ; GFX9-NEXT: v_mul_f16_e32 [[MUL1:v[0-9]+]], [[SREG]], v1
198 ; GFX9-NEXT: v_mul_f16_e32 [[MUL2:v[0-9]+]], [[SREG]], v0
199 ; GFX9-NEXT: v_mul_f16_sdwa [[MUL3:v[0-9]+]], v1, [[SREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
200 ; GFX9-NEXT: v_mul_f16_sdwa [[MUL4:v[0-9]+]], v0, [[SREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
201 ; GFX9-NEXT: v_exp_f16_e32 [[EXP1:v[0-9]+]], [[MUL1]]
202 ; GFX9-NEXT: v_exp_f16_e32 [[EXP2:v[0-9]+]], [[MUL2]]
203 ; GFX9-NEXT: v_exp_f16_e32 [[EXP3:v[0-9]+]], [[MUL4]]
204 ; GFX9-NEXT: v_exp_f16_e32 [[EXP4:v[0-9]+]], [[MUL3]]
205 ; GFX9-NEXT: v_mov_b32_e32 [[VCONST:v[0-9]+]], 0xffff
206 ; GFX9-NEXT: v_and_b32_e32 [[AND1:v[0-9]+]], [[VCONST]], [[EXP2]]
207 ; GFX9-NEXT: v_and_b32_e32 [[AND2:v[0-9]+]], [[VCONST]], [[EXP1]]
208 ; GFX9-NEXT: v_lshl_or_b32 v0, [[EXP3]], 16, [[AND1]]
209 ; GFX9-NEXT: v_lshl_or_b32 v1, [[EXP4]], 16, [[AND2]]
210 ; GFX9-NEXT: s_setpc_b64 s[30:31]
211 %result = call <4 x half> @llvm.exp.v4f16(<4 x half> %arg0)
212 ret <4 x half> %result
; Declarations of the llvm.exp intrinsic overloads used by the tests in this
; file: scalar, 2-, 3- and 4-element vectors, for both float and half.
; NOTE(review): the <3 x half> overload is declared, but the only function
; that calls it is commented out in this file.
215 declare float @llvm.exp.f32(float)
216 declare <2 x float> @llvm.exp.v2f32(<2 x float>)
217 declare <3 x float> @llvm.exp.v3f32(<3 x float>)
218 declare <4 x float> @llvm.exp.v4f32(<4 x float>)
220 declare half @llvm.exp.f16(half)
221 declare <2 x half> @llvm.exp.v2f16(<2 x half>)
222 declare <3 x half> @llvm.exp.v3f16(<3 x half>)
223 declare <4 x half> @llvm.exp.v4f16(<4 x half>)