1 ;RUN: llc -mtriple=amdgcn-- < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
2 ;RUN: llc -mtriple=amdgcn-- -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
3 ;RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s
; Scalar f32 case: calls llvm.exp.f32 on the single float argument.
; The SI, VI and GFX9 check sections all expect the same body: a v_mul_f32
; of v0 by the literal 0x3fb8aa3b, then v_exp_f32 on v0, then the return.
; 0x3fb8aa3b is the IEEE-754 single-precision bit pattern of ~1.442695,
; which matches log2(e); presumably the hardware exp instruction is base 2
; and the input is pre-scaled for that reason -- confirm against the ISA docs.
5 define float @v_exp_f32(float %arg0) {
8 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9 ; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
10 ; SI-NEXT: v_exp_f32_e32 v0, v0
11 ; SI-NEXT: s_setpc_b64 s[30:31]
13 ; VI-LABEL: v_exp_f32:
15 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16 ; VI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
17 ; VI-NEXT: v_exp_f32_e32 v0, v0
18 ; VI-NEXT: s_setpc_b64 s[30:31]
20 ; GFX9-LABEL: v_exp_f32:
22 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
24 ; GFX9-NEXT: v_exp_f32_e32 v0, v0
25 ; GFX9-NEXT: s_setpc_b64 s[30:31]
26 %result = call float @llvm.exp.f32(float %arg0)
; Two-element f32 vector case: calls llvm.exp.v2f32 and returns the result.
; The checks use the shared GCN prefix, so one expectation covers every
; RUN line. The scale constant 0x3fb8aa3b is expected to be materialised
; once into a scalar register (captured as SREG) and reused by both
; multiplies. The multiply registers are wildcarded; the v_exp_f32
; destinations are pinned to v0 and v1.
30 define <2 x float> @v_exp_v2f32(<2 x float> %arg0) {
31 ; GCN-LABEL: v_exp_v2f32:
33 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34 ; GCN-NEXT: s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b
35 ; GCN-NEXT: v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
36 ; GCN-NEXT: v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
37 ; GCN-NEXT: v_exp_f32_e32 v0, v0
38 ; GCN-NEXT: v_exp_f32_e32 v1, v1
39 ; GCN-NEXT: s_setpc_b64 s[30:31]
40 %result = call <2 x float> @llvm.exp.v2f32(<2 x float> %arg0)
41 ret <2 x float> %result
; Three-element f32 vector case: calls llvm.exp.v3f32 and returns the result.
; Checked under the shared GCN prefix. The expected shape mirrors the
; two-element test with one extra lane: one s_mov_b32 of 0x3fb8aa3b into a
; captured scalar register, three wildcarded v_mul_f32 using it, then
; v_exp_f32 on v0, v1 and v2.
44 define <3 x float> @v_exp_v3f32(<3 x float> %arg0) {
45 ; GCN-LABEL: v_exp_v3f32:
47 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
48 ; GCN-NEXT: s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b
49 ; GCN-NEXT: v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
50 ; GCN-NEXT: v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
51 ; GCN-NEXT: v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
52 ; GCN-NEXT: v_exp_f32_e32 v0, v0
53 ; GCN-NEXT: v_exp_f32_e32 v1, v1
54 ; GCN-NEXT: v_exp_f32_e32 v2, v2
55 ; GCN-NEXT: s_setpc_b64 s[30:31]
57 %result = call <3 x float> @llvm.exp.v3f32(<3 x float> %arg0)
58 ret <3 x float> %result
; Four-element f32 vector case: calls llvm.exp.v4f32 and returns the result.
; Expected code: 0x3fb8aa3b moved into a captured scalar register, four
; v_mul_f32 (v0..v3, in that order) by that register, then four v_exp_f32
; on v0..v3. Unlike the two- and three-element tests, the vector registers
; are pinned here rather than wildcarded.
; NOTE(review): every check in this function uses the SI prefix, so the
; fiji and gfx900 RUN lines appear to verify nothing for it. If their
; output is the same, switching to the shared GCN prefix would close the
; gap -- confirm by running llc for those targets before changing it.
61 define <4 x float> @v_exp_v4f32(<4 x float> %arg0) {
62 ; SI-LABEL: v_exp_v4f32:
64 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
65 ; SI-NEXT: s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b
66 ; SI-NEXT: v_mul_f32_e32 v0, [[SREG]], v0
67 ; SI-NEXT: v_mul_f32_e32 v1, [[SREG]], v1
68 ; SI-NEXT: v_mul_f32_e32 v2, [[SREG]], v2
69 ; SI-NEXT: v_mul_f32_e32 v3, [[SREG]], v3
70 ; SI-NEXT: v_exp_f32_e32 v0, v0
71 ; SI-NEXT: v_exp_f32_e32 v1, v1
72 ; SI-NEXT: v_exp_f32_e32 v2, v2
73 ; SI-NEXT: v_exp_f32_e32 v3, v3
74 ; SI-NEXT: s_setpc_b64 s[30:31]
75 %result = call <4 x float> @llvm.exp.v4f32(<4 x float> %arg0)
76 ret <4 x float> %result
; Scalar f16 case: calls llvm.exp.f16 on the single half argument.
; The SI section expects the value to be passed through v_cvt_f16_f32 and
; v_cvt_f32_f16 and then handled with the f32 sequence (v_mul_f32 by
; 0x3fb8aa3b, v_exp_f32). The VI and GFX9 sections expect native half
; instructions instead: v_mul_f16 by 0x3dc5, then v_exp_f16.
; 0x3dc5 is the IEEE-754 half-precision bit pattern of ~1.4424, which looks
; like the f32 scale constant rounded to half -- confirm.
79 define half @v_exp_f16(half %arg0) {
80 ; SI-LABEL: v_exp_f16:
82 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
83 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
84 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
85 ; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
86 ; SI-NEXT: v_exp_f32_e32 v0, v0
87 ; SI-NEXT: s_setpc_b64 s[30:31]
89 ; VI-LABEL: v_exp_f16:
91 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
92 ; VI-NEXT: v_mul_f16_e32 v0, 0x3dc5, v0
93 ; VI-NEXT: v_exp_f16_e32 v0, v0
94 ; VI-NEXT: s_setpc_b64 s[30:31]
96 ; GFX9-LABEL: v_exp_f16:
98 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
99 ; GFX9-NEXT: v_mul_f16_e32 v0, 0x3dc5, v0
100 ; GFX9-NEXT: v_exp_f16_e32 v0, v0
101 ; GFX9-NEXT: s_setpc_b64 s[30:31]
102 %result = call half @llvm.exp.f16(half %arg0)
; Two-element f16 vector case: calls llvm.exp.v2f16 and returns the result.
; Each target section expects a different lowering:
;  - SI section: both lanes go through v_cvt_f16_f32 / v_cvt_f32_f16 and are
;    then processed with f32 multiplies (constant 0x3fb8aa3b held in a
;    captured scalar register) and v_exp_f32.
;  - VI section: 0x3dc5 is placed in a scalar register and also copied to a
;    vector register; the WORD_1 half is multiplied with an SDWA v_mul_f16
;    and the other half with a plain v_mul_f16, each result is fed to
;    v_exp_f16 (SDWA form writing WORD_1 for the first), and the two are
;    merged with v_or_b32.
;  - GFX9 section: a single packed v_pk_mul_f16 scales both halves, then two
;    v_exp_f16 (plain form into v1, SDWA form reading WORD_1 into v0) and a
;    v_pack_b32_f16 rebuild the pair.
106 define <2 x half> @v_exp_v2f16(<2 x half> %arg0) {
107 ; SI-LABEL: v_exp_v2f16:
109 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
110 ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
111 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
112 ; SI-NEXT: s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b
113 ; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
114 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
115 ; SI-NEXT: v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
116 ; SI-NEXT: v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
117 ; SI-NEXT: v_exp_f32_e32 v0, v0
118 ; SI-NEXT: v_exp_f32_e32 v1, v1
119 ; SI-NEXT: s_setpc_b64 s[30:31]
121 ; VI-LABEL: v_exp_v2f16:
123 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
124 ; VI-NEXT: s_movk_i32 [[SREG:s[0-9]+]], 0x3dc5
125 ; VI-NEXT: v_mov_b32_e32 [[VREG:v[0-9]+]], [[SREG]]
126 ; VI-NEXT: v_mul_f16_sdwa [[MUL1:v[0-9]+]], v{{[0-9]+}}, [[VREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
127 ; VI-NEXT: v_mul_f16_e32 [[MUL2:v[0-9]+]], [[SREG]], v{{[0-9]+}}
128 ; VI-NEXT: v_exp_f16_sdwa [[MUL1]], [[MUL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
129 ; VI-NEXT: v_exp_f16_e32 [[MUL2]], [[MUL2]]
130 ; VI-NEXT: v_or_b32_e32 v{{[0-9]+}}, [[MUL2]], [[MUL1]]
131 ; VI-NEXT: s_setpc_b64 s[30:31]
133 ; GFX9-LABEL: v_exp_v2f16:
135 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
136 ; GFX9-NEXT: s_movk_i32 [[SREG:s[0-9]+]], 0x3dc5
137 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, [[SREG]] op_sel_hi:[1,0]
138 ; GFX9-NEXT: v_exp_f16_e32 v1, v0
139 ; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
140 ; GFX9-NEXT: v_pack_b32_f16 v0, v1, v0
141 ; GFX9-NEXT: s_setpc_b64 s[30:31]
142 %result = call <2 x half> @llvm.exp.v2f16(<2 x half> %arg0)
143 ret <2 x half> %result
146 ; define <3 x half> @v_exp_v3f16(<3 x half> %arg0) {
147 ; %result = call <3 x half> @llvm.exp.v3f16(<3 x half> %arg0)
148 ; ret <3 x half> %result
; Four-element f16 vector case: calls llvm.exp.v4f16 and returns the result.
; Each target section expects a different lowering:
;  - SI section: all four lanes go through v_cvt_f16_f32 / v_cvt_f32_f16,
;    then f32 multiplies by a captured scalar register holding 0x3fb8aa3b,
;    then v_exp_f32 on v0..v3.
;  - VI section: 0x3dc5 is placed in a scalar register and copied to a vector
;    register; v1 and v0 are each scaled with a plain v_mul_f16 and with an
;    SDWA v_mul_f16 reading WORD_1, then four v_exp_f16 (two plain, two SDWA
;    writing WORD_1) are merged pairwise with v_or_b32 into v1 and v0.
;  - GFX9 section: the same four multiplies, but the SDWA ones take the
;    scalar register directly; four plain v_exp_f16 follow and the results
;    are recombined with v_pack_b32_f16 into v1 and v0.
; NOTE(review): in the VI section the two SDWA v_exp_f16 checks name v1 and
; v0 as sources instead of the MUL3 / MUL4 captures, so those captures are
; defined but never referenced there -- presumably the SDWA multiplies write
; back into v1 / v0; verify against actual llc output.
151 define <4 x half> @v_exp_v4f16(<4 x half> %arg0) {
152 ; SI-LABEL: v_exp_v4f16:
154 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
155 ; SI-NEXT: v_cvt_f16_f32_e32 v3, v3
156 ; SI-NEXT: v_cvt_f16_f32_e32 v2, v2
157 ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
158 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
159 ; SI-NEXT: s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b
160 ; SI-NEXT: v_cvt_f32_f16_e32 v3, v3
161 ; SI-NEXT: v_cvt_f32_f16_e32 v2, v2
162 ; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
163 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
164 ; SI-NEXT: v_mul_f32_e32 v0, [[SREG]], v0
165 ; SI-NEXT: v_mul_f32_e32 v1, [[SREG]], v1
166 ; SI-NEXT: v_mul_f32_e32 v2, [[SREG]], v2
167 ; SI-NEXT: v_mul_f32_e32 v3, [[SREG]], v3
168 ; SI-NEXT: v_exp_f32_e32 v0, v0
169 ; SI-NEXT: v_exp_f32_e32 v1, v1
170 ; SI-NEXT: v_exp_f32_e32 v2, v2
171 ; SI-NEXT: v_exp_f32_e32 v3, v3
172 ; SI-NEXT: s_setpc_b64 s[30:31]
174 ; VI-LABEL: v_exp_v4f16:
176 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
177 ; VI-NEXT: s_movk_i32 [[SREG:s[0-9]+]], 0x3dc5
178 ; VI-NEXT: v_mov_b32_e32 [[VREG:v[0-9]+]], [[SREG]]
179 ; VI-NEXT: v_mul_f16_e32 [[MUL1:v[0-9]+]], [[SREG]], v1
180 ; VI-NEXT: v_mul_f16_e32 [[MUL2:v[0-9]+]], [[SREG]], v0
181 ; VI-NEXT: v_mul_f16_sdwa [[MUL3:v[0-9]+]], v1, [[VREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
182 ; VI-NEXT: v_mul_f16_sdwa [[MUL4:v[0-9]+]], v0, [[VREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
183 ; VI-NEXT: v_exp_f16_e32 [[EXP1:v[0-9]+]], [[MUL1]]
184 ; VI-NEXT: v_exp_f16_sdwa [[EXP2:v[0-9]+]], v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
185 ; VI-NEXT: v_exp_f16_e32 [[EXP3:v[0-9]+]], [[MUL2]]
186 ; VI-NEXT: v_exp_f16_sdwa [[EXP4:v[0-9]+]], v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
187 ; VI-NEXT: v_or_b32_e32 v1, [[EXP1]], [[EXP2]]
188 ; VI-NEXT: v_or_b32_e32 v0, [[EXP3]], [[EXP4]]
189 ; VI-NEXT: s_setpc_b64 s[30:31]
191 ; GFX9-LABEL: v_exp_v4f16:
193 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
194 ; GFX9-NEXT: s_movk_i32 [[SREG:s[0-9]+]], 0x3dc5
195 ; GFX9-NEXT: v_mul_f16_e32 [[MUL1:v[0-9]+]], [[SREG]], v1
196 ; GFX9-NEXT: v_mul_f16_e32 [[MUL2:v[0-9]+]], [[SREG]], v0
197 ; GFX9-NEXT: v_mul_f16_sdwa [[MUL3:v[0-9]+]], v1, [[SREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
198 ; GFX9-NEXT: v_mul_f16_sdwa [[MUL4:v[0-9]+]], v0, [[SREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
199 ; GFX9-NEXT: v_exp_f16_e32 [[EXP1:v[0-9]+]], [[MUL1]]
200 ; GFX9-NEXT: v_exp_f16_e32 [[EXP2:v[0-9]+]], [[MUL3]]
201 ; GFX9-NEXT: v_exp_f16_e32 [[EXP3:v[0-9]+]], [[MUL2]]
202 ; GFX9-NEXT: v_exp_f16_e32 [[EXP4:v[0-9]+]], [[MUL4]]
203 ; GFX9-NEXT: v_pack_b32_f16 v1, [[EXP1]], [[EXP2]]
204 ; GFX9-NEXT: v_pack_b32_f16 v0, [[EXP3]], [[EXP4]]
205 ; GFX9-NEXT: s_setpc_b64 s[30:31]
206 %result = call <4 x half> @llvm.exp.v4f16(<4 x half> %arg0)
207 ret <4 x half> %result
; Declarations of the llvm.exp intrinsic overloads used by the tests in this
; file: float and half, each as a scalar and as 2-, 3- and 4-element vectors.
; The <3 x half> overload is declared even though the only function that
; would call it (v_exp_v3f16) is currently commented out.
210 declare float @llvm.exp.f32(float)
211 declare <2 x float> @llvm.exp.v2f32(<2 x float>)
212 declare <3 x float> @llvm.exp.v3f32(<3 x float>)
213 declare <4 x float> @llvm.exp.v4f32(<4 x float>)
215 declare half @llvm.exp.f16(half)
216 declare <2 x half> @llvm.exp.v2f16(<2 x half>)
217 declare <3 x half> @llvm.exp.v3f16(<3 x half>)
218 declare <4 x half> @llvm.exp.v4f16(<4 x half>)