1 ; ## Full FP16 support enabled by default.
\r
2 ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
\r
3 ; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
\r
4 ; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-F16 %s
\r
5 ; ## FP16 support explicitly disabled.
\r
6 ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
\r
7 ; RUN: -O0 -disable-post-ra -frame-pointer=all --nvptx-no-f16-math \
\r
8 ; RUN: -verify-machineinstrs \
\r
9 ; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-NOF16 %s
\r
10 ; ## FP16 is not supported by hardware.
\r
11 ; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 -asm-verbose=false \
\r
12 ; RUN: -disable-post-ra -frame-pointer=all -verify-machineinstrs \
\r
13 ; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-NOF16 %s
\r
15 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
\r
; Returning a constant <2 x half>: both halves are materialized as a single
; 32-bit immediate. 1073757184 == 0x40003C00, i.e. half 2.0 (0x4000) in the
; upper 16 bits and half 1.0 (0x3C00) in the lower 16 bits.
; Register captures use [0-9]+ so multi-digit register numbers match as well.
17 ; CHECK-LABEL: test_ret_const(
\r
18 ; CHECK: mov.u32 [[T:%r[0-9]+]], 1073757184;
\r
19 ; CHECK: mov.b32 [[R:%hh[0-9]+]], [[T]];
\r
20 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
\r
22 define <2 x half> @test_ret_const() #0 {
\r
23 ret <2 x half> <half 1.0, half 2.0>
\r
; Constant-index extract of element 0: the packed argument is loaded as one
; 32-bit register and unpacked with mov.b32; the first unpacked element is
; stored to the return slot while the other one lands in %tmp_hi.
26 ; CHECK-LABEL: test_extract_0(
\r
27 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_0_param_0];
\r
28 ; CHECK: mov.b32 {[[R:%h[0-9]+]], %tmp_hi}, [[A]];
\r
29 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
\r
31 define half @test_extract_0(<2 x half> %a) #0 {
\r
32 %e = extractelement <2 x half> %a, i32 0
\r
; Constant-index extract of element 1: same unpacking as for element 0, but
; the second unpacked element is returned and the first goes to %tmp_lo.
36 ; CHECK-LABEL: test_extract_1(
\r
37 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_1_param_0];
\r
38 ; CHECK: mov.b32 {%tmp_lo, [[R:%h[0-9]+]]}, [[A]];
\r
39 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
\r
41 define half @test_extract_1(<2 x half> %a) #0 {
\r
42 %e = extractelement <2 x half> %a, i32 1
\r
; Variable-index extract: both halves are unpacked, the 64-bit index is
; compared against 0, and selp.b16 picks the first element when the predicate
; holds and the second one otherwise.
46 ; CHECK-LABEL: test_extract_i(
\r
47 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_i_param_0];
\r
48 ; CHECK-DAG: ld.param.u64 [[IDX:%rd[0-9]+]], [test_extract_i_param_1];
\r
49 ; CHECK-DAG: setp.eq.s64 [[PRED:%p[0-9]+]], [[IDX]], 0;
\r
50 ; CHECK-DAG: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[A]];
\r
51 ; CHECK: selp.b16 [[R:%h[0-9]+]], [[E0]], [[E1]], [[PRED]];
\r
52 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
\r
54 define half @test_extract_i(<2 x half> %a, i64 %idx) #0 {
\r
55 %e = extractelement <2 x half> %a, i64 %idx
\r
; Vector fadd. The f16 run expects a single add.rn.f16x2 on the packed
; registers. The no-f16 runs expect each operand to be unpacked, every half
; widened to f32, added with add.rn.f32, narrowed back with cvt.rn.f16.f32
; and repacked into one 32-bit register before the store.
59 ; CHECK-LABEL: test_fadd(
\r
60 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fadd_param_0];
\r
61 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fadd_param_1];
\r
63 ; CHECK-F16-NEXT: add.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]];
\r
65 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
66 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
\r
67 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
\r
68 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
\r
69 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
\r
70 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
\r
71 ; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
\r
72 ; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
\r
73 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
\r
74 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
\r
75 ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
77 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
\r
79 define <2 x half> @test_fadd(<2 x half> %a, <2 x half> %b) #0 {
\r
80 %r = fadd <2 x half> %a, %b
\r
84 ; Check that we can lower fadd with immediate arguments.
\r
; Constant vector as the first fadd operand. In the f16 run the constant
; <1.0, 2.0> is materialized as the 32-bit immediate 1073757184 (0x40003C00)
; and still ends up as the second operand of add.rn.f16x2. In the no-f16 runs
; the per-element f32 adds take the constants 0f3F800000 (1.0) and
; 0f40000000 (2.0) directly as immediates.
; Register captures use [0-9]+ so multi-digit register numbers match as well.
85 ; CHECK-LABEL: test_fadd_imm_0(
\r
86 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fadd_imm_0_param_0];
\r
88 ; CHECK-F16: mov.u32 [[I:%r[0-9]+]], 1073757184;
\r
89 ; CHECK-F16: mov.b32 [[IHH:%hh[0-9]+]], [[I]];
\r
90 ; CHECK-F16: add.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[IHH]];
\r
92 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
93 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
\r
94 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
\r
95 ; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], 0f3F800000;
\r
96 ; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], 0f40000000;
\r
97 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
\r
98 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
\r
99 ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
101 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
\r
103 define <2 x half> @test_fadd_imm_0(<2 x half> %a) #0 {
\r
104 %r = fadd <2 x half> <half 1.0, half 2.0>, %a
\r
; Constant vector as the second fadd operand. Expected lowering mirrors
; test_fadd_imm_0: a packed immediate feeding add.rn.f16x2 in the f16 run,
; per-element f32 adds with immediate operands in the no-f16 runs.
; The parameter register is captured as B in this function, so the no-f16
; unpack must reference B; referring to A would silently reuse whatever value
; an earlier function left in that variable.
; Register captures use [0-9]+ so multi-digit register numbers match as well.
108 ; CHECK-LABEL: test_fadd_imm_1(
\r
109 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fadd_imm_1_param_0];
\r
111 ; CHECK-F16: mov.u32 [[I:%r[0-9]+]], 1073757184;
\r
112 ; CHECK-F16: mov.b32 [[IHH:%hh[0-9]+]], [[I]];
\r
113 ; CHECK-F16: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[IHH]];
\r
115 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[B]]
\r
116 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
\r
117 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
\r
118 ; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], 0f3F800000;
\r
119 ; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], 0f40000000;
\r
120 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
\r
121 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
\r
122 ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
124 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
\r
126 define <2 x half> @test_fadd_imm_1(<2 x half> %a) #0 {
\r
127 %r = fadd <2 x half> %a, <half 1.0, half 2.0>
\r
; Vector fsub. The f16 run expects a single sub.rn.f16x2; the no-f16 runs
; expect unpack, widening to f32, two sub.rn.f32, narrowing and repacking.
131 ; CHECK-LABEL: test_fsub(
\r
132 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fsub_param_0];
\r
134 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fsub_param_1];
\r
135 ; CHECK-F16-NEXT: sub.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]];
\r
137 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
138 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
\r
139 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
\r
140 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
\r
141 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
\r
142 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
\r
143 ; CHECK-NOF16-DAG: sub.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
\r
144 ; CHECK-NOF16-DAG: sub.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
\r
145 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
\r
146 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
\r
147 ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
149 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
\r
151 define <2 x half> @test_fsub(<2 x half> %a, <2 x half> %b) #0 {
\r
152 %r = fsub <2 x half> %a, %b
\r
; Negation written as (0.0 - a). The f16 run expects a packed zero constant
; (32-bit immediate 0 moved into an f16x2 register) as the first operand of
; sub.rn.f16x2. The no-f16 runs expect one f32 zero register shared by both
; per-element sub.rn.f32 instructions.
; Register captures use [0-9]+ so multi-digit register numbers match as well.
156 ; CHECK-LABEL: test_fneg(
\r
157 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fneg_param_0];
\r
159 ; CHECK-F16: mov.u32 [[I0:%r[0-9]+]], 0;
\r
160 ; CHECK-F16: mov.b32 [[IHH0:%hh[0-9]+]], [[I0]];
\r
161 ; CHECK-F16-NEXT: sub.rn.f16x2 [[R:%hh[0-9]+]], [[IHH0]], [[A]];
\r
163 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
164 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
\r
165 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
\r
166 ; CHECK-NOF16-DAG: mov.f32 [[Z:%f[0-9]+]], 0f00000000;
\r
167 ; CHECK-NOF16-DAG: sub.rn.f32 [[FR0:%f[0-9]+]], [[Z]], [[FA0]];
\r
168 ; CHECK-NOF16-DAG: sub.rn.f32 [[FR1:%f[0-9]+]], [[Z]], [[FA1]];
\r
169 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
\r
170 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
\r
171 ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
173 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
\r
175 define <2 x half> @test_fneg(<2 x half> %a) #0 {
\r
176 %r = fsub <2 x half> <half 0.0, half 0.0>, %a
\r
; Vector fmul. The f16 run expects a single mul.rn.f16x2; the no-f16 runs
; expect unpack, widening to f32, two mul.rn.f32, narrowing and repacking.
180 ; CHECK-LABEL: test_fmul(
\r
181 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fmul_param_0];
\r
182 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fmul_param_1];
\r
183 ; CHECK-F16-NEXT: mul.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]];
\r
185 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
186 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
\r
187 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
\r
188 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
\r
189 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
\r
190 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
\r
191 ; CHECK-NOF16-DAG: mul.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
\r
192 ; CHECK-NOF16-DAG: mul.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
\r
193 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
\r
194 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
\r
195 ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
197 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
\r
199 define <2 x half> @test_fmul(<2 x half> %a, <2 x half> %b) #0 {
\r
200 %r = fmul <2 x half> %a, %b
\r
; Vector fdiv. All expectations use the common prefix, so every run is
; expected to divide in f32: unpack both operands, widen each half, use
; div.rn.f32 per element, narrow the results and repack them.
204 ; CHECK-LABEL: test_fdiv(
\r
205 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fdiv_param_0];
\r
206 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fdiv_param_1];
\r
207 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
208 ; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
\r
209 ; CHECK-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]];
\r
210 ; CHECK-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]];
\r
211 ; CHECK-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]];
\r
212 ; CHECK-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]];
\r
213 ; CHECK-DAG: div.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
\r
214 ; CHECK-DAG: div.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
\r
215 ; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]];
\r
216 ; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]];
\r
217 ; CHECK-NEXT: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
218 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
\r
220 define <2 x half> @test_fdiv(<2 x half> %a, <2 x half> %b) #0 {
\r
221 %r = fdiv <2 x half> %a, %b
\r
; Vector frem. All expectations use the common prefix. Each element is
; expected to be expanded in f32 as a div / cvt.rzi / mul / sub sequence,
; i.e. the remainder is formed from the quotient converted by cvt.rzi,
; multiplied by the divisor and subtracted from the dividend.
225 ; CHECK-LABEL: test_frem(
\r
226 ; -- Load two 16x2 inputs and split them into f16 elements
\r
227 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_frem_param_0];
\r
228 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_frem_param_1];
\r
229 ; -- Split into elements
\r
230 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
231 ; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
\r
232 ; -- promote to f32.
\r
233 ; CHECK-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]];
\r
234 ; CHECK-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]];
\r
235 ; CHECK-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]];
\r
236 ; CHECK-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]];
\r
237 ; -- frem(a[0],b[0]).
\r
238 ; CHECK-DAG: div.rn.f32 [[FD0:%f[0-9]+]], [[FA0]], [[FB0]];
\r
239 ; CHECK-DAG: cvt.rzi.f32.f32 [[DI0:%f[0-9]+]], [[FD0]];
\r
240 ; CHECK-DAG: mul.f32 [[RI0:%f[0-9]+]], [[DI0]], [[FB0]];
\r
241 ; CHECK-DAG: sub.f32 [[RF0:%f[0-9]+]], [[FA0]], [[RI0]];
\r
242 ; -- frem(a[1],b[1]).
\r
243 ; CHECK-DAG: div.rn.f32 [[FD1:%f[0-9]+]], [[FA1]], [[FB1]];
\r
244 ; CHECK-DAG: cvt.rzi.f32.f32 [[DI1:%f[0-9]+]], [[FD1]];
\r
245 ; CHECK-DAG: mul.f32 [[RI1:%f[0-9]+]], [[DI1]], [[FB1]];
\r
246 ; CHECK-DAG: sub.f32 [[RF1:%f[0-9]+]], [[FA1]], [[RI1]];
\r
247 ; -- convert back to f16.
\r
248 ; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
\r
249 ; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
\r
250 ; -- merge into f16x2 and return it.
\r
251 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
252 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
\r
254 define <2 x half> @test_frem(<2 x half> %a, <2 x half> %b) #0 {
\r
255 %r = frem <2 x half> %a, %b
\r
; Load/store of <2 x half> through pointers: the value is expected to be
; loaded as one 32-bit packed register, unpacked, and written back with a
; two-element 16-bit vector store.
259 ; CHECK-LABEL: .func test_ldst_v2f16(
\r
260 ; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v2f16_param_0];
\r
261 ; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v2f16_param_1];
\r
262 ; CHECK-DAG: ld.b32 [[E:%hh[0-9]+]], [%[[A]]]
\r
263 ; CHECK: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[E]];
\r
264 ; CHECK-DAG: st.v2.b16 [%[[B]]], {[[E0]], [[E1]]};
\r
266 define void @test_ldst_v2f16(<2 x half>* %a, <2 x half>* %b) {
\r
267 %t1 = load <2 x half>, <2 x half>* %a
\r
268 store <2 x half> %t1, <2 x half>* %b, align 16
\r
; Load/store of <3 x half> through pointers: only the 64-bit load and the two
; stores (32 bits at offset 0, 16 bits at offset 4) are matched.
272 ; CHECK-LABEL: .func test_ldst_v3f16(
\r
273 ; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v3f16_param_0];
\r
274 ; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v3f16_param_1];
\r
275 ; -- v3 is inconvenient to capture as it's lowered as a 64-bit load plus a
\r
276 ; fair number of bit-shifting instructions that may change at llvm's whim.
\r
277 ; So we only verify that we issue the correct number of writes at the
\r
278 ; correct offsets, but not the values we write.
\r
279 ; CHECK-DAG: ld.u64
\r
280 ; CHECK-DAG: st.u32 [%[[B]]],
\r
281 ; CHECK-DAG: st.b16 [%[[B]]+4],
\r
283 define void @test_ldst_v3f16(<3 x half>* %a, <3 x half>* %b) {
\r
284 %t1 = load <3 x half>, <3 x half>* %a
\r
285 store <3 x half> %t1, <3 x half>* %b, align 16
\r
; Load/store of <4 x half> through pointers: expected as one four-element
; 16-bit vector load followed by a matching four-element vector store of the
; same registers.
289 ; CHECK-LABEL: .func test_ldst_v4f16(
\r
290 ; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v4f16_param_0];
\r
291 ; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v4f16_param_1];
\r
292 ; CHECK-DAG: ld.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [%[[A]]];
\r
293 ; CHECK-DAG: st.v4.b16 [%[[B]]], {[[E0]], [[E1]], [[E2]], [[E3]]};
\r
295 define void @test_ldst_v4f16(<4 x half>* %a, <4 x half>* %b) {
\r
296 %t1 = load <4 x half>, <4 x half>* %a
\r
297 store <4 x half> %t1, <4 x half>* %b, align 16
\r
; Load/store of <8 x half> through pointers: expected as one four-element
; 32-bit vector load into %r registers and a matching four-element 32-bit
; vector store.
301 ; CHECK-LABEL: .func test_ldst_v8f16(
\r
302 ; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v8f16_param_0];
\r
303 ; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v8f16_param_1];
\r
304 ; CHECK-DAG: ld.v4.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [%[[A]]];
\r
305 ; CHECK-DAG: st.v4.b32 [%[[B]]], {[[E0]], [[E1]], [[E2]], [[E3]]};
\r
307 define void @test_ldst_v8f16(<8 x half>* %a, <8 x half>* %b) {
\r
308 %t1 = load <8 x half>, <8 x half>* %a
\r
309 store <8 x half> %t1, <8 x half>* %b, align 16
\r
; External callee taking and returning <2 x half>; only declared here.
313 declare <2 x half> @test_callee(<2 x half> %a, <2 x half> %b) #0
\r
; Call with arguments forwarded in order: each packed argument is stored to
; its own 4-byte aligned param slot (a -> param0, b -> param1), the result is
; read from retval0 and stored to this function's return slot.
315 ; CHECK-LABEL: test_call(
\r
316 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_call_param_0];
\r
317 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_call_param_1];
\r
319 ; CHECK-DAG: .param .align 4 .b8 param0[4];
\r
320 ; CHECK-DAG: .param .align 4 .b8 param1[4];
\r
321 ; CHECK-DAG: st.param.b32 [param0+0], [[A]];
\r
322 ; CHECK-DAG: st.param.b32 [param1+0], [[B]];
\r
323 ; CHECK-DAG: .param .align 4 .b8 retval0[4];
\r
324 ; CHECK: call.uni (retval0),
\r
325 ; CHECK-NEXT: test_callee,
\r
327 ; CHECK-NEXT: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0];
\r
329 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
\r
331 define <2 x half> @test_call(<2 x half> %a, <2 x half> %b) #0 {
\r
332 %r = call <2 x half> @test_callee(<2 x half> %a, <2 x half> %b)
\r
; Call with the two arguments swapped: b is expected in param0 and a in
; param1; the rest of the call sequence matches the in-order call test.
336 ; CHECK-LABEL: test_call_flipped(
\r
337 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_call_flipped_param_0];
\r
338 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_call_flipped_param_1];
\r
340 ; CHECK-DAG: .param .align 4 .b8 param0[4];
\r
341 ; CHECK-DAG: .param .align 4 .b8 param1[4];
\r
342 ; CHECK-DAG: st.param.b32 [param0+0], [[B]];
\r
343 ; CHECK-DAG: st.param.b32 [param1+0], [[A]];
\r
344 ; CHECK-DAG: .param .align 4 .b8 retval0[4];
\r
345 ; CHECK: call.uni (retval0),
\r
346 ; CHECK-NEXT: test_callee,
\r
348 ; CHECK-NEXT: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0];
\r
350 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
\r
352 define <2 x half> @test_call_flipped(<2 x half> %a, <2 x half> %b) #0 {
\r
353 %r = call <2 x half> @test_callee(<2 x half> %b, <2 x half> %a)
\r
; Same as the swapped-argument call test, but the IR marks the call as a
; tail call. The expected output is still a regular call.uni sequence with
; the result copied from retval0 to the return slot.
357 ; CHECK-LABEL: test_tailcall_flipped(
\r
358 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_tailcall_flipped_param_0];
\r
359 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_tailcall_flipped_param_1];
\r
361 ; CHECK-DAG: .param .align 4 .b8 param0[4];
\r
362 ; CHECK-DAG: .param .align 4 .b8 param1[4];
\r
363 ; CHECK-DAG: st.param.b32 [param0+0], [[B]];
\r
364 ; CHECK-DAG: st.param.b32 [param1+0], [[A]];
\r
365 ; CHECK-DAG: .param .align 4 .b8 retval0[4];
\r
366 ; CHECK: call.uni (retval0),
\r
367 ; CHECK-NEXT: test_callee,
\r
369 ; CHECK-NEXT: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0];
\r
371 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
\r
373 define <2 x half> @test_tailcall_flipped(<2 x half> %a, <2 x half> %b) #0 {
\r
374 %r = tail call <2 x half> @test_callee(<2 x half> %b, <2 x half> %a)
\r
; Select between two <2 x half> values on a scalar i1: the condition is
; loaded as a byte, turned into a predicate by comparing a 16-bit register
; with 1, and a single selp.b32 picks one whole packed register.
378 ; CHECK-LABEL: test_select(
\r
379 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_param_0];
\r
380 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_select_param_1];
\r
381 ; CHECK-DAG: ld.param.u8 [[C:%rs[0-9]+]], [test_select_param_2]
\r
382 ; CHECK-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1;
\r
383 ; CHECK-NEXT: selp.b32 [[R:%hh[0-9]+]], [[A]], [[B]], [[PRED]];
\r
384 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
\r
386 define <2 x half> @test_select(<2 x half> %a, <2 x half> %b, i1 zeroext %c) #0 {
\r
387 %r = select i1 %c, <2 x half> %a, <2 x half> %b
\r
; Per-element select driven by fcmp une on <2 x half>. The f16 run expects a
; single setp.neu.f16x2 that yields two predicates; the no-f16 runs expect
; the compared halves to be widened to f32 and compared with two
; setp.neu.f32. In every run the selected operands are unpacked, chosen per
; element with selp.b16, and repacked.
391 ; CHECK-LABEL: test_select_cc(
\r
392 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_cc_param_0];
\r
393 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_select_cc_param_1];
\r
394 ; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_select_cc_param_2];
\r
395 ; CHECK-DAG: ld.param.b32 [[D:%hh[0-9]+]], [test_select_cc_param_3];
\r
397 ; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[C]], [[D]]
\r
399 ; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]]
\r
400 ; CHECK-NOF16-DAG: mov.b32 {[[D0:%h[0-9]+]], [[D1:%h[0-9]+]]}, [[D]]
\r
401 ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF0:%f[0-9]+]], [[D0]];
\r
402 ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF0:%f[0-9]+]], [[C0]];
\r
403 ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF1:%f[0-9]+]], [[D1]];
\r
404 ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF1:%f[0-9]+]], [[C1]];
\r
405 ; CHECK-NOF16-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[CF0]], [[DF0]]
\r
406 ; CHECK-NOF16-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[CF1]], [[DF1]]
\r
408 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
409 ; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
\r
410 ; CHECK-DAG: selp.b16 [[R0:%h[0-9]+]], [[A0]], [[B0]], [[P0]];
\r
411 ; CHECK-DAG: selp.b16 [[R1:%h[0-9]+]], [[A1]], [[B1]], [[P1]];
\r
412 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
413 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
\r
415 define <2 x half> @test_select_cc(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x half> %d) #0 {
\r
416 %cc = fcmp une <2 x half> %c, %d
\r
417 %r = select <2 x i1> %cc, <2 x half> %a, <2 x half> %b
\r
; Compare <2 x half> operands, select between <2 x float> values. The
; predicates come from setp.neu.f16x2 in the f16 run or from two
; setp.neu.f32 on widened halves in the no-f16 runs; the selected values are
; chosen per element with selp.f32 and stored as a two-element f32 vector.
421 ; CHECK-LABEL: test_select_cc_f32_f16(
\r
422 ; CHECK-DAG: ld.param.v2.f32 {[[A0:%f[0-9]+]], [[A1:%f[0-9]+]]}, [test_select_cc_f32_f16_param_0];
\r
423 ; CHECK-DAG: ld.param.v2.f32 {[[B0:%f[0-9]+]], [[B1:%f[0-9]+]]}, [test_select_cc_f32_f16_param_1];
\r
424 ; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_select_cc_f32_f16_param_2];
\r
425 ; CHECK-DAG: ld.param.b32 [[D:%hh[0-9]+]], [test_select_cc_f32_f16_param_3];
\r
427 ; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[C]], [[D]]
\r
428 ; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]]
\r
429 ; CHECK-NOF16-DAG: mov.b32 {[[D0:%h[0-9]+]], [[D1:%h[0-9]+]]}, [[D]]
\r
430 ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF0:%f[0-9]+]], [[D0]];
\r
431 ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF0:%f[0-9]+]], [[C0]];
\r
432 ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF1:%f[0-9]+]], [[D1]];
\r
433 ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF1:%f[0-9]+]], [[C1]];
\r
434 ; CHECK-NOF16-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[CF0]], [[DF0]]
\r
435 ; CHECK-NOF16-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[CF1]], [[DF1]]
\r
437 ; CHECK-DAG: selp.f32 [[R0:%f[0-9]+]], [[A0]], [[B0]], [[P0]];
\r
438 ; CHECK-DAG: selp.f32 [[R1:%f[0-9]+]], [[A1]], [[B1]], [[P1]];
\r
439 ; CHECK-NEXT: st.param.v2.f32 [func_retval0+0], {[[R0]], [[R1]]};
\r
441 define <2 x float> @test_select_cc_f32_f16(<2 x float> %a, <2 x float> %b,
\r
442 <2 x half> %c, <2 x half> %d) #0 {
\r
443 %cc = fcmp une <2 x half> %c, %d
\r
444 %r = select <2 x i1> %cc, <2 x float> %a, <2 x float> %b
\r
; Compare <2 x float> operands, select between <2 x half> values. The
; comparison is done directly in f32, so all expectations use the common
; prefix: two setp.neu.f32, unpacking of both half vectors, per-element
; selp.b16 and repacking of the result.
448 ; CHECK-LABEL: test_select_cc_f16_f32(
\r
449 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_cc_f16_f32_param_0];
\r
450 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_select_cc_f16_f32_param_1];
\r
451 ; CHECK-DAG: ld.param.v2.f32 {[[C0:%f[0-9]+]], [[C1:%f[0-9]+]]}, [test_select_cc_f16_f32_param_2];
\r
452 ; CHECK-DAG: ld.param.v2.f32 {[[D0:%f[0-9]+]], [[D1:%f[0-9]+]]}, [test_select_cc_f16_f32_param_3];
\r
453 ; CHECK-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[C0]], [[D0]]
\r
454 ; CHECK-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[C1]], [[D1]]
\r
455 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
456 ; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
\r
457 ; CHECK-DAG: selp.b16 [[R0:%h[0-9]+]], [[A0]], [[B0]], [[P0]];
\r
458 ; CHECK-DAG: selp.b16 [[R1:%h[0-9]+]], [[A1]], [[B1]], [[P1]];
\r
459 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
460 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
\r
462 define <2 x half> @test_select_cc_f16_f32(<2 x half> %a, <2 x half> %b,
\r
463 <2 x float> %c, <2 x float> %d) #0 {
\r
464 %cc = fcmp une <2 x float> %c, %d
\r
465 %r = select <2 x i1> %cc, <2 x half> %a, <2 x half> %b
\r
; fcmp une on <2 x half>: the f16 run expects one setp.neu.f16x2 producing
; two predicates; the no-f16 runs unpack, widen to f32 and use two
; setp.neu.f32. Each predicate is turned into -1/0 by selp.u16 and stored as
; one byte of the <2 x i1> result (offsets 0 and 1).
469 ; CHECK-LABEL: test_fcmp_une(
\r
470 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_une_param_0];
\r
471 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_une_param_1];
\r
472 ; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
\r
473 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
474 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
\r
475 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
\r
476 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
\r
477 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
\r
478 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
\r
479 ; CHECK-NOF16-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
\r
480 ; CHECK-NOF16-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
\r
481 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
\r
482 ; CHECK-NEXT: st.param.b8 [func_retval0+0], [[R0]];
\r
483 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
\r
484 ; CHECK-NEXT: st.param.b8 [func_retval0+1], [[R1]];
\r
486 define <2 x i1> @test_fcmp_une(<2 x half> %a, <2 x half> %b) #0 {
\r
487 %r = fcmp une <2 x half> %a, %b
\r
; fcmp ueq on <2 x half>: expected as setp.equ.f16x2 in the f16 run and as
; two setp.equ.f32 on widened halves in the no-f16 runs. Each predicate is
; turned into -1/0 by selp.u16 and stored as one byte of the result.
491 ; CHECK-LABEL: test_fcmp_ueq(
\r
492 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ueq_param_0];
\r
493 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ueq_param_1];
\r
494 ; CHECK-F16: setp.equ.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
\r
495 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
496 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
\r
497 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
\r
498 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
\r
499 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
\r
500 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
\r
501 ; CHECK-NOF16-DAG: setp.equ.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
\r
502 ; CHECK-NOF16-DAG: setp.equ.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
\r
503 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
\r
504 ; CHECK-NEXT: st.param.b8 [func_retval0+0], [[R0]];
\r
505 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
\r
506 ; CHECK-NEXT: st.param.b8 [func_retval0+1], [[R1]];
\r
508 define <2 x i1> @test_fcmp_ueq(<2 x half> %a, <2 x half> %b) #0 {
\r
509 %r = fcmp ueq <2 x half> %a, %b
\r
; fcmp ugt on <2 x half>: expected as setp.gtu.f16x2 in the f16 run and as
; two setp.gtu.f32 on widened halves in the no-f16 runs. Each predicate is
; turned into -1/0 by selp.u16 and stored as one byte of the result.
513 ; CHECK-LABEL: test_fcmp_ugt(
\r
514 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ugt_param_0];
\r
515 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ugt_param_1];
\r
516 ; CHECK-F16: setp.gtu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
\r
517 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
518 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
\r
519 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
\r
520 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
\r
521 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
\r
522 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
\r
523 ; CHECK-NOF16-DAG: setp.gtu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
\r
524 ; CHECK-NOF16-DAG: setp.gtu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
\r
525 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
\r
526 ; CHECK-NEXT: st.param.b8 [func_retval0+0], [[R0]];
\r
527 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
\r
528 ; CHECK-NEXT: st.param.b8 [func_retval0+1], [[R1]];
\r
530 define <2 x i1> @test_fcmp_ugt(<2 x half> %a, <2 x half> %b) #0 {
\r
531 %r = fcmp ugt <2 x half> %a, %b
\r
; fcmp uge on <2 x half>: expected as setp.geu.f16x2 in the f16 run and as
; two setp.geu.f32 on widened halves in the no-f16 runs. Each predicate is
; turned into -1/0 by selp.u16 and stored as one byte of the result.
535 ; CHECK-LABEL: test_fcmp_uge(
\r
536 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_uge_param_0];
\r
537 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_uge_param_1];
\r
538 ; CHECK-F16: setp.geu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
\r
539 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
540 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
\r
541 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
\r
542 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
\r
543 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
\r
544 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
\r
545 ; CHECK-NOF16-DAG: setp.geu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
\r
546 ; CHECK-NOF16-DAG: setp.geu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
\r
547 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
\r
548 ; CHECK-NEXT: st.param.b8 [func_retval0+0], [[R0]];
\r
549 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
\r
550 ; CHECK-NEXT: st.param.b8 [func_retval0+1], [[R1]];
\r
552 define <2 x i1> @test_fcmp_uge(<2 x half> %a, <2 x half> %b) #0 {
\r
553 %r = fcmp uge <2 x half> %a, %b
\r
; fcmp ult on <2 x half>: expected as setp.ltu.f16x2 in the f16 run and as
; two setp.ltu.f32 on widened halves in the no-f16 runs. Each predicate is
; turned into -1/0 by selp.u16 and stored as one byte of the result.
557 ; CHECK-LABEL: test_fcmp_ult(
\r
558 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ult_param_0];
\r
559 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ult_param_1];
\r
560 ; CHECK-F16: setp.ltu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
\r
561 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
562 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
\r
563 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
\r
564 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
\r
565 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
\r
566 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
\r
567 ; CHECK-NOF16-DAG: setp.ltu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
\r
568 ; CHECK-NOF16-DAG: setp.ltu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
\r
569 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
\r
570 ; CHECK-NEXT: st.param.b8 [func_retval0+0], [[R0]];
\r
571 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
\r
572 ; CHECK-NEXT: st.param.b8 [func_retval0+1], [[R1]];
\r
574 define <2 x i1> @test_fcmp_ult(<2 x half> %a, <2 x half> %b) #0 {
\r
575 %r = fcmp ult <2 x half> %a, %b
\r
; fcmp ule on <2 x half>: expected as setp.leu.f16x2 in the f16 run and as
; two setp.leu.f32 on widened halves in the no-f16 runs. Each predicate is
; turned into -1/0 by selp.u16 and stored as one byte of the result.
579 ; CHECK-LABEL: test_fcmp_ule(
\r
580 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ule_param_0];
\r
581 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ule_param_1];
\r
582 ; CHECK-F16: setp.leu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
\r
583 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
584 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
\r
585 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
\r
586 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
\r
587 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
\r
588 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
\r
589 ; CHECK-NOF16-DAG: setp.leu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
\r
590 ; CHECK-NOF16-DAG: setp.leu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
\r
591 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
\r
592 ; CHECK-NEXT: st.param.b8 [func_retval0+0], [[R0]];
\r
593 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
\r
594 ; CHECK-NEXT: st.param.b8 [func_retval0+1], [[R1]];
\r
596 define <2 x i1> @test_fcmp_ule(<2 x half> %a, <2 x half> %b) #0 {
\r
597 %r = fcmp ule <2 x half> %a, %b
\r
; fcmp uno on <2 x half>: expected as setp.nan.f16x2 in the f16 run and as
; two setp.nan.f32 on widened halves in the no-f16 runs. Each predicate is
; turned into -1/0 by selp.u16 and stored as one byte of the result.
602 ; CHECK-LABEL: test_fcmp_uno(
\r
603 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_uno_param_0];
\r
604 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_uno_param_1];
\r
605 ; CHECK-F16: setp.nan.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
\r
606 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
607 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
\r
608 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
\r
609 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
\r
610 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
\r
611 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
\r
612 ; CHECK-NOF16-DAG: setp.nan.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
\r
613 ; CHECK-NOF16-DAG: setp.nan.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
\r
614 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
\r
615 ; CHECK-NEXT: st.param.b8 [func_retval0+0], [[R0]];
\r
616 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
\r
617 ; CHECK-NEXT: st.param.b8 [func_retval0+1], [[R1]];
\r
619 define <2 x i1> @test_fcmp_uno(<2 x half> %a, <2 x half> %b) #0 {
\r
620 %r = fcmp uno <2 x half> %a, %b
\r
; fcmp one on <2 x half>: expected as setp.ne.f16x2 in the f16 run and as
; two setp.ne.f32 on widened halves in the no-f16 runs. Each predicate is
; turned into -1/0 by selp.u16 and stored as one byte of the result.
624 ; CHECK-LABEL: test_fcmp_one(
\r
625 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_one_param_0];
\r
626 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_one_param_1];
\r
627 ; CHECK-F16: setp.ne.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
\r
628 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
629 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
\r
630 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
\r
631 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
\r
632 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
\r
633 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
\r
634 ; CHECK-NOF16-DAG: setp.ne.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
\r
635 ; CHECK-NOF16-DAG: setp.ne.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
\r
636 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
\r
637 ; CHECK-NEXT: st.param.b8 [func_retval0+0], [[R0]];
\r
638 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
\r
639 ; CHECK-NEXT: st.param.b8 [func_retval0+1], [[R1]];
\r
641 define <2 x i1> @test_fcmp_one(<2 x half> %a, <2 x half> %b) #0 {
\r
642 %r = fcmp one <2 x half> %a, %b
\r
; fcmp oeq on <2 x half>: expected as setp.eq.f16x2 in the f16 run and as
; two setp.eq.f32 on widened halves in the no-f16 runs. Each predicate is
; turned into -1/0 by selp.u16 and stored as one byte of the result.
646 ; CHECK-LABEL: test_fcmp_oeq(
\r
647 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_oeq_param_0];
\r
648 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_oeq_param_1];
\r
649 ; CHECK-F16: setp.eq.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
\r
650 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
651 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
\r
652 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
\r
653 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
\r
654 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
\r
655 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
\r
656 ; CHECK-NOF16-DAG: setp.eq.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
\r
657 ; CHECK-NOF16-DAG: setp.eq.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
\r
658 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
\r
659 ; CHECK-NEXT: st.param.b8 [func_retval0+0], [[R0]];
\r
660 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
\r
661 ; CHECK-NEXT: st.param.b8 [func_retval0+1], [[R1]];
\r
663 define <2 x i1> @test_fcmp_oeq(<2 x half> %a, <2 x half> %b) #0 {
\r
664 %r = fcmp oeq <2 x half> %a, %b
\r
; fcmp ogt on <2 x half>: expected as setp.gt.f16x2 in the f16 run and as
; two setp.gt.f32 on widened halves in the no-f16 runs. Each predicate is
; turned into -1/0 by selp.u16 and stored as one byte of the result.
668 ; CHECK-LABEL: test_fcmp_ogt(
\r
669 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ogt_param_0];
\r
670 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ogt_param_1];
\r
671 ; CHECK-F16: setp.gt.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
\r
672 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
673 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
\r
674 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
\r
675 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
\r
676 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
\r
677 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
\r
678 ; CHECK-NOF16-DAG: setp.gt.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
\r
679 ; CHECK-NOF16-DAG: setp.gt.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
\r
680 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
\r
681 ; CHECK-NEXT: st.param.b8 [func_retval0+0], [[R0]];
\r
682 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
\r
683 ; CHECK-NEXT: st.param.b8 [func_retval0+1], [[R1]];
\r
685 define <2 x i1> @test_fcmp_ogt(<2 x half> %a, <2 x half> %b) #0 {
\r
686 %r = fcmp ogt <2 x half> %a, %b
\r
690 ; CHECK-LABEL: test_fcmp_oge(
\r
691 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_oge_param_0];
\r
692 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_oge_param_1];
\r
693 ; CHECK-F16: setp.ge.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
\r
694 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
695 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
\r
696 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
\r
697 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
\r
698 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
\r
699 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
\r
700 ; CHECK-NOF16-DAG: setp.ge.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
\r
701 ; CHECK-NOF16-DAG: setp.ge.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
\r
702 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
\r
703 ; CHECK-NEXT: st.param.b8 [func_retval0+0], [[R0]];
\r
704 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
\r
705 ; CHECK-NEXT: st.param.b8 [func_retval0+1], [[R1]];
\r
707 define <2 x i1> @test_fcmp_oge(<2 x half> %a, <2 x half> %b) #0 {
\r
708 %r = fcmp oge <2 x half> %a, %b
\r
712 ; CHECK-LABEL: test_fcmp_olt(
\r
713 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_olt_param_0];
\r
714 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_olt_param_1];
\r
715 ; CHECK-F16: setp.lt.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
\r
716 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
717 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
\r
718 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
\r
719 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
\r
720 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
\r
721 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
\r
722 ; CHECK-NOF16-DAG: setp.lt.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
\r
723 ; CHECK-NOF16-DAG: setp.lt.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
\r
724 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
\r
725 ; CHECK-NEXT: st.param.b8 [func_retval0+0], [[R0]];
\r
726 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
\r
727 ; CHECK-NEXT: st.param.b8 [func_retval0+1], [[R1]];
\r
729 define <2 x i1> @test_fcmp_olt(<2 x half> %a, <2 x half> %b) #0 {
\r
730 %r = fcmp olt <2 x half> %a, %b
\r
734 ; CHECK-LABEL: test_fcmp_ole(
\r
735 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ole_param_0];
\r
736 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ole_param_1];
\r
737 ; CHECK-F16: setp.le.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
\r
738 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
739 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
\r
740 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
\r
741 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
\r
742 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
\r
743 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
\r
744 ; CHECK-NOF16-DAG: setp.le.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
\r
745 ; CHECK-NOF16-DAG: setp.le.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
\r
746 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
\r
747 ; CHECK-NEXT: st.param.b8 [func_retval0+0], [[R0]];
\r
748 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
\r
749 ; CHECK-NEXT: st.param.b8 [func_retval0+1], [[R1]];
\r
751 define <2 x i1> @test_fcmp_ole(<2 x half> %a, <2 x half> %b) #0 {
\r
752 %r = fcmp ole <2 x half> %a, %b
\r
756 ; CHECK-LABEL: test_fcmp_ord(
\r
757 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ord_param_0];
\r
758 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ord_param_1];
\r
759 ; CHECK-F16: setp.num.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
\r
760 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
761 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
\r
762 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
\r
763 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
\r
764 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
\r
765 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
\r
766 ; CHECK-NOF16-DAG: setp.num.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
\r
767 ; CHECK-NOF16-DAG: setp.num.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
\r
768 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
\r
769 ; CHECK-NEXT: st.param.b8 [func_retval0+0], [[R0]];
\r
770 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
\r
771 ; CHECK-NEXT: st.param.b8 [func_retval0+1], [[R1]];
\r
773 define <2 x i1> @test_fcmp_ord(<2 x half> %a, <2 x half> %b) #0 {
\r
774 %r = fcmp ord <2 x half> %a, %b
\r
778 ; CHECK-LABEL: test_fptosi_i32(
\r
779 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptosi_i32_param_0];
\r
780 ; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
781 ; CHECK-DAG: cvt.rzi.s32.f16 [[R0:%r[0-9]+]], [[A0]];
\r
782 ; CHECK-DAG: cvt.rzi.s32.f16 [[R1:%r[0-9]+]], [[A1]];
\r
783 ; CHECK: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]}
\r
785 define <2 x i32> @test_fptosi_i32(<2 x half> %a) #0 {
\r
786 %r = fptosi <2 x half> %a to <2 x i32>
\r
790 ; CHECK-LABEL: test_fptosi_i64(
\r
791 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptosi_i64_param_0];
\r
792 ; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
793 ; CHECK-DAG: cvt.rzi.s64.f16 [[R0:%rd[0-9]+]], [[A0]];
\r
794 ; CHECK-DAG: cvt.rzi.s64.f16 [[R1:%rd[0-9]+]], [[A1]];
\r
795 ; CHECK: st.param.v2.b64 [func_retval0+0], {[[R0]], [[R1]]}
\r
797 define <2 x i64> @test_fptosi_i64(<2 x half> %a) #0 {
\r
798 %r = fptosi <2 x half> %a to <2 x i64>
\r
802 ; CHECK-LABEL: test_fptoui_2xi32(
\r
803 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptoui_2xi32_param_0];
\r
804 ; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
805 ; CHECK-DAG: cvt.rzi.u32.f16 [[R0:%r[0-9]+]], [[A0]];
\r
806 ; CHECK-DAG: cvt.rzi.u32.f16 [[R1:%r[0-9]+]], [[A1]];
\r
807 ; CHECK: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]}
\r
809 define <2 x i32> @test_fptoui_2xi32(<2 x half> %a) #0 {
\r
810 %r = fptoui <2 x half> %a to <2 x i32>
\r
814 ; CHECK-LABEL: test_fptoui_2xi64(
\r
815 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptoui_2xi64_param_0];
\r
816 ; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
817 ; CHECK-DAG: cvt.rzi.u64.f16 [[R0:%rd[0-9]+]], [[A0]];
\r
818 ; CHECK-DAG: cvt.rzi.u64.f16 [[R1:%rd[0-9]+]], [[A1]];
\r
819 ; CHECK: st.param.v2.b64 [func_retval0+0], {[[R0]], [[R1]]}
\r
821 define <2 x i64> @test_fptoui_2xi64(<2 x half> %a) #0 {
\r
822 %r = fptoui <2 x half> %a to <2 x i64>
\r
826 ; CHECK-LABEL: test_uitofp_2xi32(
\r
827 ; CHECK: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_uitofp_2xi32_param_0];
\r
828 ; CHECK-DAG: cvt.rn.f16.u32 [[R0:%h[0-9]+]], [[A0]];
\r
829 ; CHECK-DAG: cvt.rn.f16.u32 [[R1:%h[0-9]+]], [[A1]];
\r
830 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
831 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
\r
833 define <2 x half> @test_uitofp_2xi32(<2 x i32> %a) #0 {
\r
834 %r = uitofp <2 x i32> %a to <2 x half>
\r
838 ; CHECK-LABEL: test_uitofp_2xi64(
\r
839 ; CHECK: ld.param.v2.u64 {[[A0:%rd[0-9]+]], [[A1:%rd[0-9]+]]}, [test_uitofp_2xi64_param_0];
\r
840 ; CHECK-DAG: cvt.rn.f16.u64 [[R0:%h[0-9]+]], [[A0]];
\r
841 ; CHECK-DAG: cvt.rn.f16.u64 [[R1:%h[0-9]+]], [[A1]];
\r
842 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
843 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
\r
845 define <2 x half> @test_uitofp_2xi64(<2 x i64> %a) #0 {
\r
846 %r = uitofp <2 x i64> %a to <2 x half>
\r
850 ; CHECK-LABEL: test_sitofp_2xi32(
\r
851 ; CHECK: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_sitofp_2xi32_param_0];
\r
852 ; CHECK-DAG: cvt.rn.f16.s32 [[R0:%h[0-9]+]], [[A0]];
\r
853 ; CHECK-DAG: cvt.rn.f16.s32 [[R1:%h[0-9]+]], [[A1]];
\r
854 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
855 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
\r
857 define <2 x half> @test_sitofp_2xi32(<2 x i32> %a) #0 {
\r
858 %r = sitofp <2 x i32> %a to <2 x half>
\r
862 ; CHECK-LABEL: test_sitofp_2xi64(
\r
863 ; CHECK: ld.param.v2.u64 {[[A0:%rd[0-9]+]], [[A1:%rd[0-9]+]]}, [test_sitofp_2xi64_param_0];
\r
864 ; CHECK-DAG: cvt.rn.f16.s64 [[R0:%h[0-9]+]], [[A0]];
\r
865 ; CHECK-DAG: cvt.rn.f16.s64 [[R1:%h[0-9]+]], [[A1]];
\r
866 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
867 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
\r
869 define <2 x half> @test_sitofp_2xi64(<2 x i64> %a) #0 {
\r
870 %r = sitofp <2 x i64> %a to <2 x half>
\r
874 ; CHECK-LABEL: test_uitofp_2xi32_fadd(
\r
875 ; CHECK-DAG: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_uitofp_2xi32_fadd_param_0];
\r
876 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_uitofp_2xi32_fadd_param_1];
\r
877 ; CHECK-DAG: cvt.rn.f16.u32 [[C0:%h[0-9]+]], [[A0]];
\r
878 ; CHECK-DAG: cvt.rn.f16.u32 [[C1:%h[0-9]+]], [[A1]];
\r
880 ; CHECK-F16-DAG: mov.b32 [[C:%hh[0-9]+]], {[[C0]], [[C1]]}
\r
881 ; CHECK-F16-DAG: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[C]];
\r
883 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
\r
884 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
\r
885 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
\r
886 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]]
\r
887 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]]
\r
888 ; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FB0]], [[FC0]];
\r
889 ; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FB1]], [[FC1]];
\r
890 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
\r
891 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
\r
892 ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
894 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
\r
896 define <2 x half> @test_uitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 {
\r
897 %c = uitofp <2 x i32> %a to <2 x half>
\r
898 %r = fadd <2 x half> %b, %c
\r
902 ; CHECK-LABEL: test_sitofp_2xi32_fadd(
\r
903 ; CHECK-DAG: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_sitofp_2xi32_fadd_param_0];
\r
904 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_sitofp_2xi32_fadd_param_1];
\r
905 ; CHECK-DAG: cvt.rn.f16.s32 [[C0:%h[0-9]+]], [[A0]];
\r
906 ; CHECK-DAG: cvt.rn.f16.s32 [[C1:%h[0-9]+]], [[A1]];
\r
908 ; CHECK-F16-DAG: mov.b32 [[C:%hh[0-9]+]], {[[C0]], [[C1]]}
\r
909 ; CHECK-F16-DAG: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[C]];
\r
911 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
\r
912 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
\r
913 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
\r
914 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]]
\r
915 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]]
\r
916 ; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FB0]], [[FC0]];
\r
917 ; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FB1]], [[FC1]];
\r
918 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
\r
919 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
\r
920 ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
922 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
\r
924 define <2 x half> @test_sitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 {
\r
925 %c = sitofp <2 x i32> %a to <2 x half>
\r
926 %r = fadd <2 x half> %b, %c
\r
930 ; CHECK-LABEL: test_fptrunc_2xfloat(
\r
931 ; CHECK: ld.param.v2.f32 {[[A0:%f[0-9]+]], [[A1:%f[0-9]+]]}, [test_fptrunc_2xfloat_param_0];
\r
932 ; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[A0]];
\r
933 ; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[A1]];
\r
934 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
935 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
\r
937 define <2 x half> @test_fptrunc_2xfloat(<2 x float> %a) #0 {
\r
938 %r = fptrunc <2 x float> %a to <2 x half>
\r
942 ; CHECK-LABEL: test_fptrunc_2xdouble(
\r
943 ; CHECK: ld.param.v2.f64 {[[A0:%fd[0-9]+]], [[A1:%fd[0-9]+]]}, [test_fptrunc_2xdouble_param_0];
\r
944 ; CHECK-DAG: cvt.rn.f16.f64 [[R0:%h[0-9]+]], [[A0]];
\r
945 ; CHECK-DAG: cvt.rn.f16.f64 [[R1:%h[0-9]+]], [[A1]];
\r
946 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
947 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
\r
949 define <2 x half> @test_fptrunc_2xdouble(<2 x double> %a) #0 {
\r
950 %r = fptrunc <2 x double> %a to <2 x half>
\r
954 ; CHECK-LABEL: test_fpext_2xfloat(
\r
955 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fpext_2xfloat_param_0];
\r
956 ; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
957 ; CHECK-DAG: cvt.f32.f16 [[R0:%f[0-9]+]], [[A0]];
\r
958 ; CHECK-DAG: cvt.f32.f16 [[R1:%f[0-9]+]], [[A1]];
\r
959 ; CHECK-NEXT: st.param.v2.f32 [func_retval0+0], {[[R0]], [[R1]]};
\r
961 define <2 x float> @test_fpext_2xfloat(<2 x half> %a) #0 {
\r
962 %r = fpext <2 x half> %a to <2 x float>
\r
966 ; CHECK-LABEL: test_fpext_2xdouble(
\r
967 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fpext_2xdouble_param_0];
\r
968 ; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
969 ; CHECK-DAG: cvt.f64.f16 [[R0:%fd[0-9]+]], [[A0]];
\r
970 ; CHECK-DAG: cvt.f64.f16 [[R1:%fd[0-9]+]], [[A1]];
\r
971 ; CHECK-NEXT: st.param.v2.f64 [func_retval0+0], {[[R0]], [[R1]]};
\r
973 define <2 x double> @test_fpext_2xdouble(<2 x half> %a) #0 {
\r
974 %r = fpext <2 x half> %a to <2 x double>
\r
975 ret <2 x double> %r
\r
979 ; CHECK-LABEL: test_bitcast_2xhalf_to_2xi16(
\r
980 ; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_bitcast_2xhalf_to_2xi16_param_0];
\r
981 ; CHECK-DAG: cvt.u16.u32 [[R0:%rs[0-9]+]], [[A]]
\r
982 ; CHECK-DAG: shr.u32 [[AH:%r[0-9]+]], [[A]], 16
\r
983 ; CHECK-DAG: cvt.u16.u32 [[R1:%rs[0-9]+]], [[AH]]
\r
984 ; CHECK: st.param.v2.b16 [func_retval0+0], {[[R0]], [[R1]]}
\r
986 define <2 x i16> @test_bitcast_2xhalf_to_2xi16(<2 x half> %a) #0 {
\r
987 %r = bitcast <2 x half> %a to <2 x i16>
\r
991 ; CHECK-LABEL: test_bitcast_2xi16_to_2xhalf(
\r
992 ; CHECK: ld.param.v2.u16 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [test_bitcast_2xi16_to_2xhalf_param_0];
\r
993 ; CHECK-DAG: cvt.u32.u16 [[R0:%r[0-9]+]], [[RS0]];
\r
994 ; CHECK-DAG: cvt.u32.u16 [[R1:%r[0-9]+]], [[RS1]];
\r
995 ; CHECK-DAG: shl.b32 [[R1H:%r[0-9]+]], [[R1]], 16;
\r
996 ; CHECK-DAG: or.b32 [[R1H0L:%r[0-9]+]], [[R0]], [[R1H]];
\r
997 ; CHECK: mov.b32 [[R:%hh[0-9]+]], [[R1H0L]];
\r
998 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
\r
1000 define <2 x half> @test_bitcast_2xi16_to_2xhalf(<2 x i16> %a) #0 {
\r
1001 %r = bitcast <2 x i16> %a to <2 x half>
\r
1006 declare <2 x half> @llvm.sqrt.f16(<2 x half> %a) #0
\r
1007 declare <2 x half> @llvm.powi.f16.i32(<2 x half> %a, <2 x i32> %b) #0
\r
1008 declare <2 x half> @llvm.sin.f16(<2 x half> %a) #0
\r
1009 declare <2 x half> @llvm.cos.f16(<2 x half> %a) #0
\r
1010 declare <2 x half> @llvm.pow.f16(<2 x half> %a, <2 x half> %b) #0
\r
1011 declare <2 x half> @llvm.exp.f16(<2 x half> %a) #0
\r
1012 declare <2 x half> @llvm.exp2.f16(<2 x half> %a) #0
\r
1013 declare <2 x half> @llvm.log.f16(<2 x half> %a) #0
\r
1014 declare <2 x half> @llvm.log10.f16(<2 x half> %a) #0
\r
1015 declare <2 x half> @llvm.log2.f16(<2 x half> %a) #0
\r
1016 declare <2 x half> @llvm.fma.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0
\r
1017 declare <2 x half> @llvm.fabs.f16(<2 x half> %a) #0
\r
1018 declare <2 x half> @llvm.minnum.f16(<2 x half> %a, <2 x half> %b) #0
\r
1019 declare <2 x half> @llvm.maxnum.f16(<2 x half> %a, <2 x half> %b) #0
\r
1020 declare <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b) #0
\r
1021 declare <2 x half> @llvm.floor.f16(<2 x half> %a) #0
\r
1022 declare <2 x half> @llvm.ceil.f16(<2 x half> %a) #0
\r
1023 declare <2 x half> @llvm.trunc.f16(<2 x half> %a) #0
\r
1024 declare <2 x half> @llvm.rint.f16(<2 x half> %a) #0
\r
1025 declare <2 x half> @llvm.nearbyint.f16(<2 x half> %a) #0
\r
1026 declare <2 x half> @llvm.round.f16(<2 x half> %a) #0
\r
1027 declare <2 x half> @llvm.fmuladd.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0
\r
1029 ; CHECK-LABEL: test_sqrt(
\r
1030 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_sqrt_param_0];
\r
1031 ; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
1032 ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
\r
1033 ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
\r
1034 ; CHECK-DAG: sqrt.rn.f32 [[RF0:%f[0-9]+]], [[AF0]];
\r
1035 ; CHECK-DAG: sqrt.rn.f32 [[RF1:%f[0-9]+]], [[AF1]];
\r
1036 ; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
\r
1037 ; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
\r
1038 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
1039 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
\r
1041 define <2 x half> @test_sqrt(<2 x half> %a) #0 {
\r
1042 %r = call <2 x half> @llvm.sqrt.f16(<2 x half> %a)
\r
1046 ;;; Can't do this yet: requires libcall.
\r
1047 ; XCHECK-LABEL: test_powi(
\r
1048 ;define <2 x half> @test_powi(<2 x half> %a, <2 x i32> %b) #0 {
\r
1049 ; %r = call <2 x half> @llvm.powi.f16.i32(<2 x half> %a, <2 x i32> %b)
\r
1050 ; ret <2 x half> %r
\r
1053 ; CHECK-LABEL: test_sin(
\r
1054 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_sin_param_0];
\r
1055 ; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
1056 ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
\r
1057 ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
\r
1058 ; CHECK-DAG: sin.approx.f32 [[RF0:%f[0-9]+]], [[AF0]];
\r
1059 ; CHECK-DAG: sin.approx.f32 [[RF1:%f[0-9]+]], [[AF1]];
\r
1060 ; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
\r
1061 ; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
\r
1062 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
1063 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
\r
1065 define <2 x half> @test_sin(<2 x half> %a) #0 #1 {
\r
1066 %r = call <2 x half> @llvm.sin.f16(<2 x half> %a)
\r
1070 ; CHECK-LABEL: test_cos(
\r
1071 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_cos_param_0];
\r
1072 ; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
1073 ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
\r
1074 ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
\r
1075 ; CHECK-DAG: cos.approx.f32 [[RF0:%f[0-9]+]], [[AF0]];
\r
1076 ; CHECK-DAG: cos.approx.f32 [[RF1:%f[0-9]+]], [[AF1]];
\r
1077 ; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
\r
1078 ; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
\r
1079 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
1080 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
\r
1082 define <2 x half> @test_cos(<2 x half> %a) #0 #1 {
\r
1083 %r = call <2 x half> @llvm.cos.f16(<2 x half> %a)
\r
1087 ;;; Can't do this yet: requires libcall.
\r
1088 ; XCHECK-LABEL: test_pow(
\r
1089 ;define <2 x half> @test_pow(<2 x half> %a, <2 x half> %b) #0 {
\r
1090 ; %r = call <2 x half> @llvm.pow.f16(<2 x half> %a, <2 x half> %b)
\r
1091 ; ret <2 x half> %r
\r
1094 ;;; Can't do this yet: requires libcall.
\r
1095 ; XCHECK-LABEL: test_exp(
\r
1096 ;define <2 x half> @test_exp(<2 x half> %a) #0 {
\r
1097 ; %r = call <2 x half> @llvm.exp.f16(<2 x half> %a)
\r
1098 ; ret <2 x half> %r
\r
1101 ;;; Can't do this yet: requires libcall.
\r
1102 ; XCHECK-LABEL: test_exp2(
\r
1103 ;define <2 x half> @test_exp2(<2 x half> %a) #0 {
\r
1104 ; %r = call <2 x half> @llvm.exp2.f16(<2 x half> %a)
\r
1105 ; ret <2 x half> %r
\r
1108 ;;; Can't do this yet: requires libcall.
\r
1109 ; XCHECK-LABEL: test_log(
\r
1110 ;define <2 x half> @test_log(<2 x half> %a) #0 {
\r
1111 ; %r = call <2 x half> @llvm.log.f16(<2 x half> %a)
\r
1112 ; ret <2 x half> %r
\r
1115 ;;; Can't do this yet: requires libcall.
\r
1116 ; XCHECK-LABEL: test_log10(
\r
1117 ;define <2 x half> @test_log10(<2 x half> %a) #0 {
\r
1118 ; %r = call <2 x half> @llvm.log10.f16(<2 x half> %a)
\r
1119 ; ret <2 x half> %r
\r
1122 ;;; Can't do this yet: requires libcall.
\r
1123 ; XCHECK-LABEL: test_log2(
\r
1124 ;define <2 x half> @test_log2(<2 x half> %a) #0 {
\r
1125 ; %r = call <2 x half> @llvm.log2.f16(<2 x half> %a)
\r
1126 ; ret <2 x half> %r
\r
1129 ; CHECK-LABEL: test_fma(
\r
1130 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fma_param_0];
\r
1131 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fma_param_1];
\r
1132 ; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_fma_param_2];
\r
1134 ; CHECK-F16: fma.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]], [[C]];
\r
1136 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
1137 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
\r
1138 ; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]]
\r
1139 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
\r
1140 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
\r
1141 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]]
\r
1142 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
\r
1143 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
\r
1144 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]]
\r
1145 ; CHECK-NOF16-DAG: fma.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]], [[FC0]];
\r
1146 ; CHECK-NOF16-DAG: fma.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]], [[FC1]];
\r
1147 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
\r
1148 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
\r
1149 ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
1151 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
\r
1153 define <2 x half> @test_fma(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 {
\r
1154 %r = call <2 x half> @llvm.fma.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
\r
1158 ; CHECK-LABEL: test_fabs(
\r
1159 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fabs_param_0];
\r
1160 ; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
1161 ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
\r
1162 ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
\r
1163 ; CHECK-DAG: abs.f32 [[RF0:%f[0-9]+]], [[AF0]];
\r
1164 ; CHECK-DAG: abs.f32 [[RF1:%f[0-9]+]], [[AF1]];
\r
1165 ; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
\r
1166 ; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
\r
1167 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
1168 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
\r
1170 define <2 x half> @test_fabs(<2 x half> %a) #0 {
\r
1171 %r = call <2 x half> @llvm.fabs.f16(<2 x half> %a)
\r
1175 ; CHECK-LABEL: test_minnum(
\r
1176 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_minnum_param_0];
\r
1177 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_minnum_param_1];
\r
1178 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
1179 ; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
\r
1180 ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
\r
1181 ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
\r
1182 ; CHECK-DAG: cvt.f32.f16 [[BF0:%f[0-9]+]], [[B0]];
\r
1183 ; CHECK-DAG: cvt.f32.f16 [[BF1:%f[0-9]+]], [[B1]];
\r
1184 ; CHECK-DAG: min.f32 [[RF0:%f[0-9]+]], [[AF0]], [[BF0]];
\r
1185 ; CHECK-DAG: min.f32 [[RF1:%f[0-9]+]], [[AF1]], [[BF1]];
\r
1186 ; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
\r
1187 ; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
\r
1188 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
1189 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
\r
1191 define <2 x half> @test_minnum(<2 x half> %a, <2 x half> %b) #0 {
\r
1192 %r = call <2 x half> @llvm.minnum.f16(<2 x half> %a, <2 x half> %b)
\r
1196 ; CHECK-LABEL: test_maxnum(
\r
1197 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_maxnum_param_0];
\r
1198 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_maxnum_param_1];
\r
1199 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
1200 ; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
\r
1201 ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
\r
1202 ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
\r
1203 ; CHECK-DAG: cvt.f32.f16 [[BF0:%f[0-9]+]], [[B0]];
\r
1204 ; CHECK-DAG: cvt.f32.f16 [[BF1:%f[0-9]+]], [[B1]];
\r
1205 ; CHECK-DAG: max.f32 [[RF0:%f[0-9]+]], [[AF0]], [[BF0]];
\r
1206 ; CHECK-DAG: max.f32 [[RF1:%f[0-9]+]], [[AF1]], [[BF1]];
\r
1207 ; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
\r
1208 ; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
\r
1209 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
1210 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
\r
1212 define <2 x half> @test_maxnum(<2 x half> %a, <2 x half> %b) #0 {
\r
1213 %r = call <2 x half> @llvm.maxnum.f16(<2 x half> %a, <2 x half> %b)
\r
1217 ; CHECK-LABEL: test_copysign(
\r
1218 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_param_0];
\r
1219 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_copysign_param_1];
\r
1220 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
1221 ; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
\r
1222 ; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]];
\r
1223 ; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]];
\r
1224 ; CHECK-DAG: mov.b16 [[BS0:%rs[0-9]+]], [[B0]];
\r
1225 ; CHECK-DAG: mov.b16 [[BS1:%rs[0-9]+]], [[B1]];
\r
1226 ; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[AS0]], 32767;
\r
1227 ; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[AS1]], 32767;
\r
1228 ; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[BS0]], -32768;
\r
1229 ; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[BS1]], -32768;
\r
1230 ; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AX0]], [[BX0]];
\r
1231 ; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AX1]], [[BX1]];
\r
1232 ; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]];
\r
1233 ; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]];
\r
1234 ; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
1235 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
\r
1237 define <2 x half> @test_copysign(<2 x half> %a, <2 x half> %b) #0 {
\r
1238 %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b)
\r
1242 ; CHECK-LABEL: test_copysign_f32(
\r
1243 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_f32_param_0];
\r
1244 ; CHECK-DAG: ld.param.v2.f32 {[[B0:%f[0-9]+]], [[B1:%f[0-9]+]]}, [test_copysign_f32_param_1];
\r
1245 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
1246 ; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]];
\r
1247 ; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]];
\r
1248 ; CHECK-DAG: mov.b32 [[BI0:%r[0-9]+]], [[B0]];
\r
1249 ; CHECK-DAG: mov.b32 [[BI1:%r[0-9]+]], [[B1]];
\r
1250 ; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[AS0]], 32767;
\r
1251 ; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[AS1]], 32767;
\r
1252 ; CHECK-DAG: and.b32 [[BX0:%r[0-9]+]], [[BI0]], -2147483648;
\r
1253 ; CHECK-DAG: and.b32 [[BX1:%r[0-9]+]], [[BI1]], -2147483648;
\r
1254 ; CHECK-DAG: shr.u32 [[BY0:%r[0-9]+]], [[BX0]], 16;
\r
1255 ; CHECK-DAG: shr.u32 [[BY1:%r[0-9]+]], [[BX1]], 16;
\r
1256 ; CHECK-DAG: cvt.u16.u32 [[BZ0:%rs[0-9]+]], [[BY0]];
\r
1257 ; CHECK-DAG: cvt.u16.u32 [[BZ1:%rs[0-9]+]], [[BY1]];
\r
1258 ; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AI0]], [[BZ0]];
\r
1259 ; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AI1]], [[BZ1]];
\r
1260 ; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]];
\r
1261 ; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]];
\r
1262 ; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
1263 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
\r
1265 define <2 x half> @test_copysign_f32(<2 x half> %a, <2 x float> %b) #0 {
\r
1266 %tb = fptrunc <2 x float> %b to <2 x half>
\r
1267 %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %tb)
\r
1271 ; CHECK-LABEL: test_copysign_f64(
\r
1272 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_f64_param_0];
\r
1273 ; CHECK-DAG: ld.param.v2.f64 {[[B0:%fd[0-9]+]], [[B1:%fd[0-9]+]]}, [test_copysign_f64_param_1];
\r
1274 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
1275 ; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]];
\r
1276 ; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]];
\r
1277 ; CHECK-DAG: mov.b64 [[BI0:%rd[0-9]+]], [[B0]];
\r
1278 ; CHECK-DAG: mov.b64 [[BI1:%rd[0-9]+]], [[B1]];
\r
1279 ; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[AS0]], 32767;
\r
1280 ; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[AS1]], 32767;
\r
1281 ; CHECK-DAG: and.b64 [[BX0:%rd[0-9]+]], [[BI0]], -9223372036854775808;
\r
1282 ; CHECK-DAG: and.b64 [[BX1:%rd[0-9]+]], [[BI1]], -9223372036854775808;
\r
1283 ; CHECK-DAG: shr.u64 [[BY0:%rd[0-9]+]], [[BX0]], 48;
\r
1284 ; CHECK-DAG: shr.u64 [[BY1:%rd[0-9]+]], [[BX1]], 48;
\r
1285 ; CHECK-DAG: cvt.u16.u64 [[BZ0:%rs[0-9]+]], [[BY0]];
\r
1286 ; CHECK-DAG: cvt.u16.u64 [[BZ1:%rs[0-9]+]], [[BY1]];
\r
1287 ; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AI0]], [[BZ0]];
\r
1288 ; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AI1]], [[BZ1]];
\r
1289 ; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]];
\r
1290 ; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]];
\r
1291 ; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
1292 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
\r
1294 define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 {
\r
1295 %tb = fptrunc <2 x double> %b to <2 x half>
\r
1296 %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %tb)
\r
1300 ; CHECK-LABEL: test_copysign_extended(
\r
1301 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_extended_param_0];
\r
1302 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_copysign_extended_param_1];
\r
1303 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
1304 ; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
\r
1305 ; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]];
\r
1306 ; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]];
\r
1307 ; CHECK-DAG: mov.b16 [[BS0:%rs[0-9]+]], [[B0]];
\r
1308 ; CHECK-DAG: mov.b16 [[BS1:%rs[0-9]+]], [[B1]];
\r
1309 ; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[AS0]], 32767;
\r
1310 ; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[AS1]], 32767;
\r
1311 ; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[BS0]], -32768;
\r
1312 ; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[BS1]], -32768;
\r
1313 ; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AX0]], [[BX0]];
\r
1314 ; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AX1]], [[BX1]];
\r
1315 ; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]];
\r
1316 ; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]];
\r
1317 ; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
1318 ; CHECK: mov.b32 {[[RX0:%h[0-9]+]], [[RX1:%h[0-9]+]]}, [[R]]
\r
1319 ; CHECK-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[RX0]];
\r
1320 ; CHECK-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[RX1]];
\r
1321 ; CHECK: st.param.v2.f32 [func_retval0+0], {[[XR0]], [[XR1]]};
\r
1323 define <2 x float> @test_copysign_extended(<2 x half> %a, <2 x half> %b) #0 {
\r
1324 %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b)
\r
1325 %xr = fpext <2 x half> %r to <2 x float>
\r
1326 ret <2 x float> %xr
\r
; test_floor: llvm.floor applied to a <2 x half> argument.
; All run configurations share these patterns: the packed argument is split
; into its two halves, each half goes through cvt.rmi.f16.f16 (round to
; integer toward negative infinity), and the two results are packed back into
; a single 32-bit register that is stored as the return value.
1329 ; CHECK-LABEL: test_floor(
\r
1330 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_floor_param_0];
\r
1331 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
\r
1332 ; CHECK-DAG: cvt.rmi.f16.f16 [[R1:%h[0-9]+]], [[A1]];
\r
1333 ; CHECK-DAG: cvt.rmi.f16.f16 [[R0:%h[0-9]+]], [[A0]];
\r
1334 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
1335 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
\r
1337 define <2 x half> @test_floor(<2 x half> %a) #0 {
\r
1338 %r = call <2 x half> @llvm.floor.f16(<2 x half> %a)
\r
; test_ceil: llvm.ceil applied to a <2 x half> argument.
; All run configurations share these patterns: unpack the argument, convert
; each half with cvt.rpi.f16.f16 (round to integer toward positive infinity),
; then repack both results into one 32-bit register for the return store.
1342 ; CHECK-LABEL: test_ceil(
\r
1343 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_ceil_param_0];
\r
1344 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
\r
1345 ; CHECK-DAG: cvt.rpi.f16.f16 [[R1:%h[0-9]+]], [[A1]];
\r
1346 ; CHECK-DAG: cvt.rpi.f16.f16 [[R0:%h[0-9]+]], [[A0]];
\r
1347 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
1348 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
\r
1350 define <2 x half> @test_ceil(<2 x half> %a) #0 {
\r
1351 %r = call <2 x half> @llvm.ceil.f16(<2 x half> %a)
\r
; test_trunc: llvm.trunc applied to a <2 x half> argument.
; All run configurations share these patterns: unpack the argument, convert
; each half with cvt.rzi.f16.f16 (round to integer toward zero), then repack
; both results into one 32-bit register for the return store.
1355 ; CHECK-LABEL: test_trunc(
\r
1356 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_trunc_param_0];
\r
1357 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
\r
1358 ; CHECK-DAG: cvt.rzi.f16.f16 [[R1:%h[0-9]+]], [[A1]];
\r
1359 ; CHECK-DAG: cvt.rzi.f16.f16 [[R0:%h[0-9]+]], [[A0]];
\r
1360 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
1361 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
\r
1363 define <2 x half> @test_trunc(<2 x half> %a) #0 {
\r
1364 %r = call <2 x half> @llvm.trunc.f16(<2 x half> %a)
\r
; test_rint: llvm.rint applied to a <2 x half> argument.
; All run configurations share these patterns: unpack the argument, convert
; each half with cvt.rni.f16.f16 (round to nearest integer), then repack both
; results into one 32-bit register for the return store.
1368 ; CHECK-LABEL: test_rint(
\r
1369 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_rint_param_0];
\r
1370 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
\r
1371 ; CHECK-DAG: cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]];
\r
1372 ; CHECK-DAG: cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]];
\r
1373 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
1374 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
\r
1376 define <2 x half> @test_rint(<2 x half> %a) #0 {
\r
1377 %r = call <2 x half> @llvm.rint.f16(<2 x half> %a)
\r
; test_nearbyint: llvm.nearbyint applied to a <2 x half> argument.
; The expected instruction sequence is the same as for test_rint: unpack the
; argument, convert each half with cvt.rni.f16.f16, and repack both results
; into one 32-bit register for the return store.
1381 ; CHECK-LABEL: test_nearbyint(
\r
1382 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_nearbyint_param_0];
\r
1383 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
\r
1384 ; CHECK-DAG: cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]];
\r
1385 ; CHECK-DAG: cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]];
\r
1386 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
1387 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
\r
1389 define <2 x half> @test_nearbyint(<2 x half> %a) #0 {
\r
1390 %r = call <2 x half> @llvm.nearbyint.f16(<2 x half> %a)
\r
; test_round: llvm.round applied to a <2 x half> argument.
; Unlike the neighbouring rounding tests, this one does not pin down the whole
; instruction sequence. It only matches the parameter load, two and/or pairs
; on 32-bit registers (presumably one pair per vector lane), and the final
; store of the result.
1394 ; CHECK-LABEL: test_round(
\r
1395 ; CHECK: ld.param.b32 {{.*}}, [test_round_param_0];
\r
1396 ; Expect round to be built from the sign mask (-2147483648 = 0x80000000) OR-ed with 0.5 (1056964608 = 0x3F000000 as f32 bits).
\r
1397 ; CHECK: and.b32 [[R1:%r[0-9]+]], {{.*}}, -2147483648;
\r
1398 ; CHECK: or.b32 {{.*}}, [[R1]], 1056964608;
\r
1399 ; CHECK: and.b32 [[R2:%r[0-9]+]], {{.*}}, -2147483648;
\r
1400 ; CHECK: or.b32 {{.*}}, [[R2]], 1056964608;
\r
1401 ; CHECK: st.param.b32 [func_retval0+0], {{.*}};
\r
1403 define <2 x half> @test_round(<2 x half> %a) #0 {
\r
1404 %r = call <2 x half> @llvm.round.f16(<2 x half> %a)
\r
; test_fmuladd: llvm.fmuladd applied to three <2 x half> arguments.
;  * With native f16 math, a single packed fma.rn.f16x2 on the three loaded
;    registers is expected.
;  * Without native f16 math, every argument is unpacked into two halves, each
;    half is widened to f32 with cvt.f32.f16, the lanes are combined with
;    fma.rn.f32, narrowed back with cvt.rn.f16.f32 and repacked.
; Each lane needs its own converted addend: lane 0 binds FC0 from C0 and
; lane 1 binds FC1 from C1, because the lane-1 fma.rn.f32 pattern consumes FC1.
1408 ; CHECK-LABEL: test_fmuladd(
\r
1409 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fmuladd_param_0];
\r
1410 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fmuladd_param_1];
\r
1411 ; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_fmuladd_param_2];
\r
1413 ; CHECK-F16: fma.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]], [[C]];
\r
1415 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
\r
1416 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
\r
1417 ; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]]
\r
1418 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
\r
1419 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
\r
1420 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]]
\r
1421 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
\r
1422 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
\r
1423 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]]
\r
1424 ; CHECK-NOF16-DAG: fma.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]], [[FC0]];
\r
1425 ; CHECK-NOF16-DAG: fma.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]], [[FC1]];
\r
1426 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
\r
1427 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
\r
1428 ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
\r
1430 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
\r
1432 define <2 x half> @test_fmuladd(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 {
\r
1433 %r = call <2 x half> @llvm.fmuladd.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
\r
; test_shufflevector: shufflevector with mask <1, 0>, i.e. the two lanes of
; the <2 x half> argument are swapped. The expected code unpacks the source
; register into two halves and repacks them in the opposite order.
; NOTE(review): the patterns use literal register names (%h1, %h2, %hh1, %hh2)
; instead of pattern variables, so they depend on the exact register numbering
; produced by the compiler.
1437 ; CHECK-LABEL: test_shufflevector(
\r
1438 ; CHECK: mov.b32 {%h1, %h2}, %hh1;
\r
1439 ; CHECK: mov.b32 %hh2, {%h2, %h1};
\r
1440 define <2 x half> @test_shufflevector(<2 x half> %a) #0 {
\r
1441 %s = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 0>
\r
; test_insertelement: insertelement of a scalar half at index 1 of a
; <2 x half> vector. The expected code extracts the low half of %hh1 into %h2
; (the high half goes to the scratch name %tmp_hi) and then packs %h2 together
; with %h1 into %hh2.
; NOTE(review): presumably %hh1 holds %a and %h1 holds %x; the parameter loads
; are not matched here, so confirm against the generated PTX. The literal
; register names also make the patterns depend on exact register numbering.
1445 ; CHECK-LABEL: test_insertelement(
\r
1446 ; CHECK: mov.b32 {%h2, %tmp_hi}, %hh1;
\r
1447 ; CHECK: mov.b32 %hh2, {%h2, %h1};
\r
1448 define <2 x half> @test_insertelement(<2 x half> %a, half %x) #0 {
\r
1449 %i = insertelement <2 x half> %a, half %x, i64 1
\r
; Attribute groups referenced by the function definitions in this file.
; #0 (nounwind) is the group attached to every test function in this part of
; the file. #1 sets the "unsafe-fp-math" string attribute to "true"; none of
; the functions visible in this section reference it.
1453 attributes #0 = { nounwind }
\r
1454 attributes #1 = { "unsafe-fp-math" = "true" }
\r