1 ; ## Support i16x2 instructions
2 ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -mattr=+ptx80 -asm-verbose=false \
3 ; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
4 ; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes COMMON,I16x2 %s
6 ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -asm-verbose=false \
7 ; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
8 ; RUN: | %ptxas-verify -arch=sm_90 \
10 ; ## No support for i16x2 instructions
11 ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
12 ; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
13 ; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes COMMON,NO-I16x2 %s
15 ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
16 ; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
17 ; RUN: | %ptxas-verify -arch=sm_53 \
20 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
22 ; COMMON-LABEL: test_ret_const(
23 ; COMMON: mov.b32 [[R:%r[0-9]+]], 131073;
24 ; COMMON: st.param.b32 [func_retval0+0], [[R]];
26 define <2 x i16> @test_ret_const() #0 {
27 ret <2 x i16> <i16 1, i16 2>
30 ; COMMON-LABEL: test_extract_0(
31 ; COMMON: ld.param.u32 [[A:%r[0-9]+]], [test_extract_0_param_0];
32 ; COMMON: mov.b32 {[[RS:%rs[0-9]+]], tmp}, [[A]];
33 ; COMMON: cvt.u32.u16 [[R:%r[0-9]+]], [[RS]];
34 ; COMMON: st.param.b32 [func_retval0+0], [[R]];
36 define i16 @test_extract_0(<2 x i16> %a) #0 {
37 %e = extractelement <2 x i16> %a, i32 0
41 ; COMMON-LABEL: test_extract_1(
42 ; COMMON: ld.param.u32 [[A:%r[0-9]+]], [test_extract_1_param_0];
43 ; COMMON: mov.b32 {tmp, [[RS:%rs[0-9]+]]}, [[A]];
44 ; COMMON: cvt.u32.u16 [[R:%r[0-9]+]], [[RS]];
45 ; COMMON: st.param.b32 [func_retval0+0], [[R]];
47 define i16 @test_extract_1(<2 x i16> %a) #0 {
48 %e = extractelement <2 x i16> %a, i32 1
52 ; COMMON-LABEL: test_extract_i(
53 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_extract_i_param_0];
54 ; COMMON-DAG: ld.param.u64 [[IDX:%rd[0-9]+]], [test_extract_i_param_1];
55 ; COMMON-DAG: setp.eq.s64 [[PRED:%p[0-9]+]], [[IDX]], 0;
56 ; COMMON-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[A]];
57 ; COMMON: selp.b16 [[RS:%rs[0-9]+]], [[E0]], [[E1]], [[PRED]];
58 ; COMMON: cvt.u32.u16 [[R:%r[0-9]+]], [[RS]];
59 ; COMMON: st.param.b32 [func_retval0+0], [[R]];
61 define i16 @test_extract_i(<2 x i16> %a, i64 %idx) #0 {
62 %e = extractelement <2 x i16> %a, i64 %idx
66 ; COMMON-LABEL: test_add(
67 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_add_param_0];
68 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_add_param_1];
70 ; I16x2-NEXT: add.s16x2 [[R:%r[0-9]+]], [[A]], [[B]];
72 ; NO-I16x2-DAG: mov.b32 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [[A]];
73 ; NO-I16x2-DAG: mov.b32 {[[RS2:%rs[0-9]+]], [[RS3:%rs[0-9]+]]}, [[B]];
74 ; NO-I16x2-DAG: add.s16 [[RS4:%rs[0-9]+]], [[RS0]], [[RS2]];
75 ; NO-I16x2-DAG: add.s16 [[RS5:%rs[0-9]+]], [[RS1]], [[RS3]];
76 ; NO-I16x2-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS4]], [[RS5]]};
78 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
80 define <2 x i16> @test_add(<2 x i16> %a, <2 x i16> %b) #0 {
81 %r = add <2 x i16> %a, %b
85 ; Check that we can lower add with immediate arguments.
86 ; COMMON-LABEL: test_add_imm_0(
87 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_add_imm_0_param_0];
89 ; I16x2: mov.b32 [[I:%r[0-9]+]], 131073;
90 ; I16x2: add.s16x2 [[R:%r[0-9]+]], [[A]], [[I]];
92 ; NO-I16x2-DAG: mov.b32 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [[A]];
93 ; NO-I16x2-DAG: add.s16 [[RS2:%rs[0-9]+]], [[RS0]], 1;
94 ; NO-I16x2-DAG: add.s16 [[RS3:%rs[0-9]+]], [[RS1]], 2;
95 ; NO-I16x2-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS2]], [[RS3]]};
97 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
99 define <2 x i16> @test_add_imm_0(<2 x i16> %a) #0 {
100 %r = add <2 x i16> <i16 1, i16 2>, %a
104 ; COMMON-LABEL: test_add_imm_1(
105 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_add_imm_1_param_0];
107 ; I16x2: mov.b32 [[I:%r[0-9]+]], 131073;
108 ; I16x2: add.s16x2 [[R:%r[0-9]+]], [[A]], [[I]];
110 ; NO-I16x2-DAG: mov.b32 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [[A]];
111 ; NO-I16x2-DAG: add.s16 [[RS2:%rs[0-9]+]], [[RS0]], 1;
112 ; NO-I16x2-DAG: add.s16 [[RS3:%rs[0-9]+]], [[RS1]], 2;
113 ; NO-I16x2-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS2]], [[RS3]]};
115 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
117 define <2 x i16> @test_add_imm_1(<2 x i16> %a) #0 {
118 %r = add <2 x i16> %a, <i16 1, i16 2>
122 ; COMMON-LABEL: test_sub(
123 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_sub_param_0];
125 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_sub_param_1];
127 ; COMMON-DAG: mov.b32 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [[A]];
128 ; COMMON-DAG: mov.b32 {[[RS2:%rs[0-9]+]], [[RS3:%rs[0-9]+]]}, [[B]];
129 ; COMMON-DAG: sub.s16 [[RS4:%rs[0-9]+]], [[RS0]], [[RS2]];
130 ; COMMON-DAG: sub.s16 [[RS5:%rs[0-9]+]], [[RS1]], [[RS3]];
131 ; COMMON-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS4]], [[RS5]]};
133 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
135 define <2 x i16> @test_sub(<2 x i16> %a, <2 x i16> %b) #0 {
136 %r = sub <2 x i16> %a, %b
140 ; COMMON-LABEL: test_smax(
141 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_smax_param_0];
143 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_smax_param_1];
144 ; I16x2: max.s16x2 [[R:%r[0-9]+]], [[A]], [[B]];
146 ; NO-I16x2-DAG: mov.b32 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [[A]];
147 ; NO-I16x2-DAG: mov.b32 {[[RS2:%rs[0-9]+]], [[RS3:%rs[0-9]+]]}, [[B]];
148 ; NO-I16x2-DAG: max.s16 [[RS4:%rs[0-9]+]], [[RS0]], [[RS2]];
149 ; NO-I16x2-DAG: max.s16 [[RS5:%rs[0-9]+]], [[RS1]], [[RS3]];
150 ; NO-I16x2-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS4]], [[RS5]]};
152 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
154 define <2 x i16> @test_smax(<2 x i16> %a, <2 x i16> %b) #0 {
155 %cmp = icmp sgt <2 x i16> %a, %b
156 %r = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b
160 ; COMMON-LABEL: test_umax(
161 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_umax_param_0];
163 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_umax_param_1];
164 ; I16x2: max.u16x2 [[R:%r[0-9]+]], [[A]], [[B]];
166 ; NO-I16x2-DAG: mov.b32 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [[A]];
167 ; NO-I16x2-DAG: mov.b32 {[[RS2:%rs[0-9]+]], [[RS3:%rs[0-9]+]]}, [[B]];
168 ; NO-I16x2-DAG: max.u16 [[RS4:%rs[0-9]+]], [[RS0]], [[RS2]];
169 ; NO-I16x2-DAG: max.u16 [[RS5:%rs[0-9]+]], [[RS1]], [[RS3]];
170 ; NO-I16x2-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS4]], [[RS5]]};
172 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
174 define <2 x i16> @test_umax(<2 x i16> %a, <2 x i16> %b) #0 {
175 %cmp = icmp ugt <2 x i16> %a, %b
176 %r = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b
180 ; COMMON-LABEL: test_smin(
181 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_smin_param_0];
183 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_smin_param_1];
184 ; I16x2: min.s16x2 [[R:%r[0-9]+]], [[A]], [[B]];
186 ; NO-I16x2-DAG: mov.b32 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [[A]];
187 ; NO-I16x2-DAG: mov.b32 {[[RS2:%rs[0-9]+]], [[RS3:%rs[0-9]+]]}, [[B]];
188 ; NO-I16x2-DAG: min.s16 [[RS4:%rs[0-9]+]], [[RS0]], [[RS2]];
189 ; NO-I16x2-DAG: min.s16 [[RS5:%rs[0-9]+]], [[RS1]], [[RS3]];
190 ; NO-I16x2-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS4]], [[RS5]]};
192 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
194 define <2 x i16> @test_smin(<2 x i16> %a, <2 x i16> %b) #0 {
195 %cmp = icmp sle <2 x i16> %a, %b
196 %r = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b
200 ; COMMON-LABEL: test_umin(
201 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_umin_param_0];
203 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_umin_param_1];
204 ; I16x2: min.u16x2 [[R:%r[0-9]+]], [[A]], [[B]];
206 ; NO-I16x2-DAG: mov.b32 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [[A]];
207 ; NO-I16x2-DAG: mov.b32 {[[RS2:%rs[0-9]+]], [[RS3:%rs[0-9]+]]}, [[B]];
208 ; NO-I16x2-DAG: min.u16 [[RS4:%rs[0-9]+]], [[RS0]], [[RS2]];
209 ; NO-I16x2-DAG: min.u16 [[RS5:%rs[0-9]+]], [[RS1]], [[RS3]];
210 ; NO-I16x2-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS4]], [[RS5]]};
212 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
214 define <2 x i16> @test_umin(<2 x i16> %a, <2 x i16> %b) #0 {
215 %cmp = icmp ule <2 x i16> %a, %b
216 %r = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b
220 ; COMMON-LABEL: test_mul(
221 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_mul_param_0];
222 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_mul_param_1];
224 ; COMMON-DAG: mov.b32 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [[A]];
225 ; COMMON-DAG: mov.b32 {[[RS2:%rs[0-9]+]], [[RS3:%rs[0-9]+]]}, [[B]];
226 ; COMMON-DAG: mul.lo.s16 [[RS4:%rs[0-9]+]], [[RS0]], [[RS2]];
227 ; COMMON-DAG: mul.lo.s16 [[RS5:%rs[0-9]+]], [[RS1]], [[RS3]];
228 ; COMMON-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS4]], [[RS5]]};
230 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
232 define <2 x i16> @test_mul(<2 x i16> %a, <2 x i16> %b) #0 {
233 %r = mul <2 x i16> %a, %b
237 ;; Logical ops are available on all GPUs as regular 32-bit logical ops
238 ; COMMON-LABEL: test_or(
239 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_or_param_0];
240 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_or_param_1];
241 ; COMMON-NEXT: or.b32 [[R:%r[0-9]+]], [[A]], [[B]];
242 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
244 define <2 x i16> @test_or(<2 x i16> %a, <2 x i16> %b) #0 {
245 %r = or <2 x i16> %a, %b
249 ; Ops that operate on computed arguments go through a different lowering path
250 ; compared to the ones that operate on loaded data. So we test them separately.
251 ; COMMON-LABEL: test_or_computed(
252 ; COMMON: ld.param.u16 [[A:%rs[0-9]+]], [test_or_computed_param_0];
253 ; COMMON-DAG: mov.u16 [[C0:%rs[0-9]+]], 0;
254 ; COMMON-DAG: mov.b32 [[R1:%r[0-9]+]], {[[A]], [[C0]]};
255 ; COMMON-DAG: mov.u16 [[C5:%rs[0-9]+]], 5;
256 ; COMMON-DAG: mov.b32 [[R2:%r[0-9]+]], {[[A]], [[C5]]};
257 ; COMMON: or.b32 [[R:%r[0-9]+]], [[R2]], [[R1]];
258 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
259 define <2 x i16> @test_or_computed(i16 %a) {
260 %ins.0 = insertelement <2 x i16> zeroinitializer, i16 %a, i32 0
261 %ins.1 = insertelement <2 x i16> %ins.0, i16 5, i32 1
262 %r = or <2 x i16> %ins.1, %ins.0
266 ; Check that we can lower or with immediate arguments.
267 ; COMMON-LABEL: test_or_imm_0(
268 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_or_imm_0_param_0];
269 ; COMMON-NEXT: or.b32 [[R:%r[0-9]+]], [[A]], 131073;
270 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
272 define <2 x i16> @test_or_imm_0(<2 x i16> %a) #0 {
273 %r = or <2 x i16> <i16 1, i16 2>, %a
277 ; COMMON-LABEL: test_or_imm_1(
278 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_or_imm_1_param_0];
279 ; COMMON-NEXT: or.b32 [[R:%r[0-9]+]], [[A]], 131073;
280 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
282 define <2 x i16> @test_or_imm_1(<2 x i16> %a) #0 {
283 %r = or <2 x i16> %a, <i16 1, i16 2>
287 ; COMMON-LABEL: test_xor(
288 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_xor_param_0];
289 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_xor_param_1];
290 ; COMMON-NEXT: xor.b32 [[R:%r[0-9]+]], [[A]], [[B]];
291 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
293 define <2 x i16> @test_xor(<2 x i16> %a, <2 x i16> %b) #0 {
294 %r = xor <2 x i16> %a, %b
298 ; COMMON-LABEL: test_xor_computed(
299 ; COMMON: ld.param.u16 [[A:%rs[0-9]+]], [test_xor_computed_param_0];
300 ; COMMON-DAG: mov.u16 [[C0:%rs[0-9]+]], 0;
301 ; COMMON-DAG: mov.b32 [[R1:%r[0-9]+]], {[[A]], [[C0]]};
302 ; COMMON-DAG: mov.u16 [[C5:%rs[0-9]+]], 5;
303 ; COMMON-DAG: mov.b32 [[R2:%r[0-9]+]], {[[A]], [[C5]]};
304 ; COMMON: xor.b32 [[R:%r[0-9]+]], [[R2]], [[R1]];
305 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
306 define <2 x i16> @test_xor_computed(i16 %a) {
307 %ins.0 = insertelement <2 x i16> zeroinitializer, i16 %a, i32 0
308 %ins.1 = insertelement <2 x i16> %ins.0, i16 5, i32 1
309 %r = xor <2 x i16> %ins.1, %ins.0
313 ; Check that we can lower xor with immediate arguments.
314 ; COMMON-LABEL: test_xor_imm_0(
315 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_xor_imm_0_param_0];
316 ; COMMON-NEXT: xor.b32 [[R:%r[0-9]+]], [[A]], 131073;
317 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
319 define <2 x i16> @test_xor_imm_0(<2 x i16> %a) #0 {
320 %r = xor <2 x i16> <i16 1, i16 2>, %a
324 ; COMMON-LABEL: test_xor_imm_1(
325 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_xor_imm_1_param_0];
326 ; COMMON-NEXT: xor.b32 [[R:%r[0-9]+]], [[A]], 131073;
327 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
329 define <2 x i16> @test_xor_imm_1(<2 x i16> %a) #0 {
330 %r = xor <2 x i16> %a, <i16 1, i16 2>
334 ; COMMON-LABEL: test_and(
335 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_and_param_0];
336 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_and_param_1];
337 ; COMMON-NEXT: and.b32 [[R:%r[0-9]+]], [[A]], [[B]];
338 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
340 define <2 x i16> @test_and(<2 x i16> %a, <2 x i16> %b) #0 {
341 %r = and <2 x i16> %a, %b
345 ; Ops that operate on computed arguments go through a different lowering path
346 ; compared to the ones that operate on loaded data. So we test them separately.
347 ; COMMON-LABEL: test_and_computed(
348 ; COMMON: ld.param.u16 [[A:%rs[0-9]+]], [test_and_computed_param_0];
349 ; COMMON-DAG: mov.u16 [[C0:%rs[0-9]+]], 0;
350 ; COMMON-DAG: mov.b32 [[R1:%r[0-9]+]], {[[A]], [[C0]]};
351 ; COMMON-DAG: mov.u16 [[C5:%rs[0-9]+]], 5;
352 ; COMMON-DAG: mov.b32 [[R2:%r[0-9]+]], {[[A]], [[C5]]};
353 ; COMMON: and.b32 [[R:%r[0-9]+]], [[R2]], [[R1]];
354 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
355 define <2 x i16> @test_and_computed(i16 %a) {
356 %ins.0 = insertelement <2 x i16> zeroinitializer, i16 %a, i32 0
357 %ins.1 = insertelement <2 x i16> %ins.0, i16 5, i32 1
358 %r = and <2 x i16> %ins.1, %ins.0
362 ; Check that we can lower and with immediate arguments.
363 ; COMMON-LABEL: test_and_imm_0(
364 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_and_imm_0_param_0];
365 ; COMMON-NEXT: and.b32 [[R:%r[0-9]+]], [[A]], 131073;
366 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
368 define <2 x i16> @test_and_imm_0(<2 x i16> %a) #0 {
369 %r = and <2 x i16> <i16 1, i16 2>, %a
373 ; COMMON-LABEL: test_and_imm_1(
374 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_and_imm_1_param_0];
375 ; COMMON-NEXT: and.b32 [[R:%r[0-9]+]], [[A]], 131073;
376 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
378 define <2 x i16> @test_and_imm_1(<2 x i16> %a) #0 {
379 %r = and <2 x i16> %a, <i16 1, i16 2>
383 ; COMMON-LABEL: .func test_ldst_v2i16(
384 ; COMMON-DAG: ld.param.u64 [[A:%rd[0-9]+]], [test_ldst_v2i16_param_0];
385 ; COMMON-DAG: ld.param.u64 [[B:%rd[0-9]+]], [test_ldst_v2i16_param_1];
386 ; COMMON-DAG: ld.u32 [[E:%r[0-9]+]], [[[A]]];
387 ; COMMON-DAG: st.u32 [[[B]]], [[E]];
389 define void @test_ldst_v2i16(ptr %a, ptr %b) {
390 %t1 = load <2 x i16>, ptr %a
391 store <2 x i16> %t1, ptr %b, align 16
395 ; COMMON-LABEL: .func test_ldst_v3i16(
396 ; COMMON-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v3i16_param_0];
397 ; COMMON-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v3i16_param_1];
398 ; -- v3 is inconvenient to capture as it's lowered as ld.b64 plus a fair
399 ; number of bit-shifting instructions that may change at LLVM's whim.
400 ; So we only verify that we issue the correct number of writes at the
401 ; correct offsets, but not the values we write.
403 ; COMMON-DAG: st.u32 [%[[B]]],
404 ; COMMON-DAG: st.u16 [%[[B]]+4],
406 define void @test_ldst_v3i16(ptr %a, ptr %b) {
407 %t1 = load <3 x i16>, ptr %a
408 store <3 x i16> %t1, ptr %b, align 16
412 ; COMMON-LABEL: .func test_ldst_v4i16(
413 ; COMMON-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v4i16_param_0];
414 ; COMMON-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v4i16_param_1];
415 ; COMMON-DAG: ld.v4.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [%[[A]]];
416 ; COMMON-DAG: st.v4.u16 [%[[B]]], {[[E0]], [[E1]], [[E2]], [[E3]]};
418 define void @test_ldst_v4i16(ptr %a, ptr %b) {
419 %t1 = load <4 x i16>, ptr %a
420 store <4 x i16> %t1, ptr %b, align 16
424 ; COMMON-LABEL: .func test_ldst_v8i16(
425 ; COMMON-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v8i16_param_0];
426 ; COMMON-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v8i16_param_1];
427 ; COMMON-DAG: ld.v4.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [%[[A]]];
428 ; COMMON-DAG: st.v4.b32 [%[[B]]], {[[E0]], [[E1]], [[E2]], [[E3]]};
430 define void @test_ldst_v8i16(ptr %a, ptr %b) {
431 %t1 = load <8 x i16>, ptr %a
432 store <8 x i16> %t1, ptr %b, align 16
436 declare <2 x i16> @test_callee(<2 x i16> %a, <2 x i16> %b) #0
438 ; COMMON-LABEL: test_call(
439 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_call_param_0];
440 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_call_param_1];
442 ; COMMON-DAG: .param .align 4 .b8 param0[4];
443 ; COMMON-DAG: .param .align 4 .b8 param1[4];
444 ; COMMON-DAG: st.param.b32 [param0+0], [[A]];
445 ; COMMON-DAG: st.param.b32 [param1+0], [[B]];
446 ; COMMON-DAG: .param .align 4 .b8 retval0[4];
447 ; COMMON: call.uni (retval0),
448 ; COMMON-NEXT: test_callee,
450 ; COMMON-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0+0];
452 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
454 define <2 x i16> @test_call(<2 x i16> %a, <2 x i16> %b) #0 {
455 %r = call <2 x i16> @test_callee(<2 x i16> %a, <2 x i16> %b)
459 ; COMMON-LABEL: test_call_flipped(
460 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_call_flipped_param_0];
461 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_call_flipped_param_1];
463 ; COMMON-DAG: .param .align 4 .b8 param0[4];
464 ; COMMON-DAG: .param .align 4 .b8 param1[4];
465 ; COMMON-DAG: st.param.b32 [param0+0], [[B]];
466 ; COMMON-DAG: st.param.b32 [param1+0], [[A]];
467 ; COMMON-DAG: .param .align 4 .b8 retval0[4];
468 ; COMMON: call.uni (retval0),
469 ; COMMON-NEXT: test_callee,
471 ; COMMON-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0+0];
473 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
475 define <2 x i16> @test_call_flipped(<2 x i16> %a, <2 x i16> %b) #0 {
476 %r = call <2 x i16> @test_callee(<2 x i16> %b, <2 x i16> %a)
480 ; COMMON-LABEL: test_tailcall_flipped(
481 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_tailcall_flipped_param_0];
482 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_tailcall_flipped_param_1];
484 ; COMMON-DAG: .param .align 4 .b8 param0[4];
485 ; COMMON-DAG: .param .align 4 .b8 param1[4];
486 ; COMMON-DAG: st.param.b32 [param0+0], [[B]];
487 ; COMMON-DAG: st.param.b32 [param1+0], [[A]];
488 ; COMMON-DAG: .param .align 4 .b8 retval0[4];
489 ; COMMON: call.uni (retval0),
490 ; COMMON-NEXT: test_callee,
492 ; COMMON-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0+0];
494 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
496 define <2 x i16> @test_tailcall_flipped(<2 x i16> %a, <2 x i16> %b) #0 {
497 %r = tail call <2 x i16> @test_callee(<2 x i16> %b, <2 x i16> %a)
501 ; COMMON-LABEL: test_select(
502 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_select_param_0];
503 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_select_param_1];
504 ; COMMON-DAG: ld.param.u8 [[C:%rs[0-9]+]], [test_select_param_2]
505 ; COMMON-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1;
506 ; COMMON-NEXT: selp.b32 [[R:%r[0-9]+]], [[A]], [[B]], [[PRED]];
507 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
509 define <2 x i16> @test_select(<2 x i16> %a, <2 x i16> %b, i1 zeroext %c) #0 {
510 %r = select i1 %c, <2 x i16> %a, <2 x i16> %b
514 ; COMMON-LABEL: test_select_cc(
515 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_select_cc_param_0];
516 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_select_cc_param_1];
517 ; COMMON-DAG: ld.param.u32 [[C:%r[0-9]+]], [test_select_cc_param_2];
518 ; COMMON-DAG: ld.param.u32 [[D:%r[0-9]+]], [test_select_cc_param_3];
519 ; COMMON-DAG: mov.b32 {[[C0:%rs[0-9]+]], [[C1:%rs[0-9]+]]}, [[C]]
520 ; COMMON-DAG: mov.b32 {[[D0:%rs[0-9]+]], [[D1:%rs[0-9]+]]}, [[D]]
521 ; COMMON-DAG: setp.ne.s16 [[P0:%p[0-9]+]], [[C0]], [[D0]]
522 ; COMMON-DAG: setp.ne.s16 [[P1:%p[0-9]+]], [[C1]], [[D1]]
523 ; COMMON-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
524 ; COMMON-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
525 ; COMMON-DAG: selp.b16 [[R0:%rs[0-9]+]], [[A0]], [[B0]], [[P0]];
526 ; COMMON-DAG: selp.b16 [[R1:%rs[0-9]+]], [[A1]], [[B1]], [[P1]];
527 ; COMMON: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
528 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
530 define <2 x i16> @test_select_cc(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i16> %d) #0 {
531 %cc = icmp ne <2 x i16> %c, %d
532 %r = select <2 x i1> %cc, <2 x i16> %a, <2 x i16> %b
536 ; COMMON-LABEL: test_select_cc_i32_i16(
537 ; COMMON-DAG: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_select_cc_i32_i16_param_0];
538 ; COMMON-DAG: ld.param.v2.u32 {[[B0:%r[0-9]+]], [[B1:%r[0-9]+]]}, [test_select_cc_i32_i16_param_1];
539 ; COMMON-DAG: ld.param.u32 [[C:%r[0-9]+]], [test_select_cc_i32_i16_param_2];
540 ; COMMON-DAG: ld.param.u32 [[D:%r[0-9]+]], [test_select_cc_i32_i16_param_3];
541 ; COMMON-DAG: mov.b32 {[[C0:%rs[0-9]+]], [[C1:%rs[0-9]+]]}, [[C]]
542 ; COMMON-DAG: mov.b32 {[[D0:%rs[0-9]+]], [[D1:%rs[0-9]+]]}, [[D]]
543 ; COMMON-DAG: setp.ne.s16 [[P0:%p[0-9]+]], [[C0]], [[D0]]
544 ; COMMON-DAG: setp.ne.s16 [[P1:%p[0-9]+]], [[C1]], [[D1]]
545 ; COMMON-DAG: selp.b32 [[R0:%r[0-9]+]], [[A0]], [[B0]], [[P0]];
546 ; COMMON-DAG: selp.b32 [[R1:%r[0-9]+]], [[A1]], [[B1]], [[P1]];
547 ; COMMON-NEXT: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]};
549 define <2 x i32> @test_select_cc_i32_i16(<2 x i32> %a, <2 x i32> %b,
550 <2 x i16> %c, <2 x i16> %d) #0 {
551 %cc = icmp ne <2 x i16> %c, %d
552 %r = select <2 x i1> %cc, <2 x i32> %a, <2 x i32> %b
556 ; COMMON-LABEL: test_select_cc_i16_i32(
557 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_select_cc_i16_i32_param_0];
558 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_select_cc_i16_i32_param_1];
559 ; COMMON-DAG: ld.param.v2.u32 {[[C0:%r[0-9]+]], [[C1:%r[0-9]+]]}, [test_select_cc_i16_i32_param_2];
560 ; COMMON-DAG: ld.param.v2.u32 {[[D0:%r[0-9]+]], [[D1:%r[0-9]+]]}, [test_select_cc_i16_i32_param_3];
561 ; COMMON-DAG: setp.ne.s32 [[P0:%p[0-9]+]], [[C0]], [[D0]]
562 ; COMMON-DAG: setp.ne.s32 [[P1:%p[0-9]+]], [[C1]], [[D1]]
563 ; COMMON-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
564 ; COMMON-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
565 ; COMMON-DAG: selp.b16 [[R0:%rs[0-9]+]], [[A0]], [[B0]], [[P0]];
566 ; COMMON-DAG: selp.b16 [[R1:%rs[0-9]+]], [[A1]], [[B1]], [[P1]];
567 ; COMMON: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
568 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
570 define <2 x i16> @test_select_cc_i16_i32(<2 x i16> %a, <2 x i16> %b,
571 <2 x i32> %c, <2 x i32> %d) #0 {
572 %cc = icmp ne <2 x i32> %c, %d
573 %r = select <2 x i1> %cc, <2 x i16> %a, <2 x i16> %b
578 ; COMMON-LABEL: test_trunc_2xi32(
579 ; COMMON: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_trunc_2xi32_param_0];
580 ; COMMON-DAG: cvt.u16.u32 [[R0:%rs[0-9]+]], [[A0]];
581 ; COMMON-DAG: cvt.u16.u32 [[R1:%rs[0-9]+]], [[A1]];
582 ; COMMON: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
583 ; COMMON: st.param.b32 [func_retval0+0], [[R]];
585 define <2 x i16> @test_trunc_2xi32(<2 x i32> %a) #0 {
586 %r = trunc <2 x i32> %a to <2 x i16>
590 ; COMMON-LABEL: test_trunc_2xi64(
591 ; COMMON: ld.param.v2.u64 {[[A0:%rd[0-9]+]], [[A1:%rd[0-9]+]]}, [test_trunc_2xi64_param_0];
592 ; COMMON-DAG: cvt.u16.u64 [[R0:%rs[0-9]+]], [[A0]];
593 ; COMMON-DAG: cvt.u16.u64 [[R1:%rs[0-9]+]], [[A1]];
594 ; COMMON: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
595 ; COMMON: st.param.b32 [func_retval0+0], [[R]];
597 define <2 x i16> @test_trunc_2xi64(<2 x i64> %a) #0 {
598 %r = trunc <2 x i64> %a to <2 x i16>
602 ; COMMON-LABEL: test_zext_2xi32(
603 ; COMMON: ld.param.u32 [[A:%r[0-9]+]], [test_zext_2xi32_param_0];
604 ; COMMON: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
605 ; COMMON-DAG: cvt.u32.u16 [[R0:%r[0-9]+]], [[A0]];
606 ; COMMON-DAG: cvt.u32.u16 [[R1:%r[0-9]+]], [[A1]];
607 ; COMMON-NEXT: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]};
609 define <2 x i32> @test_zext_2xi32(<2 x i16> %a) #0 {
610 %r = zext <2 x i16> %a to <2 x i32>
614 ; COMMON-LABEL: test_zext_2xi64(
615 ; COMMON: ld.param.u32 [[A:%r[0-9]+]], [test_zext_2xi64_param_0];
616 ; COMMON: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
617 ; COMMON-DAG: cvt.u64.u16 [[R0:%rd[0-9]+]], [[A0]];
618 ; COMMON-DAG: cvt.u64.u16 [[R1:%rd[0-9]+]], [[A1]];
619 ; COMMON-NEXT: st.param.v2.b64 [func_retval0+0], {[[R0]], [[R1]]};
621 define <2 x i64> @test_zext_2xi64(<2 x i16> %a) #0 {
622 %r = zext <2 x i16> %a to <2 x i64>
626 ; COMMON-LABEL: test_bitcast_i32_to_2xi16(
627 ; COMMON: ld.param.u32 [[R:%r[0-9]+]], [test_bitcast_i32_to_2xi16_param_0];
628 ; COMMON: st.param.b32 [func_retval0+0], [[R]];
630 define <2 x i16> @test_bitcast_i32_to_2xi16(i32 %a) #0 {
631 %r = bitcast i32 %a to <2 x i16>
635 ; COMMON-LABEL: test_bitcast_2xi16_to_i32(
636 ; COMMON: ld.param.u32 [[R:%r[0-9]+]], [test_bitcast_2xi16_to_i32_param_0];
637 ; COMMON: st.param.b32 [func_retval0+0], [[R]];
639 define i32 @test_bitcast_2xi16_to_i32(<2 x i16> %a) #0 {
640 %r = bitcast <2 x i16> %a to i32
644 ; COMMON-LABEL: test_bitcast_2xi16_to_2xhalf(
645 ; COMMON: ld.param.u16 [[RS1:%rs[0-9]+]], [test_bitcast_2xi16_to_2xhalf_param_0];
646 ; COMMON: mov.u16 [[RS2:%rs[0-9]+]], 5;
647 ; COMMON: mov.b32 [[R:%r[0-9]+]], {[[RS1]], [[RS2]]};
648 ; COMMON: st.param.b32 [func_retval0+0], [[R]];
650 define <2 x half> @test_bitcast_2xi16_to_2xhalf(i16 %a) #0 {
651 %ins.0 = insertelement <2 x i16> undef, i16 %a, i32 0
652 %ins.1 = insertelement <2 x i16> %ins.0, i16 5, i32 1
653 %r = bitcast <2 x i16> %ins.1 to <2 x half>
658 ; COMMON-LABEL: test_shufflevector(
659 ; COMMON: ld.param.u32 [[R:%r[0-9]+]], [test_shufflevector_param_0];
660 ; COMMON: mov.b32 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [[R]];
661 ; COMMON: mov.b32 [[R1:%r[0-9]+]], {[[RS1]], [[RS0]]};
662 ; COMMON: st.param.b32 [func_retval0+0], [[R1]];
664 define <2 x i16> @test_shufflevector(<2 x i16> %a) #0 {
665 %s = shufflevector <2 x i16> %a, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
669 ; COMMON-LABEL: test_insertelement(
670 ; COMMON: ld.param.u16 [[B:%rs[0-9]+]], [test_insertelement_param_1];
671 ; COMMON: ld.param.u32 [[A:%r[0-9]+]], [test_insertelement_param_0];
672 ; COMMON: { .reg .b16 tmp; mov.b32 {[[R0:%rs[0-9]+]], tmp}, [[A]]; }
673 ; COMMON: mov.b32 [[R1:%r[0-9]+]], {[[R0]], [[B]]};
674 ; COMMON: st.param.b32 [func_retval0+0], [[R1]];
676 define <2 x i16> @test_insertelement(<2 x i16> %a, i16 %x) #0 {
677 %i = insertelement <2 x i16> %a, i16 %x, i64 1
681 ; COMMON-LABEL: test_fptosi_2xhalf_to_2xi16(
682 ; COMMON: cvt.rzi.s16.f16
683 ; COMMON: cvt.rzi.s16.f16
685 define <2 x i16> @test_fptosi_2xhalf_to_2xi16(<2 x half> %a) #0 {
686 %r = fptosi <2 x half> %a to <2 x i16>
690 ; COMMON-LABEL: test_fptoui_2xhalf_to_2xi16(
691 ; COMMON: cvt.rzi.u16.f16
692 ; COMMON: cvt.rzi.u16.f16
694 define <2 x i16> @test_fptoui_2xhalf_to_2xi16(<2 x half> %a) #0 {
695 %r = fptoui <2 x half> %a to <2 x i16>
699 attributes #0 = { nounwind }