; Exercises <2 x i16> lowering for nvptx64 at -O0 under two llc configurations:
;   * sm_90 with +ptx80, checked with the COMMON and I16x2 prefixes;
;   * sm_53, checked with the COMMON and NO-I16x2 prefixes.
; Each configuration also has a second llc invocation piped to %ptxas-verify.
; NOTE(review): the %ptxas-verify invocations end in a line continuation and
; appear to be missing their surrounding lines in this view -- confirm against
; the upstream copy of this test.
1 ; ## Support i16x2 instructions
2 ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -mattr=+ptx80 -asm-verbose=false \
3 ; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
4 ; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes COMMON,I16x2 %s
6 ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -asm-verbose=false \
7 ; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
8 ; RUN: | %ptxas-verify -arch=sm_90 \
10 ; ## No support for i16x2 instructions
11 ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
12 ; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
13 ; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes COMMON,NO-I16x2 %s
15 ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
16 ; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
17 ; RUN: | %ptxas-verify -arch=sm_53 \
20 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
; Returning the constant <i16 1, i16 2> is expected to materialize the packed
; immediate 131073 (0x00020001) in one 32-bit register and store that register
; as the return value.
22 ; COMMON-LABEL: test_ret_const(
23 ; COMMON: mov.b32 [[R:%r[0-9]+]], 131073;
24 ; COMMON: st.param.b32 [func_retval0+0], [[R]];
26 define <2 x i16> @test_ret_const() #0 {
27 ret <2 x i16> <i16 1, i16 2>
; Extracting element 0: the vector argument is loaded as one 32-bit value,
; unpacked with mov.b32, and the first slot of the unpack is widened with
; cvt.u32.u16 before being stored as the return value.
30 ; COMMON-LABEL: test_extract_0(
31 ; COMMON: ld.param.u32 [[A:%r[0-9]+]], [test_extract_0_param_0];
32 ; COMMON: mov.b32 {[[RS:%rs[0-9]+]], tmp}, [[A]];
33 ; COMMON: cvt.u32.u16 [[R:%r[0-9]+]], [[RS]];
34 ; COMMON: st.param.b32 [func_retval0+0], [[R]];
36 define i16 @test_extract_0(<2 x i16> %a) #0 {
37 %e = extractelement <2 x i16> %a, i32 0
; Extracting element 1: same sequence as element 0, except the value is taken
; from the second slot of the mov.b32 unpack.
41 ; COMMON-LABEL: test_extract_1(
42 ; COMMON: ld.param.u32 [[A:%r[0-9]+]], [test_extract_1_param_0];
43 ; COMMON: mov.b32 {tmp, [[RS:%rs[0-9]+]]}, [[A]];
44 ; COMMON: cvt.u32.u16 [[R:%r[0-9]+]], [[RS]];
45 ; COMMON: st.param.b32 [func_retval0+0], [[R]];
47 define i16 @test_extract_1(<2 x i16> %a) #0 {
48 %e = extractelement <2 x i16> %a, i32 1
; Extracting with a run-time i64 index: both halves are unpacked, the index is
; compared against 0, and selp.b16 picks one of the two halves using that
; predicate.
52 ; COMMON-LABEL: test_extract_i(
53 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_extract_i_param_0];
54 ; COMMON-DAG: ld.param.u64 [[IDX:%rd[0-9]+]], [test_extract_i_param_1];
55 ; COMMON-DAG: setp.eq.s64 [[PRED:%p[0-9]+]], [[IDX]], 0;
56 ; COMMON-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[A]];
57 ; COMMON: selp.b16 [[RS:%rs[0-9]+]], [[E0]], [[E1]], [[PRED]];
58 ; COMMON: cvt.u32.u16 [[R:%r[0-9]+]], [[RS]];
59 ; COMMON: st.param.b32 [func_retval0+0], [[R]];
61 define i16 @test_extract_i(<2 x i16> %a, i64 %idx) #0 {
62 %e = extractelement <2 x i16> %a, i64 %idx
; Vector add of two loaded operands. The sm_90/+ptx80 run expects a single
; add.s16x2 on the packed registers; the sm_53 run expects both operands to be
; unpacked, added with two scalar add.s16, and repacked with mov.b32.
66 ; COMMON-LABEL: test_add(
67 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_add_param_0];
68 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_add_param_1];
70 ; I16x2-NEXT: add.s16x2 [[R:%r[0-9]+]], [[A]], [[B]];
72 ; NO-I16x2-DAG: mov.b32 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [[A]];
73 ; NO-I16x2-DAG: mov.b32 {[[RS2:%rs[0-9]+]], [[RS3:%rs[0-9]+]]}, [[B]];
74 ; NO-I16x2-DAG: add.s16 [[RS4:%rs[0-9]+]], [[RS0]], [[RS2]];
75 ; NO-I16x2-DAG: add.s16 [[RS5:%rs[0-9]+]], [[RS1]], [[RS3]];
76 ; NO-I16x2-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS4]], [[RS5]]};
78 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
80 define <2 x i16> @test_add(<2 x i16> %a, <2 x i16> %b) #0 {
81 %r = add <2 x i16> %a, %b
85 ; Check that we can lower add with immediate arguments.
; Here the constant <i16 1, i16 2> is the left-hand operand. The sm_90/+ptx80
; run expects it as the packed immediate 131073 feeding add.s16x2; the sm_53
; run expects per-element add.s16 with immediates 1 and 2.
86 ; COMMON-LABEL: test_add_imm_0(
87 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_add_imm_0_param_0];
89 ; I16x2: mov.b32 [[I:%r[0-9]+]], 131073;
90 ; I16x2: add.s16x2 [[R:%r[0-9]+]], [[A]], [[I]];
92 ; NO-I16x2-DAG: mov.b32 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [[A]];
93 ; NO-I16x2-DAG: add.s16 [[RS2:%rs[0-9]+]], [[RS0]], 1;
94 ; NO-I16x2-DAG: add.s16 [[RS3:%rs[0-9]+]], [[RS1]], 2;
95 ; NO-I16x2-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS2]], [[RS3]]};
97 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
99 define <2 x i16> @test_add_imm_0(<2 x i16> %a) #0 {
100 %r = add <2 x i16> <i16 1, i16 2>, %a
; Same as the immediate-on-the-left add test, but with the constant
; <i16 1, i16 2> as the right-hand operand. The loaded parameter is captured as
; A in this function so the following checks bind to this function's register.
104 ; COMMON-LABEL: test_add_imm_1(
105 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_add_imm_1_param_0];
107 ; I16x2: mov.b32 [[I:%r[0-9]+]], 131073;
108 ; I16x2: add.s16x2 [[R:%r[0-9]+]], [[A]], [[I]];
110 ; NO-I16x2-DAG: mov.b32 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [[A]];
111 ; NO-I16x2-DAG: add.s16 [[RS2:%rs[0-9]+]], [[RS0]], 1;
112 ; NO-I16x2-DAG: add.s16 [[RS3:%rs[0-9]+]], [[RS1]], 2;
113 ; NO-I16x2-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS2]], [[RS3]]};
115 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
117 define <2 x i16> @test_add_imm_1(<2 x i16> %a) #0 {
118 %r = add <2 x i16> %a, <i16 1, i16 2>
; Vector subtract. The sm_90/+ptx80 run expects a single sub.s16x2; the sm_53
; run expects unpack, two scalar sub.s16, and repack.
122 ; COMMON-LABEL: test_sub(
123 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_sub_param_0];
125 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_sub_param_1];
126 ; I16x2: sub.s16x2 [[R:%r[0-9]+]], [[A]], [[B]];
128 ; NO-I16x2-DAG: mov.b32 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [[A]];
129 ; NO-I16x2-DAG: mov.b32 {[[RS2:%rs[0-9]+]], [[RS3:%rs[0-9]+]]}, [[B]];
130 ; NO-I16x2-DAG: sub.s16 [[RS4:%rs[0-9]+]], [[RS0]], [[RS2]];
131 ; NO-I16x2-DAG: sub.s16 [[RS5:%rs[0-9]+]], [[RS1]], [[RS3]];
132 ; NO-I16x2-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS4]], [[RS5]]};
134 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
136 define <2 x i16> @test_sub(<2 x i16> %a, <2 x i16> %b) #0 {
137 %r = sub <2 x i16> %a, %b
; Signed maximum written as icmp sgt + select. The sm_90/+ptx80 run expects the
; pair to become one max.s16x2; the sm_53 run expects two scalar max.s16 on the
; unpacked halves.
141 ; COMMON-LABEL: test_smax(
142 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_smax_param_0];
144 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_smax_param_1];
145 ; I16x2: max.s16x2 [[R:%r[0-9]+]], [[A]], [[B]];
147 ; NO-I16x2-DAG: mov.b32 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [[A]];
148 ; NO-I16x2-DAG: mov.b32 {[[RS2:%rs[0-9]+]], [[RS3:%rs[0-9]+]]}, [[B]];
149 ; NO-I16x2-DAG: max.s16 [[RS4:%rs[0-9]+]], [[RS0]], [[RS2]];
150 ; NO-I16x2-DAG: max.s16 [[RS5:%rs[0-9]+]], [[RS1]], [[RS3]];
151 ; NO-I16x2-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS4]], [[RS5]]};
153 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
155 define <2 x i16> @test_smax(<2 x i16> %a, <2 x i16> %b) #0 {
156 %cmp = icmp sgt <2 x i16> %a, %b
157 %r = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b
; Unsigned maximum written as icmp ugt + select. The sm_90/+ptx80 run expects
; one max.u16x2; the sm_53 run expects two scalar max.u16 on the unpacked
; halves.
161 ; COMMON-LABEL: test_umax(
162 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_umax_param_0];
164 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_umax_param_1];
165 ; I16x2: max.u16x2 [[R:%r[0-9]+]], [[A]], [[B]];
167 ; NO-I16x2-DAG: mov.b32 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [[A]];
168 ; NO-I16x2-DAG: mov.b32 {[[RS2:%rs[0-9]+]], [[RS3:%rs[0-9]+]]}, [[B]];
169 ; NO-I16x2-DAG: max.u16 [[RS4:%rs[0-9]+]], [[RS0]], [[RS2]];
170 ; NO-I16x2-DAG: max.u16 [[RS5:%rs[0-9]+]], [[RS1]], [[RS3]];
171 ; NO-I16x2-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS4]], [[RS5]]};
173 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
175 define <2 x i16> @test_umax(<2 x i16> %a, <2 x i16> %b) #0 {
176 %cmp = icmp ugt <2 x i16> %a, %b
177 %r = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b
; Signed minimum written as icmp sle + select. The sm_90/+ptx80 run expects
; one min.s16x2; the sm_53 run expects two scalar min.s16 on the unpacked
; halves.
181 ; COMMON-LABEL: test_smin(
182 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_smin_param_0];
184 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_smin_param_1];
185 ; I16x2: min.s16x2 [[R:%r[0-9]+]], [[A]], [[B]];
187 ; NO-I16x2-DAG: mov.b32 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [[A]];
188 ; NO-I16x2-DAG: mov.b32 {[[RS2:%rs[0-9]+]], [[RS3:%rs[0-9]+]]}, [[B]];
189 ; NO-I16x2-DAG: min.s16 [[RS4:%rs[0-9]+]], [[RS0]], [[RS2]];
190 ; NO-I16x2-DAG: min.s16 [[RS5:%rs[0-9]+]], [[RS1]], [[RS3]];
191 ; NO-I16x2-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS4]], [[RS5]]};
193 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
195 define <2 x i16> @test_smin(<2 x i16> %a, <2 x i16> %b) #0 {
196 %cmp = icmp sle <2 x i16> %a, %b
197 %r = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b
; Unsigned minimum written as icmp ule + select. The sm_90/+ptx80 run expects
; one min.u16x2; the sm_53 run expects two scalar min.u16 on the unpacked
; halves.
201 ; COMMON-LABEL: test_umin(
202 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_umin_param_0];
204 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_umin_param_1];
205 ; I16x2: min.u16x2 [[R:%r[0-9]+]], [[A]], [[B]];
207 ; NO-I16x2-DAG: mov.b32 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [[A]];
208 ; NO-I16x2-DAG: mov.b32 {[[RS2:%rs[0-9]+]], [[RS3:%rs[0-9]+]]}, [[B]];
209 ; NO-I16x2-DAG: min.u16 [[RS4:%rs[0-9]+]], [[RS0]], [[RS2]];
210 ; NO-I16x2-DAG: min.u16 [[RS5:%rs[0-9]+]], [[RS1]], [[RS3]];
211 ; NO-I16x2-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS4]], [[RS5]]};
213 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
215 define <2 x i16> @test_umin(<2 x i16> %a, <2 x i16> %b) #0 {
216 %cmp = icmp ule <2 x i16> %a, %b
217 %r = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b
; Vector multiply. Both run configurations share the same expectation: unpack
; both operands, two scalar mul.lo.s16, and repack with mov.b32.
221 ; COMMON-LABEL: test_mul(
222 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_mul_param_0];
223 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_mul_param_1];
225 ; COMMON-DAG: mov.b32 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [[A]];
226 ; COMMON-DAG: mov.b32 {[[RS2:%rs[0-9]+]], [[RS3:%rs[0-9]+]]}, [[B]];
227 ; COMMON-DAG: mul.lo.s16 [[RS4:%rs[0-9]+]], [[RS0]], [[RS2]];
228 ; COMMON-DAG: mul.lo.s16 [[RS5:%rs[0-9]+]], [[RS1]], [[RS3]];
229 ; COMMON-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS4]], [[RS5]]};
231 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
233 define <2 x i16> @test_mul(<2 x i16> %a, <2 x i16> %b) #0 {
234 %r = mul <2 x i16> %a, %b
238 ;; Logical ops are available on all GPUs as regular 32-bit logical ops
; Bitwise or of two loaded vectors is expected to be one or.b32 on the packed
; 32-bit values, immediately followed by the store of the return value.
239 ; COMMON-LABEL: test_or(
240 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_or_param_0];
241 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_or_param_1];
242 ; COMMON-NEXT: or.b32 [[R:%r[0-9]+]], [[A]], [[B]];
243 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
245 define <2 x i16> @test_or(<2 x i16> %a, <2 x i16> %b) #0 {
246 %r = or <2 x i16> %a, %b
250 ; Ops that operate on computed arguments go through a different lowering path
251 ; compared to the ones that operate on loaded data, so we test them separately.
; Both operands are built with insertelement from the scalar argument: one is
; {a, 0} and the other {a, 5}. Each is expected to be packed with mov.b32
; before a single or.b32 combines them.
252 ; COMMON-LABEL: test_or_computed(
253 ; COMMON: ld.param.u16 [[A:%rs[0-9]+]], [test_or_computed_param_0];
254 ; COMMON-DAG: mov.u16 [[C0:%rs[0-9]+]], 0;
255 ; COMMON-DAG: mov.b32 [[R1:%r[0-9]+]], {[[A]], [[C0]]};
256 ; COMMON-DAG: mov.u16 [[C5:%rs[0-9]+]], 5;
257 ; COMMON-DAG: mov.b32 [[R2:%r[0-9]+]], {[[A]], [[C5]]};
258 ; COMMON: or.b32 [[R:%r[0-9]+]], [[R2]], [[R1]];
259 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
260 define <2 x i16> @test_or_computed(i16 %a) {
261 %ins.0 = insertelement <2 x i16> zeroinitializer, i16 %a, i32 0
262 %ins.1 = insertelement <2 x i16> %ins.0, i16 5, i32 1
263 %r = or <2 x i16> %ins.1, %ins.0
267 ; Check that we can lower or with immediate arguments.
; With the constant <i16 1, i16 2> on the left, the expected or.b32 takes the
; loaded register and the packed immediate 131073 directly.
268 ; COMMON-LABEL: test_or_imm_0(
269 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_or_imm_0_param_0];
270 ; COMMON-NEXT: or.b32 [[R:%r[0-9]+]], [[A]], 131073;
271 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
273 define <2 x i16> @test_or_imm_0(<2 x i16> %a) #0 {
274 %r = or <2 x i16> <i16 1, i16 2>, %a
; Same as the immediate-on-the-left or test, with the constant <i16 1, i16 2>
; on the right. The parameter is captured as A here so that the or.b32 check
; binds to this function's own load.
278 ; COMMON-LABEL: test_or_imm_1(
279 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_or_imm_1_param_0];
280 ; COMMON-NEXT: or.b32 [[R:%r[0-9]+]], [[A]], 131073;
281 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
283 define <2 x i16> @test_or_imm_1(<2 x i16> %a) #0 {
284 %r = or <2 x i16> %a, <i16 1, i16 2>
; Bitwise xor of two loaded vectors is expected to be one xor.b32 on the packed
; 32-bit values, immediately followed by the store of the return value.
288 ; COMMON-LABEL: test_xor(
289 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_xor_param_0];
290 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_xor_param_1];
291 ; COMMON-NEXT: xor.b32 [[R:%r[0-9]+]], [[A]], [[B]];
292 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
294 define <2 x i16> @test_xor(<2 x i16> %a, <2 x i16> %b) #0 {
295 %r = xor <2 x i16> %a, %b
; xor on operands built with insertelement from the scalar argument ({a, 0}
; and {a, 5}) rather than loaded as vectors. Each operand is expected to be
; packed with mov.b32 before a single xor.b32 combines them.
299 ; COMMON-LABEL: test_xor_computed(
300 ; COMMON: ld.param.u16 [[A:%rs[0-9]+]], [test_xor_computed_param_0];
301 ; COMMON-DAG: mov.u16 [[C0:%rs[0-9]+]], 0;
302 ; COMMON-DAG: mov.b32 [[R1:%r[0-9]+]], {[[A]], [[C0]]};
303 ; COMMON-DAG: mov.u16 [[C5:%rs[0-9]+]], 5;
304 ; COMMON-DAG: mov.b32 [[R2:%r[0-9]+]], {[[A]], [[C5]]};
305 ; COMMON: xor.b32 [[R:%r[0-9]+]], [[R2]], [[R1]];
306 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
307 define <2 x i16> @test_xor_computed(i16 %a) {
308 %ins.0 = insertelement <2 x i16> zeroinitializer, i16 %a, i32 0
309 %ins.1 = insertelement <2 x i16> %ins.0, i16 5, i32 1
310 %r = xor <2 x i16> %ins.1, %ins.0
314 ; Check that we can lower xor with immediate arguments.
; With the constant <i16 1, i16 2> on the left, the expected xor.b32 takes the
; loaded register and the packed immediate 131073 directly.
315 ; COMMON-LABEL: test_xor_imm_0(
316 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_xor_imm_0_param_0];
317 ; COMMON-NEXT: xor.b32 [[R:%r[0-9]+]], [[A]], 131073;
318 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
320 define <2 x i16> @test_xor_imm_0(<2 x i16> %a) #0 {
321 %r = xor <2 x i16> <i16 1, i16 2>, %a
; Same as the immediate-on-the-left xor test, with the constant <i16 1, i16 2>
; on the right. The parameter is captured as A here so that the xor.b32 check
; binds to this function's own load.
325 ; COMMON-LABEL: test_xor_imm_1(
326 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_xor_imm_1_param_0];
327 ; COMMON-NEXT: xor.b32 [[R:%r[0-9]+]], [[A]], 131073;
328 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
330 define <2 x i16> @test_xor_imm_1(<2 x i16> %a) #0 {
331 %r = xor <2 x i16> %a, <i16 1, i16 2>
; Bitwise and of two loaded vectors is expected to be one and.b32 on the packed
; 32-bit values, immediately followed by the store of the return value.
335 ; COMMON-LABEL: test_and(
336 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_and_param_0];
337 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_and_param_1];
338 ; COMMON-NEXT: and.b32 [[R:%r[0-9]+]], [[A]], [[B]];
339 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
341 define <2 x i16> @test_and(<2 x i16> %a, <2 x i16> %b) #0 {
342 %r = and <2 x i16> %a, %b
346 ; Ops that operate on computed arguments go through a different lowering path
347 ; compared to the ones that operate on loaded data, so we test them separately.
; Both operands are built with insertelement from the scalar argument: one is
; {a, 0} and the other {a, 5}. Each is expected to be packed with mov.b32
; before a single and.b32 combines them.
348 ; COMMON-LABEL: test_and_computed(
349 ; COMMON: ld.param.u16 [[A:%rs[0-9]+]], [test_and_computed_param_0];
350 ; COMMON-DAG: mov.u16 [[C0:%rs[0-9]+]], 0;
351 ; COMMON-DAG: mov.b32 [[R1:%r[0-9]+]], {[[A]], [[C0]]};
352 ; COMMON-DAG: mov.u16 [[C5:%rs[0-9]+]], 5;
353 ; COMMON-DAG: mov.b32 [[R2:%r[0-9]+]], {[[A]], [[C5]]};
354 ; COMMON: and.b32 [[R:%r[0-9]+]], [[R2]], [[R1]];
355 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
356 define <2 x i16> @test_and_computed(i16 %a) {
357 %ins.0 = insertelement <2 x i16> zeroinitializer, i16 %a, i32 0
358 %ins.1 = insertelement <2 x i16> %ins.0, i16 5, i32 1
359 %r = and <2 x i16> %ins.1, %ins.0
363 ; Check that we can lower and with immediate arguments.
; With the constant <i16 1, i16 2> on the left, the expected and.b32 takes the
; loaded register and the packed immediate 131073 directly.
364 ; COMMON-LABEL: test_and_imm_0(
365 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_and_imm_0_param_0];
366 ; COMMON-NEXT: and.b32 [[R:%r[0-9]+]], [[A]], 131073;
367 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
369 define <2 x i16> @test_and_imm_0(<2 x i16> %a) #0 {
370 %r = and <2 x i16> <i16 1, i16 2>, %a
; Same as the immediate-on-the-left and test, with the constant <i16 1, i16 2>
; on the right. The parameter is captured as A here so that the and.b32 check
; binds to this function's own load.
374 ; COMMON-LABEL: test_and_imm_1(
375 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_and_imm_1_param_0];
376 ; COMMON-NEXT: and.b32 [[R:%r[0-9]+]], [[A]], 131073;
377 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
379 define <2 x i16> @test_and_imm_1(<2 x i16> %a) #0 {
380 %r = and <2 x i16> %a, <i16 1, i16 2>
; Load of <2 x i16> through %a and store through %b: expected as one ld.u32
; from the first pointer and one st.u32 of the same register to the second.
384 ; COMMON-LABEL: .func test_ldst_v2i16(
385 ; COMMON-DAG: ld.param.u64 [[A:%rd[0-9]+]], [test_ldst_v2i16_param_0];
386 ; COMMON-DAG: ld.param.u64 [[B:%rd[0-9]+]], [test_ldst_v2i16_param_1];
387 ; COMMON-DAG: ld.u32 [[E:%r[0-9]+]], [[[A]]];
388 ; COMMON-DAG: st.u32 [[[B]]], [[E]];
390 define void @test_ldst_v2i16(ptr %a, ptr %b) {
391 %t1 = load <2 x i16>, ptr %a
392 store <2 x i16> %t1, ptr %b, align 16
; Load and store of <3 x i16>: only the two stores (a 32-bit store at offset 0
; and a 16-bit store at offset 4) are checked; the load side is deliberately
; not matched, for the reason given below.
396 ; COMMON-LABEL: .func test_ldst_v3i16(
397 ; COMMON-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v3i16_param_0];
398 ; COMMON-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v3i16_param_1];
399 ; -- v3 is inconvenient to capture as it's lowered as ld.b64 + a fair
400 ; number of bit-shifting instructions that may change at LLVM's whim.
401 ; So we only verify that we issue the correct number of writes at the
402 ; correct offsets, but not the values we write.
404 ; COMMON-DAG: st.u32 [%[[B]]],
405 ; COMMON-DAG: st.u16 [%[[B]]+4],
407 define void @test_ldst_v3i16(ptr %a, ptr %b) {
408 %t1 = load <3 x i16>, ptr %a
409 store <3 x i16> %t1, ptr %b, align 16
; Load and store of <4 x i16>: expected as one ld.v4.u16 into four 16-bit
; registers and one st.v4.u16 of those same registers.
413 ; COMMON-LABEL: .func test_ldst_v4i16(
414 ; COMMON-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v4i16_param_0];
415 ; COMMON-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v4i16_param_1];
416 ; COMMON-DAG: ld.v4.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [%[[A]]];
417 ; COMMON-DAG: st.v4.u16 [%[[B]]], {[[E0]], [[E1]], [[E2]], [[E3]]};
419 define void @test_ldst_v4i16(ptr %a, ptr %b) {
420 %t1 = load <4 x i16>, ptr %a
421 store <4 x i16> %t1, ptr %b, align 16
; Load and store of <8 x i16>: expected as one ld.v4.b32 into four 32-bit
; registers and one st.v4.b32 of those same registers.
425 ; COMMON-LABEL: .func test_ldst_v8i16(
426 ; COMMON-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v8i16_param_0];
427 ; COMMON-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v8i16_param_1];
428 ; COMMON-DAG: ld.v4.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [%[[A]]];
429 ; COMMON-DAG: st.v4.b32 [%[[B]]], {[[E0]], [[E1]], [[E2]], [[E3]]};
431 define void @test_ldst_v8i16(ptr %a, ptr %b) {
432 %t1 = load <8 x i16>, ptr %a
433 store <8 x i16> %t1, ptr %b, align 16
; External function taking and returning <2 x i16>; it is only declared here
; and serves as the call target for the call tests.
437 declare <2 x i16> @test_callee(<2 x i16> %a, <2 x i16> %b) #0
; Call with vector arguments passed through in order: each argument is stored
; as one 32-bit value into a 4-byte .param slot, the result is read back from
; retval0 as one 32-bit value, and that register is stored as this function's
; return value.
439 ; COMMON-LABEL: test_call(
440 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_call_param_0];
441 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_call_param_1];
443 ; COMMON-DAG: .param .align 4 .b8 param0[4];
444 ; COMMON-DAG: .param .align 4 .b8 param1[4];
445 ; COMMON-DAG: st.param.b32 [param0+0], [[A]];
446 ; COMMON-DAG: st.param.b32 [param1+0], [[B]];
447 ; COMMON-DAG: .param .align 4 .b8 retval0[4];
448 ; COMMON: call.uni (retval0),
449 ; COMMON-NEXT: test_callee,
451 ; COMMON-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0+0];
453 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
455 define <2 x i16> @test_call(<2 x i16> %a, <2 x i16> %b) #0 {
456 %r = call <2 x i16> @test_callee(<2 x i16> %a, <2 x i16> %b)
; Same call sequence as the in-order call test, but the arguments are swapped
; at the call site: the second parameter is expected in param0 and the first
; in param1.
460 ; COMMON-LABEL: test_call_flipped(
461 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_call_flipped_param_0];
462 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_call_flipped_param_1];
464 ; COMMON-DAG: .param .align 4 .b8 param0[4];
465 ; COMMON-DAG: .param .align 4 .b8 param1[4];
466 ; COMMON-DAG: st.param.b32 [param0+0], [[B]];
467 ; COMMON-DAG: st.param.b32 [param1+0], [[A]];
468 ; COMMON-DAG: .param .align 4 .b8 retval0[4];
469 ; COMMON: call.uni (retval0),
470 ; COMMON-NEXT: test_callee,
472 ; COMMON-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0+0];
474 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
476 define <2 x i16> @test_call_flipped(<2 x i16> %a, <2 x i16> %b) #0 {
477 %r = call <2 x i16> @test_callee(<2 x i16> %b, <2 x i16> %a)
; Swapped-argument call marked `tail` in the IR. The expected PTX is the same
; call.uni sequence as for the non-tail swapped call, including the load from
; retval0 and the store of the return value afterwards.
481 ; COMMON-LABEL: test_tailcall_flipped(
482 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_tailcall_flipped_param_0];
483 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_tailcall_flipped_param_1];
485 ; COMMON-DAG: .param .align 4 .b8 param0[4];
486 ; COMMON-DAG: .param .align 4 .b8 param1[4];
487 ; COMMON-DAG: st.param.b32 [param0+0], [[B]];
488 ; COMMON-DAG: st.param.b32 [param1+0], [[A]];
489 ; COMMON-DAG: .param .align 4 .b8 retval0[4];
490 ; COMMON: call.uni (retval0),
491 ; COMMON-NEXT: test_callee,
493 ; COMMON-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0+0];
495 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
497 define <2 x i16> @test_tailcall_flipped(<2 x i16> %a, <2 x i16> %b) #0 {
498 %r = tail call <2 x i16> @test_callee(<2 x i16> %b, <2 x i16> %a)
; Select between two vectors on a scalar i1: the condition is loaded as a byte
; and compared against 1, and a single selp.b32 picks one whole packed
; register. The setp check matches any %rs operand rather than the captured C.
502 ; COMMON-LABEL: test_select(
503 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_select_param_0];
504 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_select_param_1];
505 ; COMMON-DAG: ld.param.u8 [[C:%rs[0-9]+]], [test_select_param_2]
506 ; COMMON-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1;
507 ; COMMON-NEXT: selp.b32 [[R:%r[0-9]+]], [[A]], [[B]], [[PRED]];
508 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
510 define <2 x i16> @test_select(<2 x i16> %a, <2 x i16> %b, i1 zeroext %c) #0 {
511 %r = select i1 %c, <2 x i16> %a, <2 x i16> %b
; Element-wise select driven by icmp ne on two other <2 x i16> vectors: all
; four operands are unpacked, each element pair is compared with setp.ne.s16,
; each result element is chosen with selp.b16, and the two results are
; repacked with mov.b32.
515 ; COMMON-LABEL: test_select_cc(
516 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_select_cc_param_0];
517 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_select_cc_param_1];
518 ; COMMON-DAG: ld.param.u32 [[C:%r[0-9]+]], [test_select_cc_param_2];
519 ; COMMON-DAG: ld.param.u32 [[D:%r[0-9]+]], [test_select_cc_param_3];
520 ; COMMON-DAG: mov.b32 {[[C0:%rs[0-9]+]], [[C1:%rs[0-9]+]]}, [[C]]
521 ; COMMON-DAG: mov.b32 {[[D0:%rs[0-9]+]], [[D1:%rs[0-9]+]]}, [[D]]
522 ; COMMON-DAG: setp.ne.s16 [[P0:%p[0-9]+]], [[C0]], [[D0]]
523 ; COMMON-DAG: setp.ne.s16 [[P1:%p[0-9]+]], [[C1]], [[D1]]
524 ; COMMON-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
525 ; COMMON-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
526 ; COMMON-DAG: selp.b16 [[R0:%rs[0-9]+]], [[A0]], [[B0]], [[P0]];
527 ; COMMON-DAG: selp.b16 [[R1:%rs[0-9]+]], [[A1]], [[B1]], [[P1]];
528 ; COMMON: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
529 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
531 define <2 x i16> @test_select_cc(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i16> %d) #0 {
532 %cc = icmp ne <2 x i16> %c, %d
533 %r = select <2 x i1> %cc, <2 x i16> %a, <2 x i16> %b
; Element-wise select of <2 x i32> values driven by a comparison of <2 x i16>
; vectors: the i16 operands are unpacked and compared with setp.ne.s16, and
; the i32 elements are chosen with selp.b32 and stored as a v2.b32 pair.
537 ; COMMON-LABEL: test_select_cc_i32_i16(
538 ; COMMON-DAG: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_select_cc_i32_i16_param_0];
539 ; COMMON-DAG: ld.param.v2.u32 {[[B0:%r[0-9]+]], [[B1:%r[0-9]+]]}, [test_select_cc_i32_i16_param_1];
540 ; COMMON-DAG: ld.param.u32 [[C:%r[0-9]+]], [test_select_cc_i32_i16_param_2];
541 ; COMMON-DAG: ld.param.u32 [[D:%r[0-9]+]], [test_select_cc_i32_i16_param_3];
542 ; COMMON-DAG: mov.b32 {[[C0:%rs[0-9]+]], [[C1:%rs[0-9]+]]}, [[C]]
543 ; COMMON-DAG: mov.b32 {[[D0:%rs[0-9]+]], [[D1:%rs[0-9]+]]}, [[D]]
544 ; COMMON-DAG: setp.ne.s16 [[P0:%p[0-9]+]], [[C0]], [[D0]]
545 ; COMMON-DAG: setp.ne.s16 [[P1:%p[0-9]+]], [[C1]], [[D1]]
546 ; COMMON-DAG: selp.b32 [[R0:%r[0-9]+]], [[A0]], [[B0]], [[P0]];
547 ; COMMON-DAG: selp.b32 [[R1:%r[0-9]+]], [[A1]], [[B1]], [[P1]];
548 ; COMMON-NEXT: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]};
550 define <2 x i32> @test_select_cc_i32_i16(<2 x i32> %a, <2 x i32> %b,
551 <2 x i16> %c, <2 x i16> %d) #0 {
552 %cc = icmp ne <2 x i16> %c, %d
553 %r = select <2 x i1> %cc, <2 x i32> %a, <2 x i32> %b
; Element-wise select of <2 x i16> values driven by a comparison of <2 x i32>
; vectors: the i32 elements are compared with setp.ne.s32, the i16 operands
; are unpacked and chosen with selp.b16, and the result is repacked with
; mov.b32.
557 ; COMMON-LABEL: test_select_cc_i16_i32(
558 ; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_select_cc_i16_i32_param_0];
559 ; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_select_cc_i16_i32_param_1];
560 ; COMMON-DAG: ld.param.v2.u32 {[[C0:%r[0-9]+]], [[C1:%r[0-9]+]]}, [test_select_cc_i16_i32_param_2];
561 ; COMMON-DAG: ld.param.v2.u32 {[[D0:%r[0-9]+]], [[D1:%r[0-9]+]]}, [test_select_cc_i16_i32_param_3];
562 ; COMMON-DAG: setp.ne.s32 [[P0:%p[0-9]+]], [[C0]], [[D0]]
563 ; COMMON-DAG: setp.ne.s32 [[P1:%p[0-9]+]], [[C1]], [[D1]]
564 ; COMMON-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
565 ; COMMON-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
566 ; COMMON-DAG: selp.b16 [[R0:%rs[0-9]+]], [[A0]], [[B0]], [[P0]];
567 ; COMMON-DAG: selp.b16 [[R1:%rs[0-9]+]], [[A1]], [[B1]], [[P1]];
568 ; COMMON: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
569 ; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
571 define <2 x i16> @test_select_cc_i16_i32(<2 x i16> %a, <2 x i16> %b,
572 <2 x i32> %c, <2 x i32> %d) #0 {
573 %cc = icmp ne <2 x i32> %c, %d
574 %r = select <2 x i1> %cc, <2 x i16> %a, <2 x i16> %b
; Truncation <2 x i32> -> <2 x i16>: each 32-bit element is converted with
; cvt.u16.u32 and the two 16-bit results are packed with mov.b32.
579 ; COMMON-LABEL: test_trunc_2xi32(
580 ; COMMON: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_trunc_2xi32_param_0];
581 ; COMMON-DAG: cvt.u16.u32 [[R0:%rs[0-9]+]], [[A0]];
582 ; COMMON-DAG: cvt.u16.u32 [[R1:%rs[0-9]+]], [[A1]];
583 ; COMMON: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
584 ; COMMON: st.param.b32 [func_retval0+0], [[R]];
586 define <2 x i16> @test_trunc_2xi32(<2 x i32> %a) #0 {
587 %r = trunc <2 x i32> %a to <2 x i16>
; Truncation <2 x i64> -> <2 x i16>: each 64-bit element is converted with
; cvt.u16.u64 and the two 16-bit results are packed with mov.b32.
591 ; COMMON-LABEL: test_trunc_2xi64(
592 ; COMMON: ld.param.v2.u64 {[[A0:%rd[0-9]+]], [[A1:%rd[0-9]+]]}, [test_trunc_2xi64_param_0];
593 ; COMMON-DAG: cvt.u16.u64 [[R0:%rs[0-9]+]], [[A0]];
594 ; COMMON-DAG: cvt.u16.u64 [[R1:%rs[0-9]+]], [[A1]];
595 ; COMMON: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
596 ; COMMON: st.param.b32 [func_retval0+0], [[R]];
598 define <2 x i16> @test_trunc_2xi64(<2 x i64> %a) #0 {
599 %r = trunc <2 x i64> %a to <2 x i16>
; Zero extension <2 x i16> -> <2 x i32>: the packed value is unpacked with
; mov.b32, each half is widened with cvt.u32.u16, and the pair is stored with
; st.param.v2.b32.
603 ; COMMON-LABEL: test_zext_2xi32(
604 ; COMMON: ld.param.u32 [[A:%r[0-9]+]], [test_zext_2xi32_param_0];
605 ; COMMON: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
606 ; COMMON-DAG: cvt.u32.u16 [[R0:%r[0-9]+]], [[A0]];
607 ; COMMON-DAG: cvt.u32.u16 [[R1:%r[0-9]+]], [[A1]];
608 ; COMMON-NEXT: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]};
610 define <2 x i32> @test_zext_2xi32(<2 x i16> %a) #0 {
611 %r = zext <2 x i16> %a to <2 x i32>
; Zero extension <2 x i16> -> <2 x i64>: the packed value is unpacked with
; mov.b32, each half is widened with cvt.u64.u16, and the pair is stored with
; st.param.v2.b64.
615 ; COMMON-LABEL: test_zext_2xi64(
616 ; COMMON: ld.param.u32 [[A:%r[0-9]+]], [test_zext_2xi64_param_0];
617 ; COMMON: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
618 ; COMMON-DAG: cvt.u64.u16 [[R0:%rd[0-9]+]], [[A0]];
619 ; COMMON-DAG: cvt.u64.u16 [[R1:%rd[0-9]+]], [[A1]];
620 ; COMMON-NEXT: st.param.v2.b64 [func_retval0+0], {[[R0]], [[R1]]};
622 define <2 x i64> @test_zext_2xi64(<2 x i16> %a) #0 {
623 %r = zext <2 x i16> %a to <2 x i64>
; Bitcast i32 -> <2 x i16>: the loaded 32-bit register is expected to be
; stored as the return value unchanged.
627 ; COMMON-LABEL: test_bitcast_i32_to_2xi16(
628 ; COMMON: ld.param.u32 [[R:%r[0-9]+]], [test_bitcast_i32_to_2xi16_param_0];
629 ; COMMON: st.param.b32 [func_retval0+0], [[R]];
631 define <2 x i16> @test_bitcast_i32_to_2xi16(i32 %a) #0 {
632 %r = bitcast i32 %a to <2 x i16>
; Bitcast <2 x i16> -> i32: the loaded 32-bit register is expected to be
; stored as the return value unchanged.
636 ; COMMON-LABEL: test_bitcast_2xi16_to_i32(
637 ; COMMON: ld.param.u32 [[R:%r[0-9]+]], [test_bitcast_2xi16_to_i32_param_0];
638 ; COMMON: st.param.b32 [func_retval0+0], [[R]];
640 define i32 @test_bitcast_2xi16_to_i32(<2 x i16> %a) #0 {
641 %r = bitcast <2 x i16> %a to i32
; Bitcast <2 x i16> -> <2 x half> of a vector built from the scalar argument
; and the constant 5: the two 16-bit values are expected to be packed with
; mov.b32 and that register stored as the return value.
645 ; COMMON-LABEL: test_bitcast_2xi16_to_2xhalf(
646 ; COMMON: ld.param.u16 [[RS1:%rs[0-9]+]], [test_bitcast_2xi16_to_2xhalf_param_0];
647 ; COMMON: mov.u16 [[RS2:%rs[0-9]+]], 5;
648 ; COMMON: mov.b32 [[R:%r[0-9]+]], {[[RS1]], [[RS2]]};
649 ; COMMON: st.param.b32 [func_retval0+0], [[R]];
651 define <2 x half> @test_bitcast_2xi16_to_2xhalf(i16 %a) #0 {
652 %ins.0 = insertelement <2 x i16> undef, i16 %a, i32 0
653 %ins.1 = insertelement <2 x i16> %ins.0, i16 5, i32 1
654 %r = bitcast <2 x i16> %ins.1 to <2 x half>
; Shuffle with mask <1, 0> (element swap): the packed value is unpacked with
; mov.b32 and repacked with the two halves in the opposite order.
659 ; COMMON-LABEL: test_shufflevector(
660 ; COMMON: ld.param.u32 [[R:%r[0-9]+]], [test_shufflevector_param_0];
661 ; COMMON: mov.b32 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [[R]];
662 ; COMMON: mov.b32 [[R1:%r[0-9]+]], {[[RS1]], [[RS0]]};
663 ; COMMON: st.param.b32 [func_retval0+0], [[R1]];
665 define <2 x i16> @test_shufflevector(<2 x i16> %a) #0 {
666 %s = shufflevector <2 x i16> %a, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
; Insert the scalar %x at index 1: the first slot of the original vector is
; extracted through a scoped `tmp` register and repacked with the new scalar
; in the second slot.
670 ; COMMON-LABEL: test_insertelement(
671 ; COMMON: ld.param.u16 [[B:%rs[0-9]+]], [test_insertelement_param_1];
672 ; COMMON: ld.param.u32 [[A:%r[0-9]+]], [test_insertelement_param_0];
673 ; COMMON: { .reg .b16 tmp; mov.b32 {[[R0:%rs[0-9]+]], tmp}, [[A]]; }
674 ; COMMON: mov.b32 [[R1:%r[0-9]+]], {[[R0]], [[B]]};
675 ; COMMON: st.param.b32 [func_retval0+0], [[R1]];
677 define <2 x i16> @test_insertelement(<2 x i16> %a, i16 %x) #0 {
678 %i = insertelement <2 x i16> %a, i16 %x, i64 1
; fptosi <2 x half> -> <2 x i16>: only checks that two cvt.rzi.s16.f16
; conversions are emitted; operands and surrounding packing are not matched.
682 ; COMMON-LABEL: test_fptosi_2xhalf_to_2xi16(
683 ; COMMON: cvt.rzi.s16.f16
684 ; COMMON: cvt.rzi.s16.f16
686 define <2 x i16> @test_fptosi_2xhalf_to_2xi16(<2 x half> %a) #0 {
687 %r = fptosi <2 x half> %a to <2 x i16>
; fptoui <2 x half> -> <2 x i16>: only checks that two cvt.rzi.u16.f16
; conversions are emitted; operands and surrounding packing are not matched.
691 ; COMMON-LABEL: test_fptoui_2xhalf_to_2xi16(
692 ; COMMON: cvt.rzi.u16.f16
693 ; COMMON: cvt.rzi.u16.f16
695 define <2 x i16> @test_fptoui_2xhalf_to_2xi16(<2 x half> %a) #0 {
696 %r = fptoui <2 x half> %a to <2 x i16>
; Attribute group #0 (nounwind), referenced by the declarations and
; definitions in this file that carry `#0`.
700 attributes #0 = { nounwind }