1 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck %s
2 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify %}
; test_v2i8: reinterpret an i16 argument as <2 x i8>, extract both lanes,
; sign-extend each to i16 and add them. Lane 0 is expected as cvt.s16.s8 of the
; loaded value and lane 1 as an arithmetic shift right by 8.
; Register captures use [0-9]+ so a multi-digit register number is matched whole.
5 ; CHECK-LABEL: test_v2i8
6 ; CHECK-DAG: ld.param.u16 [[A:%rs[0-9]+]], [test_v2i8_param_0];
7 ; CHECK-DAG: cvt.s16.s8 [[E0:%rs[0-9]+]], [[A]];
8 ; CHECK-DAG: shr.s16 [[E1:%rs[0-9]+]], [[A]], 8;
9 define i16 @test_v2i8(i16 %a) {
10 %v = bitcast i16 %a to <2 x i8>
11 %r0 = extractelement <2 x i8> %v, i64 0
12 %r1 = extractelement <2 x i8> %v, i64 1
13 %r0i = sext i8 %r0 to i16
14 %r1i = sext i8 %r1 to i16
15 %r01 = add i16 %r0i, %r1i
; test_v4i8: reinterpret an i32 argument as <4 x i8>, extract all four lanes,
; sign-extend each to i16 and sum them. Each lane is expected as a signed
; bit-field extract (bfe.s32) at offset 0/8/16/24 with width 8, followed by a
; cvt.s8.s32 into a 16-bit register.
; Register captures use [0-9]+ so a multi-digit register number is matched whole.
19 ; CHECK-LABEL: test_v4i8
20 ; CHECK: ld.param.u32 [[R:%r[0-9]+]], [test_v4i8_param_0];
21 ; CHECK-DAG: bfe.s32 [[R0:%r[0-9]+]], [[R]], 0, 8;
22 ; CHECK-DAG: cvt.s8.s32 [[E0:%rs[0-9]+]], [[R0]];
23 ; CHECK-DAG: bfe.s32 [[R1:%r[0-9]+]], [[R]], 8, 8;
24 ; CHECK-DAG: cvt.s8.s32 [[E1:%rs[0-9]+]], [[R1]];
25 ; CHECK-DAG: bfe.s32 [[R2:%r[0-9]+]], [[R]], 16, 8;
26 ; CHECK-DAG: cvt.s8.s32 [[E2:%rs[0-9]+]], [[R2]];
27 ; CHECK-DAG: bfe.s32 [[R3:%r[0-9]+]], [[R]], 24, 8;
28 ; CHECK-DAG: cvt.s8.s32 [[E3:%rs[0-9]+]], [[R3]];
29 define i16 @test_v4i8(i32 %a) {
30 %v = bitcast i32 %a to <4 x i8>
31 %r0 = extractelement <4 x i8> %v, i64 0
32 %r1 = extractelement <4 x i8> %v, i64 1
33 %r2 = extractelement <4 x i8> %v, i64 2
34 %r3 = extractelement <4 x i8> %v, i64 3
35 %r0i = sext i8 %r0 to i16
36 %r1i = sext i8 %r1 to i16
37 %r2i = sext i8 %r2 to i16
38 %r3i = sext i8 %r3 to i16
39 %r01 = add i16 %r0i, %r1i
40 %r23 = add i16 %r2i, %r3i
41 %r = add i16 %r01, %r23
; test_v4i8_s32: reinterpret an i32 argument as <4 x i8>, extract all four
; lanes, sign-extend each directly to i32 and sum them. Each lane is expected
; as a single signed bit-field extract (bfe.s32) whose result feeds the 32-bit
; adds with no intermediate conversion.
; Register captures use [0-9]+ so a multi-digit register number is matched whole.
45 ; CHECK-LABEL: test_v4i8_s32
46 ; CHECK: ld.param.u32 [[R:%r[0-9]+]], [test_v4i8_s32_param_0];
47 ; CHECK-DAG: bfe.s32 [[R0:%r[0-9]+]], [[R]], 0, 8;
48 ; CHECK-DAG: bfe.s32 [[R1:%r[0-9]+]], [[R]], 8, 8;
49 ; CHECK-DAG: bfe.s32 [[R2:%r[0-9]+]], [[R]], 16, 8;
50 ; CHECK-DAG: bfe.s32 [[R3:%r[0-9]+]], [[R]], 24, 8;
51 ; CHECK-DAG: add.s32 [[R01:%r[0-9]+]], [[R0]], [[R1]]
52 ; CHECK-DAG: add.s32 [[R23:%r[0-9]+]], [[R2]], [[R3]]
53 ; CHECK-DAG: add.s32 [[R0123:%r[0-9]+]], [[R01]], [[R23]]
54 define i32 @test_v4i8_s32(i32 %a) {
55 %v = bitcast i32 %a to <4 x i8>
56 %r0 = extractelement <4 x i8> %v, i64 0
57 %r1 = extractelement <4 x i8> %v, i64 1
58 %r2 = extractelement <4 x i8> %v, i64 2
59 %r3 = extractelement <4 x i8> %v, i64 3
60 %r0i = sext i8 %r0 to i32
61 %r1i = sext i8 %r1 to i32
62 %r2i = sext i8 %r2 to i32
63 %r3i = sext i8 %r3 to i32
64 %r01 = add i32 %r0i, %r1i
65 %r23 = add i32 %r2i, %r3i
66 %r = add i32 %r01, %r23
; test_v4i8_u32: reinterpret an i32 argument as <4 x i8>, extract all four
; lanes, zero-extend each to i32 and sum them. Each lane is expected as a
; single unsigned bit-field extract (bfe.u32) whose result feeds the 32-bit
; adds with no intermediate conversion.
; Register captures use [0-9]+ so a multi-digit register number is matched whole.
70 ; CHECK-LABEL: test_v4i8_u32
71 ; CHECK: ld.param.u32 [[R:%r[0-9]+]], [test_v4i8_u32_param_0];
72 ; CHECK-DAG: bfe.u32 [[R0:%r[0-9]+]], [[R]], 0, 8;
73 ; CHECK-DAG: bfe.u32 [[R1:%r[0-9]+]], [[R]], 8, 8;
74 ; CHECK-DAG: bfe.u32 [[R2:%r[0-9]+]], [[R]], 16, 8;
75 ; CHECK-DAG: bfe.u32 [[R3:%r[0-9]+]], [[R]], 24, 8;
76 ; CHECK-DAG: add.s32 [[R01:%r[0-9]+]], [[R0]], [[R1]]
77 ; CHECK-DAG: add.s32 [[R23:%r[0-9]+]], [[R2]], [[R3]]
78 ; CHECK-DAG: add.s32 [[R0123:%r[0-9]+]], [[R01]], [[R23]]
79 define i32 @test_v4i8_u32(i32 %a) {
80 %v = bitcast i32 %a to <4 x i8>
81 %r0 = extractelement <4 x i8> %v, i64 0
82 %r1 = extractelement <4 x i8> %v, i64 1
83 %r2 = extractelement <4 x i8> %v, i64 2
84 %r3 = extractelement <4 x i8> %v, i64 3
85 %r0i = zext i8 %r0 to i32
86 %r1i = zext i8 %r1 to i32
87 %r2i = zext i8 %r2 to i32
88 %r3i = zext i8 %r3 to i32
89 %r01 = add i32 %r0i, %r1i
90 %r23 = add i32 %r2i, %r3i
91 %r = add i32 %r01, %r23
; test_v8i8: reinterpret an i64 argument as <8 x i8>, extract all eight lanes,
; sign-extend each to i16 and sum them. Lane 0 is expected as a direct
; cvt.s8.s64 of the loaded value; lanes 2-7 as a signed 64-bit bit-field
; extract (bfe.s64) at offset 16..56 with width 8, followed by cvt.s8.s64.
; Register captures use [0-9]+ so a multi-digit register number is matched whole.
97 ; CHECK-LABEL: test_v8i8
98 ; CHECK: ld.param.u64 [[R:%rd[0-9]+]], [test_v8i8_param_0];
99 ; CHECK-DAG: cvt.s8.s64 [[E0:%rs[0-9]+]], [[R]];
100 ; Element 1 is still extracted by a truncate to 16 bits followed by shr 8 instead of bfe; the reason is not known.
101 ; CHECK-DAG: cvt.u16.u64 [[R01:%rs[0-9]+]], [[R]];
102 ; CHECK-DAG: shr.s16 [[E1:%rs[0-9]+]], [[R01]], 8;
103 ; CHECK-DAG: bfe.s64 [[RD2:%rd[0-9]+]], [[R]], 16, 8;
104 ; CHECK-DAG: cvt.s8.s64 [[E2:%rs[0-9]+]], [[RD2]];
105 ; CHECK-DAG: bfe.s64 [[RD3:%rd[0-9]+]], [[R]], 24, 8;
106 ; CHECK-DAG: cvt.s8.s64 [[E3:%rs[0-9]+]], [[RD3]];
107 ; CHECK-DAG: bfe.s64 [[RD4:%rd[0-9]+]], [[R]], 32, 8;
108 ; CHECK-DAG: cvt.s8.s64 [[E4:%rs[0-9]+]], [[RD4]];
109 ; CHECK-DAG: bfe.s64 [[RD5:%rd[0-9]+]], [[R]], 40, 8;
110 ; CHECK-DAG: cvt.s8.s64 [[E5:%rs[0-9]+]], [[RD5]];
111 ; CHECK-DAG: bfe.s64 [[RD6:%rd[0-9]+]], [[R]], 48, 8;
112 ; CHECK-DAG: cvt.s8.s64 [[E6:%rs[0-9]+]], [[RD6]];
113 ; CHECK-DAG: bfe.s64 [[RD7:%rd[0-9]+]], [[R]], 56, 8;
114 ; CHECK-DAG: cvt.s8.s64 [[E7:%rs[0-9]+]], [[RD7]];
116 define i16 @test_v8i8(i64 %a) {
117 %v = bitcast i64 %a to <8 x i8>
118 %r0 = extractelement <8 x i8> %v, i64 0
119 %r1 = extractelement <8 x i8> %v, i64 1
120 %r2 = extractelement <8 x i8> %v, i64 2
121 %r3 = extractelement <8 x i8> %v, i64 3
122 %r4 = extractelement <8 x i8> %v, i64 4
123 %r5 = extractelement <8 x i8> %v, i64 5
124 %r6 = extractelement <8 x i8> %v, i64 6
125 %r7 = extractelement <8 x i8> %v, i64 7
126 %r0i = sext i8 %r0 to i16
127 %r1i = sext i8 %r1 to i16
128 %r2i = sext i8 %r2 to i16
129 %r3i = sext i8 %r3 to i16
130 %r4i = sext i8 %r4 to i16
131 %r5i = sext i8 %r5 to i16
132 %r6i = sext i8 %r6 to i16
133 %r7i = sext i8 %r7 to i16
134 %r01 = add i16 %r0i, %r1i
135 %r23 = add i16 %r2i, %r3i
136 %r45 = add i16 %r4i, %r5i
137 %r67 = add i16 %r6i, %r7i
138 %r0123 = add i16 %r01, %r23
139 %r4567 = add i16 %r45, %r67
140 %r = add i16 %r0123, %r4567