1 ; RUN: llc -march=nvptx64 -stop-before=nvptx-proxyreg-erasure < %s 2>&1 \
2 ; RUN: | llc -x mir -march=nvptx64 -start-before=nvptx-proxyreg-erasure 2>&1 \
3 ; RUN: | FileCheck %s --check-prefix=PTX --check-prefix=PTX-WITH
5 ; RUN: llc -march=nvptx64 -stop-before=nvptx-proxyreg-erasure < %s 2>&1 \
6 ; RUN: | llc -x mir -march=nvptx64 -start-after=nvptx-proxyreg-erasure 2>&1 \
7 ; RUN: | FileCheck %s --check-prefix=PTX --check-prefix=PTX-WITHOUT
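; Thorough testing of ProxyRegErasure: checks the PTX assembly emitted with the
; pass (first RUN pipeline, PTX-WITH prefix) and without the pass (second RUN
; pipeline, PTX-WITHOUT prefix).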
; i1 return value: the callee's result is loaded from retval0 as a 32-bit
; value and masked with 1 before being stored to func_retval0. When the pass is
; skipped, the mask must read a mov.b32 copy of the loaded register; when the
; pass runs, the mask must read the loaded register directly.
declare i1 @callee_i1()
define i1 @check_i1() {
  ; Anchor the checks below to this function, as every other test case does.
  ; PTX-LABEL: check_i1
  ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
  ; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0+0];
  ; PTX-DAG: } // callseq {{[0-9]+}}

  ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%r[0-9]+]], [[LD]];
  ; PTX-WITHOUT-DAG: and.b32 [[RES:%r[0-9]+]], [[PROXY]], 1;
  ; PTX-WITH-DAG: and.b32 [[RES:%r[0-9]+]], [[LD]], 1;

  ; PTX-DAG: st.param.b32 [func_retval0+0], [[RES]];

  %ret = call i1 @callee_i1()
  ret i1 %ret
}
; i16 return value: the callee's result is loaded from retval0 as a 32-bit
; value and masked with 65535 before being stored to func_retval0. When the
; pass is skipped, the mask must read a mov.b32 copy of the loaded register;
; when the pass runs, the mask must read the loaded register directly.
declare i16 @callee_i16()
define i16 @check_i16() {
  ; PTX-LABEL: check_i16
  ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
  ; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0+0];
  ; PTX-DAG: } // callseq {{[0-9]+}}

  ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%r[0-9]+]], [[LD]];
  ; PTX-WITHOUT-DAG: and.b32 [[RES:%r[0-9]+]], [[PROXY]], 65535;
  ; PTX-WITH-DAG: and.b32 [[RES:%r[0-9]+]], [[LD]], 65535;

  ; PTX-DAG: st.param.b32 [func_retval0+0], [[RES]];

  %ret = call i16 @callee_i16()
  ret i16 %ret
}
; i32 return value: the callee's result is loaded from retval0 with
; ld.param.b32 and stored to func_retval0. When the pass is skipped, the store
; must read a mov.b32 copy of the loaded register; when the pass runs, the
; store must read the loaded register directly.
declare i32 @callee_i32()
define i32 @check_i32() {
  ; PTX-LABEL: check_i32
  ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
  ; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0+0];
  ; PTX-DAG: } // callseq {{[0-9]+}}

  ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%r[0-9]+]], [[LD]];
  ; PTX-WITHOUT-DAG: st.param.b32 [func_retval0+0], [[PROXY]];
  ; PTX-WITH-DAG: st.param.b32 [func_retval0+0], [[LD]];

  %ret = call i32 @callee_i32()
  ret i32 %ret
}
; i64 return value: the callee's result is loaded from retval0 with
; ld.param.b64 and stored to func_retval0. When the pass is skipped, the store
; must read a mov.b64 copy of the loaded register; when the pass runs, the
; store must read the loaded register directly.
declare i64 @callee_i64()
define i64 @check_i64() {
  ; PTX-LABEL: check_i64
  ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
  ; PTX-DAG: ld.param.b64 [[LD:%rd[0-9]+]], [retval0+0];
  ; PTX-DAG: } // callseq {{[0-9]+}}

  ; PTX-WITHOUT-DAG: mov.b64 [[PROXY:%rd[0-9]+]], [[LD]];
  ; PTX-WITHOUT-DAG: st.param.b64 [func_retval0+0], [[PROXY]];
  ; PTX-WITH-DAG: st.param.b64 [func_retval0+0], [[LD]];

  %ret = call i64 @callee_i64()
  ret i64 %ret
}
; i128 return value: the callee's result is loaded from retval0 as a pair of
; 64-bit registers (ld.param.v2.b64) and stored to func_retval0 as a pair.
; When the pass is skipped, each half must go through its own mov.b64 copy;
; when the pass runs, the store must read both loaded registers directly.
declare i128 @callee_i128()
define i128 @check_i128() {
  ; PTX-LABEL: check_i128
  ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
  ; PTX-DAG: ld.param.v2.b64 {[[LD0:%rd[0-9]+]], [[LD1:%rd[0-9]+]]}, [retval0+0];
  ; PTX-DAG: } // callseq {{[0-9]+}}

  ; PTX-WITHOUT-DAG: mov.b64 [[PROXY0:%rd[0-9]+]], [[LD0]];
  ; PTX-WITHOUT-DAG: mov.b64 [[PROXY1:%rd[0-9]+]], [[LD1]];
  ; PTX-WITHOUT-DAG: st.param.v2.b64 [func_retval0+0], {[[PROXY0]], [[PROXY1]]};
  ; PTX-WITH-DAG: st.param.v2.b64 [func_retval0+0], {[[LD0]], [[LD1]]};

  %ret = call i128 @callee_i128()
  ret i128 %ret
}
; half return value: the callee's result is loaded from retval0 with
; ld.param.b16 into an %h register and stored to func_retval0. When the pass is
; skipped, the store must read a mov.b16 copy of the loaded register; when the
; pass runs, the store must read the loaded register directly.
declare half @callee_f16()
define half @check_f16() {
  ; PTX-LABEL: check_f16
  ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
  ; PTX-DAG: ld.param.b16 [[LD:%h[0-9]+]], [retval0+0];
  ; PTX-DAG: } // callseq {{[0-9]+}}

  ; PTX-WITHOUT-DAG: mov.b16 [[PROXY:%h[0-9]+]], [[LD]];
  ; PTX-WITHOUT-DAG: st.param.b16 [func_retval0+0], [[PROXY]];
  ; PTX-WITH-DAG: st.param.b16 [func_retval0+0], [[LD]];

  %ret = call half @callee_f16()
  ret half %ret
}
; float return value: the callee's result is loaded from retval0 with
; ld.param.f32 and stored to func_retval0. When the pass is skipped, the store
; must read a mov.f32 copy of the loaded register; when the pass runs, the
; store must read the loaded register directly.
declare float @callee_f32()
define float @check_f32() {
  ; PTX-LABEL: check_f32
  ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
  ; PTX-DAG: ld.param.f32 [[LD:%f[0-9]+]], [retval0+0];
  ; PTX-DAG: } // callseq {{[0-9]+}}

  ; PTX-WITHOUT-DAG: mov.f32 [[PROXY:%f[0-9]+]], [[LD]];
  ; PTX-WITHOUT-DAG: st.param.f32 [func_retval0+0], [[PROXY]];
  ; PTX-WITH-DAG: st.param.f32 [func_retval0+0], [[LD]];

  %ret = call float @callee_f32()
  ret float %ret
}
; double return value: the callee's result is loaded from retval0 with
; ld.param.f64 and stored to func_retval0. When the pass is skipped, the store
; must read a mov.f64 copy of the loaded register; when the pass runs, the
; store must read the loaded register directly.
declare double @callee_f64()
define double @check_f64() {
  ; PTX-LABEL: check_f64
  ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
  ; PTX-DAG: ld.param.f64 [[LD:%fd[0-9]+]], [retval0+0];
  ; PTX-DAG: } // callseq {{[0-9]+}}

  ; PTX-WITHOUT-DAG: mov.f64 [[PROXY:%fd[0-9]+]], [[LD]];
  ; PTX-WITHOUT-DAG: st.param.f64 [func_retval0+0], [[PROXY]];
  ; PTX-WITH-DAG: st.param.f64 [func_retval0+0], [[LD]];

  %ret = call double @callee_f64()
  ret double %ret
}
; <4 x i32> return value: the callee's result is loaded from retval0 as four
; 32-bit registers (ld.param.v4.b32) and stored to func_retval0 as a group of
; four. When the pass is skipped, each element must go through its own mov.b32
; copy; when the pass runs, the store must read the loaded registers directly.
declare <4 x i32> @callee_vec_i32()
define <4 x i32> @check_vec_i32() {
  ; PTX-LABEL: check_vec_i32
  ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
  ; PTX-DAG: ld.param.v4.b32 {[[LD0:%r[0-9]+]], [[LD1:%r[0-9]+]], [[LD2:%r[0-9]+]], [[LD3:%r[0-9]+]]}, [retval0+0];
  ; PTX-DAG: } // callseq {{[0-9]+}}

  ; PTX-WITHOUT-DAG: mov.b32 [[PROXY0:%r[0-9]+]], [[LD0]];
  ; PTX-WITHOUT-DAG: mov.b32 [[PROXY1:%r[0-9]+]], [[LD1]];
  ; PTX-WITHOUT-DAG: mov.b32 [[PROXY2:%r[0-9]+]], [[LD2]];
  ; PTX-WITHOUT-DAG: mov.b32 [[PROXY3:%r[0-9]+]], [[LD3]];
  ; PTX-WITHOUT-DAG: st.param.v4.b32 [func_retval0+0], {[[PROXY0]], [[PROXY1]], [[PROXY2]], [[PROXY3]]};
  ; PTX-WITH-DAG: st.param.v4.b32 [func_retval0+0], {[[LD0]], [[LD1]], [[LD2]], [[LD3]]};

  %ret = call <4 x i32> @callee_vec_i32()
  ret <4 x i32> %ret
}
; <2 x half> return value: the callee's result is loaded from retval0 with a
; single ld.param.b32 into an %hh register and stored to func_retval0. When the
; pass is skipped, the store must read a mov.b32 copy of the loaded register;
; when the pass runs, the store must read the loaded register directly.
declare <2 x half> @callee_vec_f16()
define <2 x half> @check_vec_f16() {
  ; PTX-LABEL: check_vec_f16
  ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
  ; PTX-DAG: ld.param.b32 [[LD:%hh[0-9]+]], [retval0+0];
  ; PTX-DAG: } // callseq {{[0-9]+}}

  ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%hh[0-9]+]], [[LD]];
  ; PTX-WITHOUT-DAG: st.param.b32 [func_retval0+0], [[PROXY]];
  ; PTX-WITH-DAG: st.param.b32 [func_retval0+0], [[LD]];

  %ret = call <2 x half> @callee_vec_f16()
  ret <2 x half> %ret
}
169 declare <2 x double> @callee_vec_f64()
170 define <2 x double> @check_vec_f64() {
171 ; PTX-LABEL: check_vec_f64
172 ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
173 ; PTX-DAG: ld.param.v2.f64 {[[LD0:%fd[0-9]+]], [[LD1:%fd[0-9]+]]}, [retval0+0];
174 ; PTX-DAG: } // callseq {{[0-9]+}}
176 ; PTX-WITHOUT-DAG: mov.f64 [[PROXY0:%fd[0-9]+]], [[LD0]];
177 ; PTX-WITHOUT-DAG: mov.f64 [[PROXY1:%fd[0-9]+]], [[LD1]];
178 ; PTX-WITHOUT-DAG: st.param.v2.f64 [func_retval0+0], {[[PROXY0]], [[PROXY1]]};
179 ; PTX-WITH-DAG: st.param.v2.f64 [func_retval0+0], {[[LD0]], [[LD1]]};
181 %ret = call <2 x double> @callee_vec_f64()
182 ret <2 x double> %ret