1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; Test: f32 in-register interpolation of two parameters.
; Two values are fetched with llvm.amdgcn.lds.param.load, run through the
; interp.inreg p10/p2 intrinsics and all four results are exported.
;   %i  - inreg float, second operand of both p10 calls
;   %j  - inreg float, second operand of both p2 calls
;   %m0 - inreg i32, last operand of each lds.param.load; the expected output
;         copies it into m0 before the loads
; The expected output brackets the two lds_param_load instructions with
; s_wqm_b32 on exec_lo and a restore of the saved exec_lo.
4 define amdgpu_ps void @v_interp_f32(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
5 ; GCN-LABEL: v_interp_f32:
6 ; GCN: ; %bb.0: ; %main_body
7 ; GCN-NEXT: s_mov_b32 s3, exec_lo
8 ; GCN-NEXT: s_wqm_b32 exec_lo, exec_lo
9 ; GCN-NEXT: s_mov_b32 m0, s2
10 ; GCN-NEXT: lds_param_load v0, attr0.y wait_vdst:15
11 ; GCN-NEXT: lds_param_load v1, attr1.x wait_vdst:15
12 ; GCN-NEXT: s_mov_b32 exec_lo, s3
13 ; GCN-NEXT: v_mov_b32_e32 v2, s0
14 ; GCN-NEXT: v_mov_b32_e32 v4, s1
15 ; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
16 ; GCN-NEXT: v_interp_p10_f32 v3, v0, v2, v0 wait_exp:1
17 ; GCN-NEXT: v_interp_p10_f32 v2, v1, v2, v1
18 ; GCN-NEXT: v_interp_p2_f32 v5, v0, v4, v3 wait_exp:7
19 ; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
20 ; GCN-NEXT: v_interp_p2_f32 v4, v1, v4, v5 wait_exp:7
21 ; GCN-NEXT: exp mrt0 v3, v2, v5, v4 done
; The leading (i32 1, i32 0) and (i32 0, i32 1) load operands show up as
; attr0.y and attr1.x in the expected output above.
24 %p0 = call float @llvm.amdgcn.lds.param.load(i32 1, i32 0, i32 %m0)
25 %p1 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 1, i32 %m0)
26 %p0_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p0, float %i, float %p0)
27 %p1_0 = call float @llvm.amdgcn.interp.inreg.p2(float %p0, float %j, float %p0_0)
28 %p0_1 = call float @llvm.amdgcn.interp.inreg.p10(float %p1, float %i, float %p1)
; Note: this p2 accumulates onto %p1_0 (the result of the first p2), not onto
; %p0_1. The expected output matches: the last v_interp_p2_f32 reads v5, which
; the first v_interp_p2_f32 wrote.
29 %p1_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p1, float %j, float %p1_0)
; Exports both p10 results and both p2 results in a single exp.f32 call.
30 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_0, float %p0_1, float %p1_0, float %p1_1, i1 true, i1 true) #0
; Test: f32 in-register interpolation of four parameters.
; Four values are fetched with llvm.amdgcn.lds.param.load; each goes through
; its own p10 -> p2 pair and the four p2 results are exported.
;   %i  - inreg float, second operand of every p10 call
;   %j  - inreg float, second operand of every p2 call
;   %m0 - inreg i32, last operand of each lds.param.load
; In the expected output the four p10 instructions are emitted together ahead
; of the p2 instructions and carry wait_exp:3, wait_exp:2, wait_exp:1 and no
; wait_exp operand, in that order.
34 define amdgpu_ps void @v_interp_f32_many(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
35 ; GCN-LABEL: v_interp_f32_many:
36 ; GCN: ; %bb.0: ; %main_body
37 ; GCN-NEXT: s_mov_b32 s3, exec_lo
38 ; GCN-NEXT: s_wqm_b32 exec_lo, exec_lo
39 ; GCN-NEXT: s_mov_b32 m0, s2
40 ; GCN-NEXT: lds_param_load v0, attr0.x wait_vdst:15
41 ; GCN-NEXT: lds_param_load v1, attr1.x wait_vdst:15
42 ; GCN-NEXT: lds_param_load v2, attr2.x wait_vdst:15
43 ; GCN-NEXT: lds_param_load v3, attr3.x wait_vdst:15
44 ; GCN-NEXT: s_mov_b32 exec_lo, s3
45 ; GCN-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
46 ; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4)
47 ; GCN-NEXT: v_interp_p10_f32 v6, v0, v4, v0 wait_exp:3
48 ; GCN-NEXT: v_interp_p10_f32 v7, v1, v4, v1 wait_exp:2
49 ; GCN-NEXT: v_interp_p10_f32 v8, v2, v4, v2 wait_exp:1
50 ; GCN-NEXT: v_interp_p10_f32 v4, v3, v4, v3
51 ; GCN-NEXT: v_interp_p2_f32 v6, v0, v5, v6 wait_exp:7
52 ; GCN-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
53 ; GCN-NEXT: v_interp_p2_f32 v7, v1, v5, v7 wait_exp:7
54 ; GCN-NEXT: v_interp_p2_f32 v8, v2, v5, v8 wait_exp:7
55 ; GCN-NEXT: s_delay_alu instid0(VALU_DEP_4)
56 ; GCN-NEXT: v_interp_p2_f32 v4, v3, v5, v4 wait_exp:7
57 ; GCN-NEXT: exp mrt0 v6, v7, v8, v4 done
; Second load operand 0..3 shows up as attr0..attr3 (.x) in the expected output.
60 %p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %m0)
61 %p1 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 1, i32 %m0)
62 %p2 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 2, i32 %m0)
63 %p3 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 3, i32 %m0)
; Each parameter: %pN_0 is the p10 result, %pN_1 is the p2 result built on it.
64 %p0_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p0, float %i, float %p0)
65 %p0_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p0, float %j, float %p0_0)
66 %p1_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p1, float %i, float %p1)
67 %p1_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p1, float %j, float %p1_0)
68 %p2_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p2, float %i, float %p2)
69 %p2_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p2, float %j, float %p2_0)
70 %p3_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p3, float %i, float %p3)
71 %p3_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p3, float %j, float %p3_0)
; Only the four p2 results are exported.
72 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_1, float %p1_1, float %p2_1, float %p3_1, i1 true, i1 true) #0
; Test: same four-parameter f32 interpolation as the other "many" test, but
; %i and %j are loaded from global memory instead of arriving as arguments.
;   %ptr - addrspace(1) pointer; %i is read from float index 1, %j from index 2
;   %m0  - inreg i32, last operand of each lds.param.load
; In the expected output the two float loads appear as one global_load_b64 at
; offset:4, and an s_waitcnt vmcnt(0) precedes the first v_interp_p10_f32.
76 define amdgpu_ps void @v_interp_f32_many_vm(ptr addrspace(1) %ptr, i32 inreg %m0) #0 {
77 ; GCN-LABEL: v_interp_f32_many_vm:
78 ; GCN: ; %bb.0: ; %main_body
79 ; GCN-NEXT: global_load_b64 v[0:1], v[0:1], off offset:4
80 ; GCN-NEXT: s_mov_b32 m0, s0
81 ; GCN-NEXT: s_mov_b32 s0, exec_lo
82 ; GCN-NEXT: s_wqm_b32 exec_lo, exec_lo
83 ; GCN-NEXT: lds_param_load v2, attr0.x wait_vdst:15
84 ; GCN-NEXT: lds_param_load v3, attr1.x wait_vdst:15
85 ; GCN-NEXT: lds_param_load v4, attr2.x wait_vdst:15
86 ; GCN-NEXT: lds_param_load v5, attr3.x wait_vdst:15
87 ; GCN-NEXT: s_mov_b32 exec_lo, s0
88 ; GCN-NEXT: s_waitcnt vmcnt(0)
89 ; GCN-NEXT: v_interp_p10_f32 v6, v2, v0, v2 wait_exp:3
90 ; GCN-NEXT: v_interp_p10_f32 v7, v3, v0, v3 wait_exp:2
91 ; GCN-NEXT: v_interp_p10_f32 v8, v4, v0, v4 wait_exp:1
92 ; GCN-NEXT: v_interp_p10_f32 v0, v5, v0, v5
93 ; GCN-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
94 ; GCN-NEXT: v_interp_p2_f32 v6, v2, v1, v6 wait_exp:7
95 ; GCN-NEXT: v_interp_p2_f32 v7, v3, v1, v7 wait_exp:7
96 ; GCN-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
97 ; GCN-NEXT: v_interp_p2_f32 v8, v4, v1, v8 wait_exp:7
98 ; GCN-NEXT: v_interp_p2_f32 v0, v5, v1, v0 wait_exp:7
99 ; GCN-NEXT: exp mrt0 v6, v7, v8, v0 done
; %i and %j are adjacent floats at element offsets 1 and 2 from %ptr.
102 %i.ptr = getelementptr float, ptr addrspace(1) %ptr, i32 1
103 %i = load float, ptr addrspace(1) %i.ptr, align 4
104 %j.ptr = getelementptr float, ptr addrspace(1) %ptr, i32 2
105 %j = load float, ptr addrspace(1) %j.ptr, align 4
106 %p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %m0)
107 %p1 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 1, i32 %m0)
108 %p2 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 2, i32 %m0)
109 %p3 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 3, i32 %m0)
; Each parameter: %pN_0 is the p10 result, %pN_1 is the p2 result built on it.
110 %p0_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p0, float %i, float %p0)
111 %p0_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p0, float %j, float %p0_0)
112 %p1_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p1, float %i, float %p1)
113 %p1_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p1, float %j, float %p1_0)
114 %p2_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p2, float %i, float %p2)
115 %p2_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p2, float %j, float %p2_0)
116 %p3_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p3, float %i, float %p3)
117 %p3_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p3, float %j, float %p3_0)
; Only the four p2 results are exported.
118 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_1, float %p1_1, float %p2_1, float %p3_1, i1 true, i1 true) #0
; Test: f16 in-register interpolation of a single parameter.
; One loaded value is interpolated twice with the .f16 intrinsics, once with
; the trailing i1 operand set to 0 and once set to 1, and the two half results
; are added into %res.
;   %i  - inreg float, second operand of both p10.f16 calls
;   %j  - inreg float, second operand of both p2.f16 calls
;   %m0 - inreg i32, last operand of the lds.param.load
; In the expected output the i1 1 calls are the instructions that carry an
; op_sel operand; the i1 0 calls have none.
; NOTE(review): the terminator is not visible in this excerpt; %res is
; presumably the returned value - confirm against the full file.
122 define amdgpu_ps half @v_interp_f16(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
123 ; GCN-LABEL: v_interp_f16:
124 ; GCN: ; %bb.0: ; %main_body
125 ; GCN-NEXT: s_mov_b32 s3, exec_lo
126 ; GCN-NEXT: s_wqm_b32 exec_lo, exec_lo
127 ; GCN-NEXT: s_mov_b32 m0, s2
128 ; GCN-NEXT: lds_param_load v1, attr0.x wait_vdst:15
129 ; GCN-NEXT: s_mov_b32 exec_lo, s3
130 ; GCN-NEXT: v_mov_b32_e32 v0, s0
131 ; GCN-NEXT: v_mov_b32_e32 v2, s1
132 ; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
133 ; GCN-NEXT: v_interp_p10_f16_f32 v3, v1, v0, v1
134 ; GCN-NEXT: v_interp_p10_f16_f32 v0, v1, v0, v1 op_sel:[1,0,1,0] wait_exp:7
135 ; GCN-NEXT: v_interp_p2_f16_f32 v3, v1, v2, v3 wait_exp:7
136 ; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
137 ; GCN-NEXT: v_interp_p2_f16_f32 v0, v1, v2, v0 op_sel:[1,0,0,0] wait_exp:7
138 ; GCN-NEXT: v_add_f16_e32 v0, v3, v0
139 ; GCN-NEXT: ; return to shader part epilog
141 %p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %m0)
; i1 0 variant: p10.f16 followed by p2.f16 on the same loaded value.
142 %l_p0 = call float @llvm.amdgcn.interp.inreg.p10.f16(float %p0, float %i, float %p0, i1 0)
143 %l_p1 = call half @llvm.amdgcn.interp.inreg.p2.f16(float %p0, float %j, float %l_p0, i1 0)
; i1 1 variant of the same pair.
144 %h_p0 = call float @llvm.amdgcn.interp.inreg.p10.f16(float %p0, float %i, float %p0, i1 1)
145 %h_p1 = call half @llvm.amdgcn.interp.inreg.p2.f16(float %p0, float %j, float %h_p0, i1 1)
146 %res = fadd half %l_p1, %h_p1
; Test: f16 in-register interpolation where the parameter operands are the
; constant 0.0 instead of values from llvm.amdgcn.lds.param.load.
;   %i - inreg float, second operand of the p10.f16 call
;   %j - inreg float, second operand of the p2.f16 call
; In the expected output the constant is moved into v0 and used as the
; register operand of both interp instructions; no lds_param_load or exec
; manipulation appears.
; NOTE(review): the terminator is not visible in this excerpt; %res is
; presumably the returned value - confirm against the full file.
150 define amdgpu_ps half @v_interp_f16_imm_params(float inreg %i, float inreg %j) #0 {
151 ; GCN-LABEL: v_interp_f16_imm_params:
152 ; GCN: ; %bb.0: ; %main_body
153 ; GCN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
154 ; GCN-NEXT: v_mov_b32_e32 v2, s1
155 ; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
156 ; GCN-NEXT: v_interp_p10_f16_f32 v1, v0, v1, v0 wait_exp:7
157 ; GCN-NEXT: v_interp_p2_f16_f32 v0, v0, v2, v0 wait_exp:7
158 ; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
159 ; GCN-NEXT: v_cvt_f16_f32_e32 v1, v1
160 ; GCN-NEXT: v_add_f16_e32 v0, v1, v0
161 ; GCN-NEXT: ; return to shader part epilog
163 %l_p0 = call float @llvm.amdgcn.interp.inreg.p10.f16(float 0.0, float %i, float 0.0, i1 0)
; Note: this p2.f16 takes 0.0 as its third operand, so it does not consume
; %l_p0; the p10 result is instead truncated to half and added below.
164 %l_p1 = call half @llvm.amdgcn.interp.inreg.p2.f16(float 0.0, float %j, float 0.0, i1 0)
165 %h = fptrunc float %l_p0 to half
166 %res = fadd half %h, %l_p1
; Intrinsic declarations used by the test functions in this file.
; lds.param.load is the only one declared with attribute group #1; the rest
; use #0.
170 declare float @llvm.amdgcn.lds.param.load(i32, i32, i32) #1
171 declare float @llvm.amdgcn.interp.inreg.p10(float, float, float) #0
172 declare float @llvm.amdgcn.interp.inreg.p2(float, float, float) #0
173 declare float @llvm.amdgcn.interp.inreg.p10.f16(float, float, float, i1) #0
174 declare half @llvm.amdgcn.interp.inreg.p2.f16(float, float, float, i1) #0
175 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
; NOTE(review): exp.f16 is declared but not called by any function visible in
; this excerpt - confirm whether it is still needed.
176 declare void @llvm.amdgcn.exp.f16(i32, i32, float, float, float, float, i1, i1) #0
; Attribute groups referenced by the definitions and declarations in this file.
178 attributes #0 = { nounwind }
179 attributes #1 = { nounwind readnone }