1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,PREGFX11 %s
2 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX10,PREGFX11 %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX11 %s
; Intrinsic declarations used by the tests in this file.
; As exercised below, the exp.f32 / exp.i32 operands are: export target,
; source enable mask, four source values, and two i1 flags that show up as the
; "done" and "vm" suffixes in the expected assembly.
5 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
6 declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
; Buffer load used by test_export_pos_before_param_across_load.
7 declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #2
; Enable mask 0 on target 0: every source operand is expected to print as
; "off". Emitted once without and once with the done flag.
9 ; GCN-LABEL: {{^}}test_export_zeroes_f32:
10 ; GCN: exp mrt0 off, off, off, off{{$}}
11 ; GCN: exp mrt0 off, off, off, off done{{$}}
12 define amdgpu_kernel void @test_export_zeroes_f32() #0 {
14 call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 false, i1 false)
15 call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 true, i1 false)
19 ; FIXME: Should not set up registers for the unused source registers.
; Enable mask 1: only src0 is exported, the remaining operands print as "off".
; All four constants are still expected to be moved into registers.
21 ; GCN-LABEL: {{^}}test_export_en_src0_f32:
22 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
23 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
24 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
25 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
26 ; GCN: exp mrt0 [[SRC0]], off, off, off done{{$}}
27 define amdgpu_kernel void @test_export_en_src0_f32() #0 {
28 call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Enable mask 2: only src1 is exported, the remaining operands print as "off".
32 ; GCN-LABEL: {{^}}test_export_en_src1_f32:
33 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
34 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
35 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
36 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
37 ; GCN: exp mrt0 off, [[SRC1]], off, off done{{$}}
38 define amdgpu_kernel void @test_export_en_src1_f32() #0 {
39 call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Enable mask 4: only src2 is exported, the remaining operands print as "off".
43 ; GCN-LABEL: {{^}}test_export_en_src2_f32:
44 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
45 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
46 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
47 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
48 ; GCN: exp mrt0 off, off, [[SRC2]], off done{{$}}
49 define amdgpu_kernel void @test_export_en_src2_f32() #0 {
50 call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Enable mask 8: only src3 is exported, the remaining operands print as "off".
54 ; GCN-LABEL: {{^}}test_export_en_src3_f32:
55 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
56 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
57 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
58 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
59 ; GCN: exp mrt0 off, off, off, [[SRC3]] done{{$}}
60 define amdgpu_kernel void @test_export_en_src3_f32() #0 {
61 call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Enable mask 3: src0 and src1 are exported, src2 and src3 print as "off".
65 ; GCN-LABEL: {{^}}test_export_en_src0_src1_f32:
66 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
67 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
68 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
69 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
70 ; GCN: exp mrt0 [[SRC0]], [[SRC1]], off, off done{{$}}
71 define amdgpu_kernel void @test_export_en_src0_src1_f32() #0 {
72 call void @llvm.amdgcn.exp.f32(i32 0, i32 3, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Enable mask 5: src0 and src2 are exported, src1 and src3 print as "off".
76 ; GCN-LABEL: {{^}}test_export_en_src0_src2_f32:
77 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
78 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
79 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
80 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
81 ; GCN: exp mrt0 [[SRC0]], off, [[SRC2]], off done{{$}}
82 define amdgpu_kernel void @test_export_en_src0_src2_f32() #0 {
83 call void @llvm.amdgcn.exp.f32(i32 0, i32 5, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Enable mask 9: src0 and src3 are exported, src1 and src2 print as "off".
; Emitted once without and once with the done flag.
87 ; GCN-LABEL: {{^}}test_export_en_src0_src3_f32:
88 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
89 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
90 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
91 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
92 ; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]]{{$}}
93 ; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]] done{{$}}
94 define amdgpu_kernel void @test_export_en_src0_src3_f32() #0 {
95 call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
96 call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Enable mask 15: all four sources are exported.
; Emitted once without and once with the done flag.
100 ; GCN-LABEL: {{^}}test_export_en_src0_src1_src2_src3_f32:
101 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
102 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
103 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
104 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
105 ; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
106 ; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
107 define amdgpu_kernel void @test_export_en_src0_src1_src2_src3_f32() #0 {
108 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
109 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Target 7 is expected to print as mrt7. All four sources are the same
; constant (0.5), so one register is expected to feed every operand.
113 ; GCN-LABEL: {{^}}test_export_mrt7_f32:
114 ; GCN-DAG: v_mov_b32_e32 [[VHALF:v[0-9]+]], 0.5
115 ; GCN: exp mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]]{{$}}
116 ; GCN: exp mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]] done{{$}}
117 define amdgpu_kernel void @test_export_mrt7_f32() #0 {
118 call void @llvm.amdgcn.exp.f32(i32 7, i32 15, float 0.5, float 0.5, float 0.5, float 0.5, i1 false, i1 false)
119 call void @llvm.amdgcn.exp.f32(i32 7, i32 15, float 0.5, float 0.5, float 0.5, float 0.5, i1 true, i1 false)
; Target 8 is expected to print as mrtz.
123 ; GCN-LABEL: {{^}}test_export_z_f32:
124 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
125 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
126 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
127 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
128 ; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
129 ; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
130 define amdgpu_kernel void @test_export_z_f32() #0 {
131 call void @llvm.amdgcn.exp.f32(i32 8, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
132 call void @llvm.amdgcn.exp.f32(i32 8, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Target 9 is expected to print as null. The instruction expectations use the
; prefix shared by the tonga and gfx1010 runs only; for gfx1100 nothing beyond
; the label is checked here.
136 ; GCN-LABEL: {{^}}test_export_null_f32:
137 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
138 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
139 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
140 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
141 ; PREGFX11: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
142 ; PREGFX11: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
143 define amdgpu_kernel void @test_export_null_f32() #0 {
144 call void @llvm.amdgcn.exp.f32(i32 9, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
145 call void @llvm.amdgcn.exp.f32(i32 9, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Target 10 has no symbolic name in the expected output; it is expected to
; print as invalid_target_10.
149 ; GCN-LABEL: {{^}}test_export_reserved10_f32:
150 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
151 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
152 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
153 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
154 ; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
155 ; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
156 define amdgpu_kernel void @test_export_reserved10_f32() #0 {
157 call void @llvm.amdgcn.exp.f32(i32 10, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
158 call void @llvm.amdgcn.exp.f32(i32 10, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Target 11 has no symbolic name in the expected output; it is expected to
; print as invalid_target_11.
162 ; GCN-LABEL: {{^}}test_export_reserved11_f32:
163 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
164 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
165 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
166 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
167 ; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
168 ; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
169 define amdgpu_kernel void @test_export_reserved11_f32() #0 {
170 call void @llvm.amdgcn.exp.f32(i32 11, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
171 call void @llvm.amdgcn.exp.f32(i32 11, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Target 12 is expected to print as pos0.
175 ; GCN-LABEL: {{^}}test_export_pos0_f32:
176 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
177 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
178 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
179 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
180 ; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
181 ; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
182 define amdgpu_kernel void @test_export_pos0_f32() #0 {
183 call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
184 call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Target 15 is expected to print as pos3.
188 ; GCN-LABEL: {{^}}test_export_pos3_f32:
189 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
190 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
191 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
192 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
193 ; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
194 ; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
195 define amdgpu_kernel void @test_export_pos3_f32() #0 {
196 call void @llvm.amdgcn.exp.f32(i32 15, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
197 call void @llvm.amdgcn.exp.f32(i32 15, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Target 32 is expected to print as param0. The instruction expectations use
; the prefix shared by the tonga and gfx1010 runs only; for gfx1100 nothing
; beyond the label is checked here.
201 ; GCN-LABEL: {{^}}test_export_param0_f32:
202 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
203 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
204 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
205 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
206 ; PREGFX11: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
207 ; PREGFX11: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
208 define amdgpu_kernel void @test_export_param0_f32() #0 {
209 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
210 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Target 63 is expected to print as param31. The instruction expectations use
; the prefix shared by the tonga and gfx1010 runs only; for gfx1100 nothing
; beyond the label is checked here.
214 ; GCN-LABEL: {{^}}test_export_param31_f32:
215 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
216 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
217 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
218 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
219 ; PREGFX11: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
220 ; PREGFX11: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
221 define amdgpu_kernel void @test_export_param31_f32() #0 {
222 call void @llvm.amdgcn.exp.f32(i32 63, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
223 call void @llvm.amdgcn.exp.f32(i32 63, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Both calls pass true for the last (vm) flag. The tonga and gfx1010 runs
; expect a trailing "vm" (after "done" when both are set); the gfx1100 run
; expects the same instructions without the vm suffix.
227 ; GCN-LABEL: {{^}}test_export_vm_f32:
228 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
229 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
230 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
231 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
232 ; PREGFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] vm{{$}}
233 ; PREGFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}}
234 ; GFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
235 ; GFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
236 define amdgpu_kernel void @test_export_vm_f32() #0 {
237 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 true)
238 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 true)
; Integer variant: enable mask 0 on target 0, so every source operand is
; expected to print as "off". Emitted once without and once with the done flag.
256 ; GCN-LABEL: {{^}}test_export_zeroes_i32:
257 ; GCN: exp mrt0 off, off, off, off{{$}}
258 ; GCN: exp mrt0 off, off, off, off done{{$}}
259 define amdgpu_kernel void @test_export_zeroes_i32() #0 {
261 call void @llvm.amdgcn.exp.i32(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false)
262 call void @llvm.amdgcn.exp.i32(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 true, i1 false)
266 ; FIXME: Should not set up registers for the unused source registers.
; Integer variant, enable mask 1: only src0 is exported, the remaining
; operands print as "off". All four constants are still expected in registers.
268 ; GCN-LABEL: {{^}}test_export_en_src0_i32:
269 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
270 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
271 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
272 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
273 ; GCN: exp mrt0 [[SRC0]], off, off, off done{{$}}
274 define amdgpu_kernel void @test_export_en_src0_i32() #0 {
275 call void @llvm.amdgcn.exp.i32(i32 0, i32 1, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant, enable mask 2: only src1 is exported.
279 ; GCN-LABEL: {{^}}test_export_en_src1_i32:
280 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
281 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
282 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
283 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
284 ; GCN: exp mrt0 off, [[SRC1]], off, off done{{$}}
285 define amdgpu_kernel void @test_export_en_src1_i32() #0 {
286 call void @llvm.amdgcn.exp.i32(i32 0, i32 2, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant, enable mask 4: only src2 is exported.
290 ; GCN-LABEL: {{^}}test_export_en_src2_i32:
291 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
292 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
293 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
294 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
295 ; GCN: exp mrt0 off, off, [[SRC2]], off done{{$}}
296 define amdgpu_kernel void @test_export_en_src2_i32() #0 {
297 call void @llvm.amdgcn.exp.i32(i32 0, i32 4, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant, enable mask 8: only src3 is exported.
301 ; GCN-LABEL: {{^}}test_export_en_src3_i32:
302 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
303 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
304 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
305 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
306 ; GCN: exp mrt0 off, off, off, [[SRC3]] done{{$}}
307 define amdgpu_kernel void @test_export_en_src3_i32() #0 {
308 call void @llvm.amdgcn.exp.i32(i32 0, i32 8, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant, enable mask 3: src0 and src1 are exported.
312 ; GCN-LABEL: {{^}}test_export_en_src0_src1_i32:
313 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
314 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
315 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
316 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
317 ; GCN: exp mrt0 [[SRC0]], [[SRC1]], off, off done{{$}}
318 define amdgpu_kernel void @test_export_en_src0_src1_i32() #0 {
319 call void @llvm.amdgcn.exp.i32(i32 0, i32 3, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant, enable mask 5: src0 and src2 are exported.
323 ; GCN-LABEL: {{^}}test_export_en_src0_src2_i32:
324 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
325 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
326 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
327 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
328 ; GCN: exp mrt0 [[SRC0]], off, [[SRC2]], off done{{$}}
329 define amdgpu_kernel void @test_export_en_src0_src2_i32() #0 {
330 call void @llvm.amdgcn.exp.i32(i32 0, i32 5, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant, enable mask 9: src0 and src3 are exported.
; Emitted once without and once with the done flag.
334 ; GCN-LABEL: {{^}}test_export_en_src0_src3_i32:
335 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
336 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
337 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
338 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
339 ; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]]{{$}}
340 ; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]] done{{$}}
341 define amdgpu_kernel void @test_export_en_src0_src3_i32() #0 {
342 call void @llvm.amdgcn.exp.i32(i32 0, i32 9, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
343 call void @llvm.amdgcn.exp.i32(i32 0, i32 9, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant, enable mask 15: all four sources are exported.
; Emitted once without and once with the done flag.
347 ; GCN-LABEL: {{^}}test_export_en_src0_src1_src2_src3_i32:
348 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
349 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
350 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
351 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
352 ; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
353 ; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
354 define amdgpu_kernel void @test_export_en_src0_src1_src2_src3_i32() #0 {
355 call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
356 call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant: target 7 is expected to print as mrt7, with one register
; holding the constant 5 feeding all four operands.
; NOTE(review): the capture below is named VHALF although it holds integer 5;
; the name looks carried over from the f32 test. Harmless, but misleading.
360 ; GCN-LABEL: {{^}}test_export_mrt7_i32:
361 ; GCN-DAG: v_mov_b32_e32 [[VHALF:v[0-9]+]], 5
362 ; GCN: exp mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]]{{$}}
363 ; GCN: exp mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]] done{{$}}
364 define amdgpu_kernel void @test_export_mrt7_i32() #0 {
365 call void @llvm.amdgcn.exp.i32(i32 7, i32 15, i32 5, i32 5, i32 5, i32 5, i1 false, i1 false)
366 call void @llvm.amdgcn.exp.i32(i32 7, i32 15, i32 5, i32 5, i32 5, i32 5, i1 true, i1 false)
; Integer variant: target 8 is expected to print as mrtz.
370 ; GCN-LABEL: {{^}}test_export_z_i32:
371 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
372 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
373 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
374 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
375 ; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
376 ; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
377 define amdgpu_kernel void @test_export_z_i32() #0 {
378 call void @llvm.amdgcn.exp.i32(i32 8, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
379 call void @llvm.amdgcn.exp.i32(i32 8, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant: target 9 is expected to print as null. The instruction
; expectations use the prefix shared by the tonga and gfx1010 runs only.
383 ; GCN-LABEL: {{^}}test_export_null_i32:
384 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
385 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
386 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
387 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
388 ; PREGFX11: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
389 ; PREGFX11: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
390 define amdgpu_kernel void @test_export_null_i32() #0 {
391 call void @llvm.amdgcn.exp.i32(i32 9, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
392 call void @llvm.amdgcn.exp.i32(i32 9, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant: target 10 is expected to print as invalid_target_10.
396 ; GCN-LABEL: {{^}}test_export_reserved10_i32:
397 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
398 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
399 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
400 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
401 ; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
402 ; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
403 define amdgpu_kernel void @test_export_reserved10_i32() #0 {
404 call void @llvm.amdgcn.exp.i32(i32 10, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
405 call void @llvm.amdgcn.exp.i32(i32 10, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant: target 11 is expected to print as invalid_target_11.
409 ; GCN-LABEL: {{^}}test_export_reserved11_i32:
410 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
411 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
412 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
413 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
414 ; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
415 ; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
416 define amdgpu_kernel void @test_export_reserved11_i32() #0 {
417 call void @llvm.amdgcn.exp.i32(i32 11, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
418 call void @llvm.amdgcn.exp.i32(i32 11, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant: target 12 is expected to print as pos0.
422 ; GCN-LABEL: {{^}}test_export_pos0_i32:
423 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
424 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
425 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
426 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
427 ; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
428 ; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
429 define amdgpu_kernel void @test_export_pos0_i32() #0 {
430 call void @llvm.amdgcn.exp.i32(i32 12, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
431 call void @llvm.amdgcn.exp.i32(i32 12, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant: target 15 is expected to print as pos3.
435 ; GCN-LABEL: {{^}}test_export_pos3_i32:
436 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
437 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
438 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
439 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
440 ; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
441 ; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
442 define amdgpu_kernel void @test_export_pos3_i32() #0 {
443 call void @llvm.amdgcn.exp.i32(i32 15, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
444 call void @llvm.amdgcn.exp.i32(i32 15, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant: target 32 is expected to print as param0. The instruction
; expectations use the prefix shared by the tonga and gfx1010 runs only.
448 ; GCN-LABEL: {{^}}test_export_param0_i32:
449 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
450 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
451 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
452 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
453 ; PREGFX11: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
454 ; PREGFX11: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
455 define amdgpu_kernel void @test_export_param0_i32() #0 {
456 call void @llvm.amdgcn.exp.i32(i32 32, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
457 call void @llvm.amdgcn.exp.i32(i32 32, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant: target 63 is expected to print as param31. The instruction
; expectations use the prefix shared by the tonga and gfx1010 runs only.
461 ; GCN-LABEL: {{^}}test_export_param31_i32:
462 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
463 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
464 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
465 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
466 ; PREGFX11: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
467 ; PREGFX11: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
468 define amdgpu_kernel void @test_export_param31_i32() #0 {
469 call void @llvm.amdgcn.exp.i32(i32 63, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
470 call void @llvm.amdgcn.exp.i32(i32 63, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant of the vm test: both calls pass true for the last (vm) flag.
; The tonga and gfx1010 runs expect a trailing "vm"; the gfx1100 run expects
; the same instructions without it.
474 ; GCN-LABEL: {{^}}test_export_vm_i32:
475 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
476 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
477 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
478 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
479 ; PREGFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] vm{{$}}
480 ; PREGFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}}
481 ; GFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
482 ; GFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
483 define amdgpu_kernel void @test_export_vm_i32() #0 {
484 call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 true)
485 call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 true)
; amdgpu_ps function with a conditional export: %flag == 0 branches to %end,
; otherwise to %exp. The export has neither done nor vm set. The check
; requires an s_cbranch_execz in the generated code.
489 ; GCN-LABEL: {{^}}test_if_export_f32:
490 ; GCN: s_cbranch_execz
492 define amdgpu_ps void @test_if_export_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
493 %cc = icmp eq i32 %flag, 0
494 br i1 %cc, label %end, label %exp
497 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 false)
; Same shape as test_if_export_f32, but the export sets the vm flag
; (done is false). The check requires an s_cbranch_execz.
504 ; GCN-LABEL: {{^}}test_if_export_vm_f32:
505 ; GCN: s_cbranch_execz
507 define amdgpu_ps void @test_if_export_vm_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
508 %cc = icmp eq i32 %flag, 0
509 br i1 %cc, label %end, label %exp
512 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 true)
; Same shape as test_if_export_f32, but the export sets the done flag
; (vm is false). The check requires an s_cbranch_execz.
519 ; GCN-LABEL: {{^}}test_if_export_done_f32:
520 ; GCN: s_cbranch_execz
522 define amdgpu_ps void @test_if_export_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
523 %cc = icmp eq i32 %flag, 0
524 br i1 %cc, label %end, label %exp
527 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 false)
; Same shape as test_if_export_f32, but the export sets both the done and
; the vm flag. The check requires an s_cbranch_execz.
534 ; GCN-LABEL: {{^}}test_if_export_vm_done_f32:
535 ; GCN: s_cbranch_execz
537 define amdgpu_ps void @test_if_export_vm_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
538 %cc = icmp eq i32 %flag, 0
539 br i1 %cc, label %end, label %exp
542 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
; Two exports (param0, then param1 with done) separated in the IR by the fsub
; that produces the second export's third operand. On the tonga and gfx1010
; runs the two exp instructions are expected on consecutive lines, i.e. both
; arithmetic results must be available before the first export.
549 ; GCN-LABEL: {{^}}test_export_clustering:
550 ; PREGFX11-DAG: v_mov_b32_e32 [[W0:v[0-9]+]], 0
551 ; PREGFX11-DAG: v_mov_b32_e32 [[W1:v[0-9]+]], 1.0
552 ; PREGFX11-DAG: v_mov_b32_e32 [[X:v[0-9]+]], s0
553 ; PREGFX11-DAG: v_mov_b32_e32 [[Y:v[0-9]+]], s1
554 ; PREGFX11-DAG: v_add_f32_e{{32|64}} [[Z0:v[0-9]+]]
555 ; PREGFX11-DAG: v_sub_f32_e{{32|64}} [[Z1:v[0-9]+]]
556 ; PREGFX11: exp param0 [[X]], [[Y]], [[Z0]], [[W0]]{{$}}
557 ; PREGFX11-NEXT: exp param1 [[X]], [[Y]], [[Z1]], [[W1]] done{{$}}
558 define amdgpu_kernel void @test_export_clustering(float %x, float %y) #0 {
559 %z0 = fadd float %x, %y
560 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %x, float %y, float %z0, float 0.0, i1 false, i1 false)
561 %z1 = fsub float %y, %x
562 call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %x, float %y, float %z1, float 1.0, i1 true, i1 false)
; The IR issues the param0 export (target 32) first and the pos0 export
; (target 12, done) second. On the tonga and gfx1010 runs the checks forbid an
; s_waitcnt ahead of the param0 export.
566 ; GCN-LABEL: {{^}}test_export_pos_before_param:
568 ; PREGFX11-NOT: s_waitcnt
569 ; PREGFX11: exp param0
570 define amdgpu_kernel void @test_export_pos_before_param(float %x, float %y) #0 {
571 %z0 = fadd float %x, %y
572 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
573 %z1 = fsub float %y, %x
574 call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %z1, i1 true, i1 false)
; Same IR shape as test_export_pos_before_param, but the second export uses
; target 16 (pos4, going by the test name). The s_waitcnt restriction is
; stated under the prefix used only by the gfx1010 run.
578 ; GCN-LABEL: {{^}}test_export_pos4_before_param:
580 ; GFX10-NOT: s_waitcnt
582 define amdgpu_kernel void @test_export_pos4_before_param(float %x, float %y) #0 {
583 %z0 = fadd float %x, %y
584 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
585 %z1 = fsub float %y, %x
586 call void @llvm.amdgcn.exp.f32(i32 16, i32 15, float 0.0, float 0.0, float 0.0, float %z1, i1 true, i1 false)
; The IR issues three param exports (targets 32, 33, 34) followed by three
; exports to targets 12, 13 and 14, the last one with done set. On the tonga
; and gfx1010 runs the param exports are expected in their original relative
; order (param0, param1, param2) with no s_waitcnt ahead of param0.
590 ; GCN-LABEL: {{^}}test_export_pos_before_param_ordered:
594 ; PREGFX11-NOT: s_waitcnt
595 ; PREGFX11: exp param0
596 ; PREGFX11: exp param1
597 ; PREGFX11: exp param2
598 define amdgpu_kernel void @test_export_pos_before_param_ordered(float %x, float %y) #0 {
599 %z0 = fadd float %x, %y
600 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
601 call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
602 call void @llvm.amdgcn.exp.f32(i32 34, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
603 %z1 = fsub float %y, %x
604 call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %z1, i1 false, i1 false)
605 call void @llvm.amdgcn.exp.f32(i32 13, i32 15, float 0.0, float 0.0, float 0.0, float %z1, i1 false, i1 false)
606 call void @llvm.amdgcn.exp.f32(i32 14, i32 15, float 0.0, float 0.0, float 0.0, float %z1, i1 true, i1 false)
; Two param exports (targets 32 and 33) precede a buffer load whose result
; feeds the final export to target 12 (done). On the tonga and gfx1010 runs
; the param0 and param1 exports are expected on consecutive output lines.
610 ; GCN-LABEL: {{^}}test_export_pos_before_param_across_load:
612 ; PREGFX11-NEXT: exp param0
613 ; PREGFX11-NEXT: exp param1
614 define amdgpu_kernel void @test_export_pos_before_param_across_load(i32 %idx) #0 {
615 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false)
616 call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false)
617 %load = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0)
618 call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %load, i1 true, i1 false)
; A store goes through a pointer selected between two addrspace(5) allocas.
; After it comes an export to target 12 (done), then a load from the first
; alloca whose value feeds two param exports (targets 32 and 33).
; On the tonga and gfx1010 runs the expected order is buffer_store,
; buffer_load, then the param0 and param1 exports.
622 ; GCN-LABEL: {{^}}test_export_across_store_load:
623 ; PREGFX11: buffer_store
624 ; PREGFX11: buffer_load
626 ; PREGFX11: exp param0
627 ; PREGFX11: exp param1
628 define amdgpu_kernel void @test_export_across_store_load(i32 %idx, float %v) #0 {
629 %data0 = alloca <4 x float>, align 8, addrspace(5)
630 %data1 = alloca <4 x float>, align 8, addrspace(5)
631 %cmp = icmp eq i32 %idx, 1
632 %data = select i1 %cmp, ptr addrspace(5) %data0, ptr addrspace(5) %data1
633 store float %v, ptr addrspace(5) %data, align 8
634 call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float 1.0, i1 true, i1 false)
635 %load0 = load float, ptr addrspace(5) %data0, align 8
636 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
637 call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
; Attribute groups: #0 is applied to every test function, #1 to the two exp
; intrinsic declarations, #2 to the buffer-load intrinsic declaration.
641 attributes #0 = { nounwind }
642 attributes #1 = { nounwind inaccessiblememonly }
643 attributes #2 = { nounwind readnone }