1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s
2 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX10 %s
4 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
5 declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
6 declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32) #2
; Enable mask 0 on target 0: all four operands are expected to print as `off`.
; The two calls differ only in the first i1 flag, which shows up as `done`.
8 ; GCN-LABEL: {{^}}test_export_zeroes_f32:
9 ; GCN: exp mrt0 off, off, off, off{{$}}
10 ; GCN: exp mrt0 off, off, off, off done{{$}}
11 define amdgpu_kernel void @test_export_zeroes_f32() #0 {
13 call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 false, i1 false)
14 call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 true, i1 false)
18 ; FIXME: Should not set up registers for the unused source registers.
; Enable mask 1: only the first source operand appears in the export, the
; other three print as `off`. All four constants are still expected to be
; materialized with v_mov_b32.
20 ; GCN-LABEL: {{^}}test_export_en_src0_f32:
21 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
22 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
23 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
24 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
25 ; GCN: exp mrt0 [[SRC0]], off, off, off done{{$}}
26 define amdgpu_kernel void @test_export_en_src0_f32() #0 {
27 call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Enable mask 2: only the second source operand appears in the export, the
; other three print as `off`.
31 ; GCN-LABEL: {{^}}test_export_en_src1_f32:
32 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
33 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
34 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
35 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
36 ; GCN: exp mrt0 off, [[SRC1]], off, off done{{$}}
37 define amdgpu_kernel void @test_export_en_src1_f32() #0 {
38 call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Enable mask 4: only the third source operand appears in the export, the
; other three print as `off`.
42 ; GCN-LABEL: {{^}}test_export_en_src2_f32:
43 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
44 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
45 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
46 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
47 ; GCN: exp mrt0 off, off, [[SRC2]], off done{{$}}
48 define amdgpu_kernel void @test_export_en_src2_f32() #0 {
49 call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Enable mask 8: only the fourth source operand appears in the export, the
; other three print as `off`.
53 ; GCN-LABEL: {{^}}test_export_en_src3_f32:
54 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
55 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
56 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
57 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
58 ; GCN: exp mrt0 off, off, off, [[SRC3]] done{{$}}
59 define amdgpu_kernel void @test_export_en_src3_f32() #0 {
60 call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Enable mask 3: the first and second source operands are exported, the
; remaining two print as `off`.
64 ; GCN-LABEL: {{^}}test_export_en_src0_src1_f32:
65 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
66 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
67 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
68 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
69 ; GCN: exp mrt0 [[SRC0]], [[SRC1]], off, off done{{$}}
70 define amdgpu_kernel void @test_export_en_src0_src1_f32() #0 {
71 call void @llvm.amdgcn.exp.f32(i32 0, i32 3, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Enable mask 5: the first and third source operands are exported, the
; second and fourth print as `off`.
75 ; GCN-LABEL: {{^}}test_export_en_src0_src2_f32:
76 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
77 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
78 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
79 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
80 ; GCN: exp mrt0 [[SRC0]], off, [[SRC2]], off done{{$}}
81 define amdgpu_kernel void @test_export_en_src0_src2_f32() #0 {
82 call void @llvm.amdgcn.exp.f32(i32 0, i32 5, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Enable mask 9: the first and fourth source operands are exported, the
; middle two print as `off`. Issued twice: without and then with `done`.
86 ; GCN-LABEL: {{^}}test_export_en_src0_src3_f32:
87 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
88 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
89 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
90 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
91 ; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]]{{$}}
92 ; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]] done{{$}}
93 define amdgpu_kernel void @test_export_en_src0_src3_f32() #0 {
94 call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
95 call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Enable mask 15: all four source operands are exported. Issued twice:
; without and then with `done`.
99 ; GCN-LABEL: {{^}}test_export_en_src0_src1_src2_src3_f32:
100 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
101 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
102 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
103 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
104 ; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
105 ; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
106 define amdgpu_kernel void @test_export_en_src0_src1_src2_src3_f32() #0 {
107 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
108 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Target 7 is expected to print as `mrt7`. All four sources are the same
; constant 0.5, and the checks expect one register reused for every operand.
112 ; GCN-LABEL: {{^}}test_export_mrt7_f32:
113 ; GCN-DAG: v_mov_b32_e32 [[VHALF:v[0-9]+]], 0.5
114 ; GCN: exp mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]]{{$}}
115 ; GCN: exp mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]] done{{$}}
116 define amdgpu_kernel void @test_export_mrt7_f32() #0 {
117 call void @llvm.amdgcn.exp.f32(i32 7, i32 15, float 0.5, float 0.5, float 0.5, float 0.5, i1 false, i1 false)
118 call void @llvm.amdgcn.exp.f32(i32 7, i32 15, float 0.5, float 0.5, float 0.5, float 0.5, i1 true, i1 false)
; Target 8 is expected to print as `mrtz`. Issued without and then with `done`.
122 ; GCN-LABEL: {{^}}test_export_z_f32:
123 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
124 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
125 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
126 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
127 ; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
128 ; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
129 define amdgpu_kernel void @test_export_z_f32() #0 {
130 call void @llvm.amdgcn.exp.f32(i32 8, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
131 call void @llvm.amdgcn.exp.f32(i32 8, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Target 9 is expected to print as `null`. Issued without and then with `done`.
135 ; GCN-LABEL: {{^}}test_export_null_f32:
136 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
137 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
138 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
139 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
140 ; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
141 ; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
142 define amdgpu_kernel void @test_export_null_f32() #0 {
143 call void @llvm.amdgcn.exp.f32(i32 9, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
144 call void @llvm.amdgcn.exp.f32(i32 9, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Target 10 has no named form: the printed operand is expected to be
; `invalid_target_10`. Issued without and then with `done`.
148 ; GCN-LABEL: {{^}}test_export_reserved10_f32:
149 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
150 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
151 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
152 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
153 ; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
154 ; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
155 define amdgpu_kernel void @test_export_reserved10_f32() #0 {
156 call void @llvm.amdgcn.exp.f32(i32 10, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
157 call void @llvm.amdgcn.exp.f32(i32 10, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Target 11 has no named form: the printed operand is expected to be
; `invalid_target_11`. Issued without and then with `done`.
161 ; GCN-LABEL: {{^}}test_export_reserved11_f32:
162 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
163 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
164 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
165 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
166 ; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
167 ; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
168 define amdgpu_kernel void @test_export_reserved11_f32() #0 {
169 call void @llvm.amdgcn.exp.f32(i32 11, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
170 call void @llvm.amdgcn.exp.f32(i32 11, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Target 12 is expected to print as `pos0`. Issued without and then with `done`.
174 ; GCN-LABEL: {{^}}test_export_pos0_f32:
175 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
176 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
177 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
178 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
179 ; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
180 ; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
181 define amdgpu_kernel void @test_export_pos0_f32() #0 {
182 call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
183 call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Target 15 is expected to print as `pos3`. Issued without and then with `done`.
187 ; GCN-LABEL: {{^}}test_export_pos3_f32:
188 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
189 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
190 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
191 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
192 ; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
193 ; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
194 define amdgpu_kernel void @test_export_pos3_f32() #0 {
195 call void @llvm.amdgcn.exp.f32(i32 15, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
196 call void @llvm.amdgcn.exp.f32(i32 15, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Target 32 is expected to print as `param0`. Issued without and then with
; `done`.
200 ; GCN-LABEL: {{^}}test_export_param0_f32:
201 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
202 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
203 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
204 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
205 ; GCN: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
206 ; GCN: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
207 define amdgpu_kernel void @test_export_param0_f32() #0 {
208 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
209 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Target 63 is expected to print as `param31`. Issued without and then with
; `done`.
213 ; GCN-LABEL: {{^}}test_export_param31_f32:
214 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
215 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
216 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
217 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
218 ; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
219 ; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
220 define amdgpu_kernel void @test_export_param31_f32() #0 {
221 call void @llvm.amdgcn.exp.f32(i32 63, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
222 call void @llvm.amdgcn.exp.f32(i32 63, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; The last i1 flag is set on both calls and is expected to print as `vm`;
; with the first i1 flag also set the suffix becomes `done vm`.
226 ; GCN-LABEL: {{^}}test_export_vm_f32:
227 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
228 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
229 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
230 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
231 ; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] vm{{$}}
232 ; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}}
233 define amdgpu_kernel void @test_export_vm_f32() #0 {
234 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 true)
235 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 true)
; Integer-intrinsic variant with enable mask 0: all four operands are expected
; to print as `off`. The two calls differ only in the `done` flag.
253 ; GCN-LABEL: {{^}}test_export_zeroes_i32:
254 ; GCN: exp mrt0 off, off, off, off{{$}}
255 ; GCN: exp mrt0 off, off, off, off done{{$}}
256 define amdgpu_kernel void @test_export_zeroes_i32() #0 {
258 call void @llvm.amdgcn.exp.i32(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false)
259 call void @llvm.amdgcn.exp.i32(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 true, i1 false)
263 ; FIXME: Should not set up registers for the unused source registers.
; Integer variant, enable mask 1: only the first source operand appears in
; the export. All four constants are still expected to be materialized.
265 ; GCN-LABEL: {{^}}test_export_en_src0_i32:
266 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
267 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
268 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
269 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
270 ; GCN: exp mrt0 [[SRC0]], off, off, off done{{$}}
271 define amdgpu_kernel void @test_export_en_src0_i32() #0 {
272 call void @llvm.amdgcn.exp.i32(i32 0, i32 1, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant, enable mask 2: only the second source operand appears in
; the export.
276 ; GCN-LABEL: {{^}}test_export_en_src1_i32:
277 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
278 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
279 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
280 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
281 ; GCN: exp mrt0 off, [[SRC1]], off, off done{{$}}
282 define amdgpu_kernel void @test_export_en_src1_i32() #0 {
283 call void @llvm.amdgcn.exp.i32(i32 0, i32 2, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant, enable mask 4: only the third source operand appears in
; the export.
287 ; GCN-LABEL: {{^}}test_export_en_src2_i32:
288 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
289 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
290 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
291 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
292 ; GCN: exp mrt0 off, off, [[SRC2]], off done{{$}}
293 define amdgpu_kernel void @test_export_en_src2_i32() #0 {
294 call void @llvm.amdgcn.exp.i32(i32 0, i32 4, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant, enable mask 8: only the fourth source operand appears in
; the export.
298 ; GCN-LABEL: {{^}}test_export_en_src3_i32:
299 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
300 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
301 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
302 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
303 ; GCN: exp mrt0 off, off, off, [[SRC3]] done{{$}}
304 define amdgpu_kernel void @test_export_en_src3_i32() #0 {
305 call void @llvm.amdgcn.exp.i32(i32 0, i32 8, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant, enable mask 3: the first and second source operands are
; exported, the remaining two print as `off`.
309 ; GCN-LABEL: {{^}}test_export_en_src0_src1_i32:
310 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
311 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
312 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
313 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
314 ; GCN: exp mrt0 [[SRC0]], [[SRC1]], off, off done{{$}}
315 define amdgpu_kernel void @test_export_en_src0_src1_i32() #0 {
316 call void @llvm.amdgcn.exp.i32(i32 0, i32 3, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant, enable mask 5: the first and third source operands are
; exported, the second and fourth print as `off`.
320 ; GCN-LABEL: {{^}}test_export_en_src0_src2_i32:
321 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
322 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
323 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
324 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
325 ; GCN: exp mrt0 [[SRC0]], off, [[SRC2]], off done{{$}}
326 define amdgpu_kernel void @test_export_en_src0_src2_i32() #0 {
327 call void @llvm.amdgcn.exp.i32(i32 0, i32 5, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant, enable mask 9: the first and fourth source operands are
; exported. Issued twice: without and then with `done`.
331 ; GCN-LABEL: {{^}}test_export_en_src0_src3_i32:
332 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
333 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
334 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
335 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
336 ; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]]{{$}}
337 ; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]] done{{$}}
338 define amdgpu_kernel void @test_export_en_src0_src3_i32() #0 {
339 call void @llvm.amdgcn.exp.i32(i32 0, i32 9, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
340 call void @llvm.amdgcn.exp.i32(i32 0, i32 9, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant, enable mask 15: all four source operands are exported.
; Issued twice: without and then with `done`.
344 ; GCN-LABEL: {{^}}test_export_en_src0_src1_src2_src3_i32:
345 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
346 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
347 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
348 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
349 ; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
350 ; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
351 define amdgpu_kernel void @test_export_en_src0_src1_src2_src3_i32() #0 {
352 call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
353 call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant for target 7 (expected to print as `mrt7`). All four sources
; are the same constant 5, and the checks expect one register reused for every
; operand. The capture is named VFIVE because it holds the integer 5.
357 ; GCN-LABEL: {{^}}test_export_mrt7_i32:
358 ; GCN-DAG: v_mov_b32_e32 [[VFIVE:v[0-9]+]], 5
359 ; GCN: exp mrt7 [[VFIVE]], [[VFIVE]], [[VFIVE]], [[VFIVE]]{{$}}
360 ; GCN: exp mrt7 [[VFIVE]], [[VFIVE]], [[VFIVE]], [[VFIVE]] done{{$}}
361 define amdgpu_kernel void @test_export_mrt7_i32() #0 {
362 call void @llvm.amdgcn.exp.i32(i32 7, i32 15, i32 5, i32 5, i32 5, i32 5, i1 false, i1 false)
363 call void @llvm.amdgcn.exp.i32(i32 7, i32 15, i32 5, i32 5, i32 5, i32 5, i1 true, i1 false)
; Integer variant: target 8 is expected to print as `mrtz`. Issued without
; and then with `done`.
367 ; GCN-LABEL: {{^}}test_export_z_i32:
368 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
369 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
370 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
371 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
372 ; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
373 ; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
374 define amdgpu_kernel void @test_export_z_i32() #0 {
375 call void @llvm.amdgcn.exp.i32(i32 8, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
376 call void @llvm.amdgcn.exp.i32(i32 8, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant: target 9 is expected to print as `null`. Issued without
; and then with `done`.
380 ; GCN-LABEL: {{^}}test_export_null_i32:
381 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
382 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
383 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
384 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
385 ; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
386 ; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
387 define amdgpu_kernel void @test_export_null_i32() #0 {
388 call void @llvm.amdgcn.exp.i32(i32 9, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
389 call void @llvm.amdgcn.exp.i32(i32 9, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant: target 10 has no named form and is expected to print as
; `invalid_target_10`. Issued without and then with `done`.
393 ; GCN-LABEL: {{^}}test_export_reserved10_i32:
394 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
395 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
396 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
397 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
398 ; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
399 ; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
400 define amdgpu_kernel void @test_export_reserved10_i32() #0 {
401 call void @llvm.amdgcn.exp.i32(i32 10, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
402 call void @llvm.amdgcn.exp.i32(i32 10, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant: target 11 has no named form and is expected to print as
; `invalid_target_11`. Issued without and then with `done`.
406 ; GCN-LABEL: {{^}}test_export_reserved11_i32:
407 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
408 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
409 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
410 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
411 ; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
412 ; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
413 define amdgpu_kernel void @test_export_reserved11_i32() #0 {
414 call void @llvm.amdgcn.exp.i32(i32 11, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
415 call void @llvm.amdgcn.exp.i32(i32 11, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant: target 12 is expected to print as `pos0`. Issued without
; and then with `done`.
419 ; GCN-LABEL: {{^}}test_export_pos0_i32:
420 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
421 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
422 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
423 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
424 ; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
425 ; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
426 define amdgpu_kernel void @test_export_pos0_i32() #0 {
427 call void @llvm.amdgcn.exp.i32(i32 12, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
428 call void @llvm.amdgcn.exp.i32(i32 12, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant: target 15 is expected to print as `pos3`. Issued without
; and then with `done`.
432 ; GCN-LABEL: {{^}}test_export_pos3_i32:
433 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
434 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
435 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
436 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
437 ; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
438 ; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
439 define amdgpu_kernel void @test_export_pos3_i32() #0 {
440 call void @llvm.amdgcn.exp.i32(i32 15, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
441 call void @llvm.amdgcn.exp.i32(i32 15, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant: target 32 is expected to print as `param0`. Issued without
; and then with `done`.
445 ; GCN-LABEL: {{^}}test_export_param0_i32:
446 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
447 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
448 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
449 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
450 ; GCN: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
451 ; GCN: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
452 define amdgpu_kernel void @test_export_param0_i32() #0 {
453 call void @llvm.amdgcn.exp.i32(i32 32, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
454 call void @llvm.amdgcn.exp.i32(i32 32, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant: target 63 is expected to print as `param31`. Issued
; without and then with `done`.
458 ; GCN-LABEL: {{^}}test_export_param31_i32:
459 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
460 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
461 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
462 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
463 ; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
464 ; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
465 define amdgpu_kernel void @test_export_param31_i32() #0 {
466 call void @llvm.amdgcn.exp.i32(i32 63, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
467 call void @llvm.amdgcn.exp.i32(i32 63, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Integer variant: the last i1 flag is set on both calls and is expected to
; print as `vm`; with the first i1 flag also set the suffix becomes `done vm`.
471 ; GCN-LABEL: {{^}}test_export_vm_i32:
472 ; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
473 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
474 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
475 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
476 ; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] vm{{$}}
477 ; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}}
478 define amdgpu_kernel void @test_export_vm_i32() #0 {
479 call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 true)
480 call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 true)
; amdgpu_ps function that branches on %flag == 0 and exports the four float
; arguments with neither i1 flag set. The visible check expects an
; s_cbranch_execz in the output.
484 ; GCN-LABEL: {{^}}test_if_export_f32:
485 ; GCN: s_cbranch_execz
487 define amdgpu_ps void @test_if_export_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
488 %cc = icmp eq i32 %flag, 0
489 br i1 %cc, label %end, label %exp
492 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 false)
; Same shape as test_if_export_f32, but the export sets only the last i1 flag
; (the one printed as `vm` elsewhere in this file). The visible check expects
; an s_cbranch_execz in the output.
499 ; GCN-LABEL: {{^}}test_if_export_vm_f32:
500 ; GCN: s_cbranch_execz
502 define amdgpu_ps void @test_if_export_vm_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
503 %cc = icmp eq i32 %flag, 0
504 br i1 %cc, label %end, label %exp
507 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 true)
; Same shape as test_if_export_f32, but the export sets only the first i1 flag
; (the one printed as `done` elsewhere in this file). The visible check
; expects an s_cbranch_execz in the output.
514 ; GCN-LABEL: {{^}}test_if_export_done_f32:
515 ; GCN: s_cbranch_execz
517 define amdgpu_ps void @test_if_export_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
518 %cc = icmp eq i32 %flag, 0
519 br i1 %cc, label %end, label %exp
522 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 false)
; Same shape as test_if_export_f32, but the export sets both i1 flags. The
; visible check expects an s_cbranch_execz in the output.
529 ; GCN-LABEL: {{^}}test_if_export_vm_done_f32:
530 ; GCN: s_cbranch_execz
532 define amdgpu_ps void @test_if_export_vm_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
533 %cc = icmp eq i32 %flag, 0
534 br i1 %cc, label %end, label %exp
537 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
; Two exports (param0, then param1 with `done`) that share %x and %y and
; differ in the third operand (x + y versus y - x) and the constant fourth
; operand. The second exp check uses the NEXT form, so the two exp
; instructions must appear on adjacent output lines.
544 ; GCN-LABEL: {{^}}test_export_clustering:
545 ; GCN-DAG: v_mov_b32_e32 [[W0:v[0-9]+]], 0
546 ; GCN-DAG: v_mov_b32_e32 [[W1:v[0-9]+]], 1.0
547 ; GCN-DAG: v_mov_b32_e32 [[X:v[0-9]+]], s0
548 ; GCN-DAG: v_mov_b32_e32 [[Y:v[0-9]+]], s1
549 ; GCN-DAG: v_add_f32_e{{32|64}} [[Z0:v[0-9]+]]
550 ; GCN-DAG: v_sub_f32_e{{32|64}} [[Z1:v[0-9]+]]
551 ; GCN: exp param0 [[X]], [[Y]], [[Z0]], [[W0]]{{$}}
552 ; GCN-NEXT: exp param1 [[X]], [[Y]], [[Z1]], [[W1]] done{{$}}
553 define amdgpu_kernel void @test_export_clustering(float %x, float %y) #0 {
554 %z0 = fadd float %x, %y
555 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %x, float %y, float %z0, float 0.0, i1 false, i1 false)
556 %z1 = fsub float %y, %x
557 call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %x, float %y, float %z1, float 1.0, i1 true, i1 false)
; The IR issues the export to target 32 (param0) first and the export to
; target 12 (pos0, with `done`) second.
; NOTE(review): going by the test name, the intent is presumably that the
; position export is emitted ahead of the parameter export -- confirm against
; the expected-output lines for this function.
561 ; GCN-LABEL: {{^}}test_export_pos_before_param:
565 define amdgpu_kernel void @test_export_pos_before_param(float %x, float %y) #0 {
566 %z0 = fadd float %x, %y
567 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
568 %z1 = fsub float %y, %x
569 call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %z1, i1 true, i1 false)
; Same shape as test_export_pos_before_param, but the second export goes to
; target 16 (presumably pos4, going by the test name -- TODO confirm). The
; gfx1010-only check line asserts that no s_waitcnt is emitted.
573 ; GCN-LABEL: {{^}}test_export_pos4_before_param:
575 ; GFX10-NOT: s_waitcnt
577 define amdgpu_kernel void @test_export_pos4_before_param(float %x, float %y) #0 {
578 %z0 = fadd float %x, %y
579 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
580 %z1 = fsub float %y, %x
581 call void @llvm.amdgcn.exp.f32(i32 16, i32 15, float 0.0, float 0.0, float 0.0, float %z1, i1 true, i1 false)
; The IR issues three exports to targets 32, 33 and 34 (all using %z0),
; followed by three exports to targets 12, 13 and 14 (all using %z1); only
; the very last call sets `done`.
; NOTE(review): going by the test name, the intent is presumably that the
; position exports move ahead of the parameter exports while each group keeps
; its own order -- confirm against the expected-output lines.
585 ; GCN-LABEL: {{^}}test_export_pos_before_param_ordered:
593 define amdgpu_kernel void @test_export_pos_before_param_ordered(float %x, float %y) #0 {
594 %z0 = fadd float %x, %y
595 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
596 call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
597 call void @llvm.amdgcn.exp.f32(i32 34, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
598 %z1 = fsub float %y, %x
599 call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %z1, i1 false, i1 false)
600 call void @llvm.amdgcn.exp.f32(i32 13, i32 15, float 0.0, float 0.0, float 0.0, float %z1, i1 false, i1 false)
601 call void @llvm.amdgcn.exp.f32(i32 14, i32 15, float 0.0, float 0.0, float 0.0, float %z1, i1 true, i1 false)
; The IR issues two constant exports to targets 32 and 33, then a raw buffer
; load indexed by %idx, then an export to target 12 (with `done`) whose fourth
; operand is the loaded value. The `exp param1` check uses the NEXT form, so
; it must directly follow the `exp param0` line in the output.
605 ; GCN-LABEL: {{^}}test_export_pos_before_param_across_load:
607 ; GCN-NEXT: exp param0
608 ; GCN-NEXT: exp param1
609 define amdgpu_kernel void @test_export_pos_before_param_across_load(i32 %idx) #0 {
610 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false)
611 call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false)
612 %load = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> undef, i32 %idx, i32 0, i32 0)
613 call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %load, i1 true, i1 false)
; Stores %v through a pointer selected between two addrspace(5) allocas
; (%data0 when %idx == 1, otherwise %data1), issues an export to target 12
; with `done`, then loads element 0 of %data0 and feeds the loaded value into
; exports to targets 32 and 33. The store may or may not alias the later load,
; depending on %idx.
617 ; GCN-LABEL: {{^}}test_export_across_store_load:
623 define amdgpu_kernel void @test_export_across_store_load(i32 %idx, float %v) #0 {
624 %data0 = alloca <4 x float>, align 8, addrspace(5)
625 %data1 = alloca <4 x float>, align 8, addrspace(5)
626 %cmp = icmp eq i32 %idx, 1
627 %data = select i1 %cmp, <4 x float> addrspace(5)* %data0, <4 x float> addrspace(5)* %data1
628 %sptr = getelementptr inbounds <4 x float>, <4 x float> addrspace(5)* %data, i32 0, i32 0
629 store float %v, float addrspace(5)* %sptr, align 8
630 call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float 1.0, i1 true, i1 false)
631 %ptr0 = getelementptr inbounds <4 x float>, <4 x float> addrspace(5)* %data0, i32 0, i32 0
632 %load0 = load float, float addrspace(5)* %ptr0, align 8
633 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
634 call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
638 attributes #0 = { nounwind }
639 attributes #1 = { nounwind inaccessiblememonly }
640 attributes #2 = { nounwind readnone }