1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
3 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
5 ; FUNC-LABEL: {{^}}udiv24_i8:
7 ; SI-DAG: v_cvt_f32_ubyte
8 ; SI-DAG: v_rcp_iflag_f32
; Unsigned i8 division kernel: reads the numerator from %in and the
; denominator from the i8 element that follows it, then stores the quotient
; to %out.
15 define amdgpu_kernel void @udiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
16 %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1 ; element 1 holds the denominator
17 %num = load i8, i8 addrspace(1) * %in
18 %den = load i8, i8 addrspace(1) * %den_ptr
19 %result = udiv i8 %num, %den ; operands are 8 bits wide by type
20 store i8 %result, i8 addrspace(1)* %out
24 ; FUNC-LABEL: {{^}}udiv24_i16:
; Unsigned i16 division kernel: reads the numerator from %in and the
; denominator from the i16 element that follows it, then stores the quotient
; to %out. All memory accesses are 2-byte aligned.
34 define amdgpu_kernel void @udiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
35 %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1 ; element 1 holds the denominator
36 %num = load i16, i16 addrspace(1) * %in, align 2
37 %den = load i16, i16 addrspace(1) * %den_ptr, align 2
38 %result = udiv i16 %num, %den ; operands are 16 bits wide by type
39 store i16 %result, i16 addrspace(1)* %out, align 2
43 ; FUNC-LABEL: {{^}}udiv23_i32:
45 ; SI-DAG: v_cvt_f32_u32
46 ; SI-DAG: v_rcp_iflag_f32
; Unsigned i32 division where both operands are first reduced to their low
; 23 bits. The numerator comes from %in[0], the denominator from %in[1], and
; the quotient is stored to %out.
53 define amdgpu_kernel void @udiv23_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
54 %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 ; element 1 holds the denominator
55 %num = load i32, i32 addrspace(1) * %in, align 4
56 %den = load i32, i32 addrspace(1) * %den_ptr, align 4
57 %num.i23.0 = shl i32 %num, 9 ; shl/lshr by 9 zeroes the top 9 bits
58 %den.i23.0 = shl i32 %den, 9
59 %num.i23 = lshr i32 %num.i23.0, 9 ; numerator now fits in 23 bits
60 %den.i23 = lshr i32 %den.i23.0, 9 ; denominator now fits in 23 bits
61 %result = udiv i32 %num.i23, %den.i23
62 store i32 %result, i32 addrspace(1)* %out, align 4
66 ; FUNC-LABEL: {{^}}udiv24_i32:
; Unsigned i32 division where both operands are first reduced to their low
; 24 bits. The numerator comes from %in[0], the denominator from %in[1], and
; the quotient is stored to %out.
70 define amdgpu_kernel void @udiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
71 %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 ; element 1 holds the denominator
72 %num = load i32, i32 addrspace(1) * %in, align 4
73 %den = load i32, i32 addrspace(1) * %den_ptr, align 4
74 %num.i24.0 = shl i32 %num, 8 ; shl/lshr by 8 zeroes the top 8 bits
75 %den.i24.0 = shl i32 %den, 8
76 %num.i24 = lshr i32 %num.i24.0, 8 ; numerator now fits in 24 bits
77 %den.i24 = lshr i32 %den.i24.0, 8 ; denominator now fits in 24 bits
78 %result = udiv i32 %num.i24, %den.i24
79 store i32 %result, i32 addrspace(1)* %out, align 4
83 ; FUNC-LABEL: {{^}}no_udiv24_u23_u24_i32:
; Unsigned i32 division with mixed operand widths: the numerator is reduced
; to 23 bits and the denominator to 24 bits before dividing.
; NOTE(review) - the "no_" prefix suggests the narrow division lowering is
; expected not to apply here; the matching checks are not visible in this
; excerpt, so confirm against the full test.
87 define amdgpu_kernel void @no_udiv24_u23_u24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
88 %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 ; element 1 holds the denominator
89 %num = load i32, i32 addrspace(1) * %in, align 4
90 %den = load i32, i32 addrspace(1) * %den_ptr, align 4
91 %num.i23.0 = shl i32 %num, 9
92 %den.i24.0 = shl i32 %den, 8
93 %num.i23 = lshr i32 %num.i23.0, 9 ; top 9 bits cleared: 23-bit numerator
94 %den.i24 = lshr i32 %den.i24.0, 8 ; top 8 bits cleared: 24-bit denominator
95 %result = udiv i32 %num.i23, %den.i24
96 store i32 %result, i32 addrspace(1)* %out, align 4
100 ; FUNC-LABEL: {{^}}no_udiv24_u24_u23_i32:
; Unsigned i32 division with mixed operand widths: the numerator is reduced
; to 24 bits and the denominator to 23 bits before dividing (the mirror image
; of a 23-bit numerator / 24-bit denominator case).
; NOTE(review) - the "no_" prefix suggests the narrow division lowering is
; expected not to apply here; the matching checks are not visible in this
; excerpt, so confirm against the full test.
104 define amdgpu_kernel void @no_udiv24_u24_u23_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
105 %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 ; element 1 holds the denominator
106 %num = load i32, i32 addrspace(1) * %in, align 4
107 %den = load i32, i32 addrspace(1) * %den_ptr, align 4
108 %num.i24.0 = shl i32 %num, 8
109 %den.i23.0 = shl i32 %den, 9
110 %num.i24 = lshr i32 %num.i24.0, 8 ; top 8 bits cleared: 24-bit numerator
111 %den.i23 = lshr i32 %den.i23.0, 9 ; top 9 bits cleared: 23-bit denominator
112 %result = udiv i32 %num.i24, %den.i23
113 store i32 %result, i32 addrspace(1)* %out, align 4
117 ; FUNC-LABEL: {{^}}udiv25_i32:
118 ; RCP_IFLAG is for URECIP in the full 32b alg
122 ; EG-NOT: UINT_TO_FLT
; Unsigned i32 division where both operands keep their low 25 bits, i.e. one
; bit more than the 24-bit cases in this file. The numerator comes from
; %in[0], the denominator from %in[1], and the quotient is stored to %out.
124 define amdgpu_kernel void @udiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
125 %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 ; element 1 holds the denominator
126 %num = load i32, i32 addrspace(1) * %in, align 4
127 %den = load i32, i32 addrspace(1) * %den_ptr, align 4
128 %num.i25.0 = shl i32 %num, 7 ; shl/lshr by 7 zeroes only the top 7 bits
129 %den.i25.0 = shl i32 %den, 7
130 %num.i25 = lshr i32 %num.i25.0, 7 ; numerator may use up to 25 bits
131 %den.i25 = lshr i32 %den.i25.0, 7 ; denominator may use up to 25 bits
132 %result = udiv i32 %num.i25, %den.i25
133 store i32 %result, i32 addrspace(1)* %out, align 4
137 ; FUNC-LABEL: {{^}}test_no_udiv24_i32_1:
138 ; RCP_IFLAG is for URECIP in the full 32b alg
142 ; EG-NOT: UINT_TO_FLT
; Unsigned i32 division where only the numerator is limited to 24 bits; the
; denominator keeps 25 bits. Note that the value named %den.i24 is produced
; with a shift amount of 7, so despite its name it can hold 25 significant
; bits.
144 define amdgpu_kernel void @test_no_udiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
145 %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 ; element 1 holds the denominator
146 %num = load i32, i32 addrspace(1) * %in, align 4
147 %den = load i32, i32 addrspace(1) * %den_ptr, align 4
148 %num.i24.0 = shl i32 %num, 8
149 %den.i24.0 = shl i32 %den, 7
150 %num.i24 = lshr i32 %num.i24.0, 8 ; top 8 bits cleared: 24-bit numerator
151 %den.i24 = lshr i32 %den.i24.0, 7 ; top 7 bits cleared: 25-bit denominator
152 %result = udiv i32 %num.i24, %den.i24
153 store i32 %result, i32 addrspace(1)* %out, align 4
157 ; FUNC-LABEL: {{^}}test_no_udiv24_i32_2:
158 ; RCP_IFLAG is for URECIP in the full 32b alg
162 ; EG-NOT: UINT_TO_FLT
; Unsigned i32 division where only the denominator is limited to 24 bits; the
; numerator keeps 25 bits. Note that the value named %num.i24 is produced
; with a shift amount of 7, so despite its name it can hold 25 significant
; bits.
164 define amdgpu_kernel void @test_no_udiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
165 %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 ; element 1 holds the denominator
166 %num = load i32, i32 addrspace(1) * %in, align 4
167 %den = load i32, i32 addrspace(1) * %den_ptr, align 4
168 %num.i24.0 = shl i32 %num, 7
169 %den.i24.0 = shl i32 %den, 8
170 %num.i24 = lshr i32 %num.i24.0, 7 ; top 7 bits cleared: 25-bit numerator
171 %den.i24 = lshr i32 %den.i24.0, 8 ; top 8 bits cleared: 24-bit denominator
172 %result = udiv i32 %num.i24, %den.i24
173 store i32 %result, i32 addrspace(1)* %out, align 4
177 ; FUNC-LABEL: {{^}}urem24_i8:
178 ; SI: v_cvt_f32_ubyte
179 ; SI-DAG: v_cvt_f32_ubyte
180 ; SI-DAG: v_rcp_iflag_f32
184 ; EG-DAG: UINT_TO_FLT
; Unsigned i8 remainder kernel: reads the numerator from %in and the
; denominator from the i8 element that follows it, then stores the remainder
; to %out.
187 define amdgpu_kernel void @urem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
188 %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1 ; element 1 holds the denominator
189 %num = load i8, i8 addrspace(1) * %in
190 %den = load i8, i8 addrspace(1) * %den_ptr
191 %result = urem i8 %num, %den ; operands are 8 bits wide by type
192 store i8 %result, i8 addrspace(1)* %out
196 ; FUNC-LABEL: {{^}}urem24_i16:
199 ; SI: v_rcp_iflag_f32
203 ; EG-DAG: UINT_TO_FLT
; Unsigned i16 remainder kernel: reads the numerator from %in and the
; denominator from the i16 element that follows it, then stores the remainder
; to %out. All memory accesses are 2-byte aligned.
206 define amdgpu_kernel void @urem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
207 %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1 ; element 1 holds the denominator
208 %num = load i16, i16 addrspace(1) * %in, align 2
209 %den = load i16, i16 addrspace(1) * %den_ptr, align 2
210 %result = urem i16 %num, %den ; operands are 16 bits wide by type
211 store i16 %result, i16 addrspace(1)* %out, align 2
215 ; FUNC-LABEL: {{^}}urem24_i32:
; Unsigned i32 remainder where both operands are first reduced to their low
; 24 bits. The numerator comes from %in[0], the denominator from %in[1], and
; the remainder is stored to %out.
218 define amdgpu_kernel void @urem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
219 %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 ; element 1 holds the denominator
220 %num = load i32, i32 addrspace(1) * %in, align 4
221 %den = load i32, i32 addrspace(1) * %den_ptr, align 4
222 %num.i24.0 = shl i32 %num, 8 ; shl/lshr by 8 zeroes the top 8 bits
223 %den.i24.0 = shl i32 %den, 8
224 %num.i24 = lshr i32 %num.i24.0, 8 ; numerator now fits in 24 bits
225 %den.i24 = lshr i32 %den.i24.0, 8 ; denominator now fits in 24 bits
226 %result = urem i32 %num.i24, %den.i24
227 store i32 %result, i32 addrspace(1)* %out, align 4
231 ; FUNC-LABEL: {{^}}urem25_i32:
232 ; RCP_IFLAG is for URECIP in the full 32b alg
236 ; EG-NOT: UINT_TO_FLT
; Unsigned i32 remainder where both operands keep their low 25 bits. Note
; that the intermediate values carry an ".i24" suffix, but the shift amount
; of 7 leaves 25 significant bits, matching the "25" in the function name.
238 define amdgpu_kernel void @urem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
239 %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 ; element 1 holds the denominator
240 %num = load i32, i32 addrspace(1) * %in, align 4
241 %den = load i32, i32 addrspace(1) * %den_ptr, align 4
242 %num.i24.0 = shl i32 %num, 7 ; shl/lshr by 7 zeroes only the top 7 bits
243 %den.i24.0 = shl i32 %den, 7
244 %num.i24 = lshr i32 %num.i24.0, 7 ; numerator may use up to 25 bits
245 %den.i24 = lshr i32 %den.i24.0, 7 ; denominator may use up to 25 bits
246 %result = urem i32 %num.i24, %den.i24
247 store i32 %result, i32 addrspace(1)* %out, align 4
251 ; FUNC-LABEL: {{^}}test_no_urem24_i32_1:
252 ; RCP_IFLAG is for URECIP in the full 32b alg
256 ; EG-NOT: UINT_TO_FLT
; Unsigned i32 remainder where only the numerator is limited to 24 bits; the
; denominator keeps 25 bits. Note that the value named %den.i24 is produced
; with a shift amount of 7, so despite its name it can hold 25 significant
; bits.
258 define amdgpu_kernel void @test_no_urem24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
259 %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 ; element 1 holds the denominator
260 %num = load i32, i32 addrspace(1) * %in, align 4
261 %den = load i32, i32 addrspace(1) * %den_ptr, align 4
262 %num.i24.0 = shl i32 %num, 8
263 %den.i24.0 = shl i32 %den, 7
264 %num.i24 = lshr i32 %num.i24.0, 8 ; top 8 bits cleared: 24-bit numerator
265 %den.i24 = lshr i32 %den.i24.0, 7 ; top 7 bits cleared: 25-bit denominator
266 %result = urem i32 %num.i24, %den.i24
267 store i32 %result, i32 addrspace(1)* %out, align 4
271 ; FUNC-LABEL: {{^}}test_no_urem24_i32_2:
272 ; RCP_IFLAG is for URECIP in the full 32b alg
276 ; EG-NOT: UINT_TO_FLT
; Unsigned i32 remainder where only the denominator is limited to 24 bits;
; the numerator keeps 25 bits. Note that the value named %num.i24 is produced
; with a shift amount of 7, so despite its name it can hold 25 significant
; bits.
278 define amdgpu_kernel void @test_no_urem24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
279 %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 ; element 1 holds the denominator
280 %num = load i32, i32 addrspace(1) * %in, align 4
281 %den = load i32, i32 addrspace(1) * %den_ptr, align 4
282 %num.i24.0 = shl i32 %num, 7
283 %den.i24.0 = shl i32 %den, 8
284 %num.i24 = lshr i32 %num.i24.0, 7 ; top 7 bits cleared: 25-bit numerator
285 %den.i24 = lshr i32 %den.i24.0, 8 ; top 8 bits cleared: 24-bit denominator
286 %result = urem i32 %num.i24, %den.i24
287 store i32 %result, i32 addrspace(1)* %out, align 4
291 ; FUNC-LABEL: {{^}}test_udiv24_u16_u23_i32:
292 ; SI-DAG: v_rcp_iflag_f32
293 ; SI-DAG: s_mov_b32 [[MASK:s[0-9]+]], 0x7fffff{{$}}
294 ; SI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]],
; Unsigned i32 division with a 16-bit numerator and a 23-bit denominator.
; Both operands are narrowed with shl/lshr pairs before dividing. The
; 0x7fffff constant expected by the checks that precede this function is
; the 23-bit all-ones mask (2^23 - 1).
297 define amdgpu_kernel void @test_udiv24_u16_u23_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
298 %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 ; element 1 holds the denominator
299 %num = load i32, i32 addrspace(1) * %in, align 4
300 %den = load i32, i32 addrspace(1) * %den_ptr, align 4
301 %num.i16.0 = shl i32 %num, 16
302 %den.i23.0 = shl i32 %den, 9
303 %num.i16 = lshr i32 %num.i16.0, 16 ; top 16 bits cleared: 16-bit numerator
304 %den.i23 = lshr i32 %den.i23.0, 9 ; top 9 bits cleared: 23-bit denominator
305 %result = udiv i32 %num.i16, %den.i23
306 store i32 %result, i32 addrspace(1)* %out, align 4
310 ; FUNC-LABEL: {{^}}test_udiv24_u23_u16_i32:
311 ; SI-DAG: v_rcp_iflag_f32
312 ; SI-DAG: s_mov_b32 [[MASK:s[0-9]+]], 0x7fffff{{$}}
313 ; SI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]],
; Unsigned i32 division with a 23-bit numerator and a 16-bit denominator.
; Both operands are narrowed with shl/lshr pairs before dividing. The
; 0x7fffff constant expected by the checks that precede this function is
; the 23-bit all-ones mask (2^23 - 1).
316 define amdgpu_kernel void @test_udiv24_u23_u16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
317 %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 ; element 1 holds the denominator
318 %num = load i32, i32 addrspace(1) * %in, align 4
319 %den = load i32, i32 addrspace(1) * %den_ptr, align 4
320 %num.i23.0 = shl i32 %num, 9
321 %den.i16.0 = shl i32 %den, 16
322 %num.i23 = lshr i32 %num.i23.0, 9 ; top 9 bits cleared: 23-bit numerator
323 %den.i16 = lshr i32 %den.i16.0, 16 ; top 16 bits cleared: 16-bit denominator
324 %result = udiv i32 %num.i23, %den.i16
325 store i32 %result, i32 addrspace(1)* %out, align 4