1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; Unsigned 32-bit division inside a loop. The dividend is the loop counter
; (%tmp) and the divisor is the kernel argument %arg1, which is not modified
; anywhere in the loop body shown here.
; The checks below are matched in order, so the listed expansion instructions
; must be found before the loop label is matched; the -DAG checks may match in
; any order relative to one another. The last check requires a scalar
; conditional branch that targets the captured loop label.
3 ; GCN-LABEL: {{^}}udiv32_invariant_denom:
6 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x4f800000,
7 ; GCN: v_cvt_u32_f32_e32
8 ; GCN-DAG: v_mul_hi_u32
9 ; GCN-DAG: v_mul_lo_i32
10 ; GCN-DAG: v_sub_i32_e32
11 ; GCN-DAG: v_cmp_eq_u32_e64
12 ; GCN-DAG: v_cndmask_b32_e64
13 ; GCN-DAG: v_mul_hi_u32
14 ; GCN-DAG: v_add_i32_e32
15 ; GCN-DAG: v_subrev_i32_e32
16 ; GCN-DAG: v_cndmask_b32_e64
17 ; GCN: [[LOOP:BB[0-9_]+]]:
19 ; GCN: s_cbranch_scc0 [[LOOP]]
21 define amdgpu_kernel void @udiv32_invariant_denom(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
28 bb3: ; preds = %bb3, %bb
; Loop counter: 0 on entry from %bb, otherwise the incremented value %tmp7.
29 %tmp = phi i32 [ 0, %bb ], [ %tmp7, %bb3 ]
; Operation under test: only the dividend changes between iterations.
30 %tmp4 = udiv i32 %tmp, %arg1
; The quotient is stored to element %tmp of the i32 array pointed to by %arg.
31 %tmp5 = zext i32 %tmp to i64
32 %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp5
33 store i32 %tmp4, i32 addrspace(1)* %tmp6, align 4
34 %tmp7 = add nuw nsw i32 %tmp, 1
; Leave the loop once the incremented counter equals 1024; otherwise repeat.
35 %tmp8 = icmp eq i32 %tmp7, 1024
36 br i1 %tmp8, label %bb2, label %bb3
; Unsigned 32-bit remainder inside a loop. The dividend is the loop counter
; (%tmp) and the divisor is the kernel argument %arg1, which is not modified
; anywhere in the loop body shown here.
; The checks below are matched in order, so the reciprocal instruction and the
; rest of the listed expansion must be found before the loop label is matched;
; the -DAG checks may match in any order relative to one another. The last
; check requires a scalar conditional branch that targets the captured loop
; label.
39 ; GCN-LABEL: {{^}}urem32_invariant_denom:
41 ; GCN: v_rcp_iflag_f32
42 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x4f800000,
43 ; GCN: v_cvt_u32_f32_e32
44 ; GCN-DAG: v_mul_hi_u32
45 ; GCN-DAG: v_mul_lo_i32
46 ; GCN-DAG: v_sub_i32_e32
47 ; GCN-DAG: v_cmp_eq_u32_e64
48 ; GCN-DAG: v_cndmask_b32_e64
49 ; GCN-DAG: v_mul_hi_u32
50 ; GCN-DAG: v_add_i32_e32
51 ; GCN-DAG: v_subrev_i32_e32
52 ; GCN-DAG: v_cndmask_b32_e64
53 ; GCN: [[LOOP:BB[0-9_]+]]:
55 ; GCN: s_cbranch_scc0 [[LOOP]]
57 define amdgpu_kernel void @urem32_invariant_denom(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
64 bb3: ; preds = %bb3, %bb
; Loop counter: 0 on entry from %bb, otherwise the incremented value %tmp7.
65 %tmp = phi i32 [ 0, %bb ], [ %tmp7, %bb3 ]
; Operation under test: only the dividend changes between iterations.
66 %tmp4 = urem i32 %tmp, %arg1
; The remainder is stored to element %tmp of the i32 array pointed to by %arg.
67 %tmp5 = zext i32 %tmp to i64
68 %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp5
69 store i32 %tmp4, i32 addrspace(1)* %tmp6, align 4
70 %tmp7 = add nuw nsw i32 %tmp, 1
; Leave the loop once the incremented counter equals 1024; otherwise repeat.
71 %tmp8 = icmp eq i32 %tmp7, 1024
72 br i1 %tmp8, label %bb2, label %bb3
; Signed 32-bit division inside a loop. The dividend is the loop counter
; (%tmp) and the divisor is the kernel argument %arg1, which is not modified
; anywhere in the loop body shown here.
; The checks below are matched in order, so the reciprocal instruction and the
; rest of the listed expansion must be found before the loop label is matched;
; the -DAG checks may match in any order relative to one another. The last
; check requires a scalar conditional branch that targets the captured loop
; label.
75 ; GCN-LABEL: {{^}}sdiv32_invariant_denom:
77 ; GCN: v_rcp_iflag_f32
78 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x4f800000,
79 ; GCN: v_cvt_u32_f32_e32
80 ; GCN-DAG: v_mul_hi_u32
81 ; GCN-DAG: v_mul_lo_i32
82 ; GCN-DAG: v_sub_i32_e32
83 ; GCN-DAG: v_cmp_eq_u32_e64
84 ; GCN-DAG: v_cndmask_b32_e64
85 ; GCN-DAG: v_mul_hi_u32
86 ; GCN-DAG: v_add_i32_e32
87 ; GCN-DAG: v_subrev_i32_e32
88 ; GCN-DAG: v_cndmask_b32_e64
89 ; GCN: [[LOOP:BB[0-9_]+]]:
91 ; GCN: s_cbranch_scc0 [[LOOP]]
93 define amdgpu_kernel void @sdiv32_invariant_denom(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
100 bb3: ; preds = %bb3, %bb
; Loop counter: 0 on entry from %bb, otherwise the incremented value %tmp7.
101 %tmp = phi i32 [ 0, %bb ], [ %tmp7, %bb3 ]
; Operation under test: only the dividend changes between iterations.
102 %tmp4 = sdiv i32 %tmp, %arg1
; The quotient is stored to element %tmp of the i32 array pointed to by %arg.
103 %tmp5 = zext i32 %tmp to i64
104 %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp5
105 store i32 %tmp4, i32 addrspace(1)* %tmp6, align 4
106 %tmp7 = add nuw nsw i32 %tmp, 1
; Leave the loop once the incremented counter equals 1024; otherwise repeat.
107 %tmp8 = icmp eq i32 %tmp7, 1024
108 br i1 %tmp8, label %bb2, label %bb3
; Signed 32-bit remainder inside a loop. The dividend is the loop counter
; (%tmp) and the divisor is the kernel argument %arg1, which is not modified
; anywhere in the loop body shown here.
; The checks below are matched in order, so the reciprocal instruction and the
; rest of the listed expansion must be found before the loop label is matched;
; the -DAG checks may match in any order relative to one another. The last
; check requires a scalar conditional branch that targets the captured loop
; label.
111 ; GCN-LABEL: {{^}}srem32_invariant_denom:
113 ; GCN: v_rcp_iflag_f32
114 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x4f800000,
115 ; GCN: v_cvt_u32_f32_e32
116 ; GCN-DAG: v_mul_hi_u32
117 ; GCN-DAG: v_mul_lo_i32
118 ; GCN-DAG: v_sub_i32_e32
119 ; GCN-DAG: v_cmp_eq_u32_e64
120 ; GCN-DAG: v_cndmask_b32_e64
121 ; GCN-DAG: v_mul_hi_u32
122 ; GCN-DAG: v_add_i32_e32
123 ; GCN-DAG: v_subrev_i32_e32
124 ; GCN-DAG: v_cndmask_b32_e64
125 ; GCN: [[LOOP:BB[0-9_]+]]:
127 ; GCN: s_cbranch_scc0 [[LOOP]]
129 define amdgpu_kernel void @srem32_invariant_denom(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
136 bb3: ; preds = %bb3, %bb
; Loop counter: 0 on entry from %bb, otherwise the incremented value %tmp7.
137 %tmp = phi i32 [ 0, %bb ], [ %tmp7, %bb3 ]
; Operation under test: only the dividend changes between iterations.
138 %tmp4 = srem i32 %tmp, %arg1
; The remainder is stored to element %tmp of the i32 array pointed to by %arg.
139 %tmp5 = zext i32 %tmp to i64
140 %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp5
141 store i32 %tmp4, i32 addrspace(1)* %tmp6, align 4
142 %tmp7 = add nuw nsw i32 %tmp, 1
; Leave the loop once the incremented counter equals 1024; otherwise repeat.
143 %tmp8 = icmp eq i32 %tmp7, 1024
144 br i1 %tmp8, label %bb2, label %bb3
; Unsigned 16-bit division inside a loop. The dividend is the loop counter
; (%tmp) and the divisor is the kernel argument %arg1, which is not modified
; anywhere in the loop body shown here.
; The checks are matched in order: the reciprocal instruction must be found
; before the loop label, and a scalar conditional branch targeting the
; captured loop label must follow it.
147 ; GCN-LABEL: {{^}}udiv16_invariant_denom:
149 ; GCN: v_rcp_iflag_f32
150 ; GCN: [[LOOP:BB[0-9_]+]]:
152 ; GCN: s_cbranch_scc0 [[LOOP]]
154 define amdgpu_kernel void @udiv16_invariant_denom(i16 addrspace(1)* nocapture %arg, i16 %arg1) {
161 bb3: ; preds = %bb3, %bb
; Loop counter: 0 on entry from %bb, otherwise the incremented value %tmp7.
162 %tmp = phi i16 [ 0, %bb ], [ %tmp7, %bb3 ]
; Operation under test: only the dividend changes between iterations.
163 %tmp4 = udiv i16 %tmp, %arg1
; The quotient is stored to element %tmp of the i16 array pointed to by %arg.
164 %tmp5 = zext i16 %tmp to i64
165 %tmp6 = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 %tmp5
166 store i16 %tmp4, i16 addrspace(1)* %tmp6, align 2
167 %tmp7 = add nuw nsw i16 %tmp, 1
; Leave the loop once the incremented counter equals 1024; otherwise repeat.
168 %tmp8 = icmp eq i16 %tmp7, 1024
169 br i1 %tmp8, label %bb2, label %bb3
; Unsigned 16-bit remainder inside a loop. The dividend is the loop counter
; (%tmp) and the divisor is the kernel argument %arg1, which is not modified
; anywhere in the loop body shown here.
; The checks are matched in order: the reciprocal instruction must be found
; before the loop label, and a scalar conditional branch targeting the
; captured loop label must follow it.
172 ; GCN-LABEL: {{^}}urem16_invariant_denom:
174 ; GCN: v_rcp_iflag_f32
175 ; GCN: [[LOOP:BB[0-9_]+]]:
177 ; GCN: s_cbranch_scc0 [[LOOP]]
179 define amdgpu_kernel void @urem16_invariant_denom(i16 addrspace(1)* nocapture %arg, i16 %arg1) {
186 bb3: ; preds = %bb3, %bb
; Loop counter: 0 on entry from %bb, otherwise the incremented value %tmp7.
187 %tmp = phi i16 [ 0, %bb ], [ %tmp7, %bb3 ]
; Operation under test: only the dividend changes between iterations.
188 %tmp4 = urem i16 %tmp, %arg1
; The remainder is stored to element %tmp of the i16 array pointed to by %arg.
189 %tmp5 = zext i16 %tmp to i64
190 %tmp6 = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 %tmp5
191 store i16 %tmp4, i16 addrspace(1)* %tmp6, align 2
192 %tmp7 = add nuw nsw i16 %tmp, 1
; Leave the loop once the incremented counter equals 1024; otherwise repeat.
193 %tmp8 = icmp eq i16 %tmp7, 1024
194 br i1 %tmp8, label %bb2, label %bb3
; Signed 16-bit division inside a loop. The dividend is the loop counter
; (%tmp) and the divisor is the kernel argument %arg1, which is not modified
; anywhere in the loop body shown here.
; The four -DAG checks (sign extension, mask with 0x7fffffff, int-to-float
; conversion, reciprocal) may match in any order relative to one another, but
; all of them must be found before the loop label is matched. The last check
; requires a scalar conditional branch that targets the captured loop label.
197 ; GCN-LABEL: {{^}}sdiv16_invariant_denom:
198 ; GCN-DAG: s_sext_i32_i16
199 ; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0x7fffffff
200 ; GCN-DAG: v_cvt_f32_i32
201 ; GCN-DAG: v_rcp_iflag_f32
202 ; GCN: [[LOOP:BB[0-9_]+]]:
204 ; GCN: s_cbranch_scc0 [[LOOP]]
206 define amdgpu_kernel void @sdiv16_invariant_denom(i16 addrspace(1)* nocapture %arg, i16 %arg1) {
213 bb3: ; preds = %bb3, %bb
; Loop counter: 0 on entry from %bb, otherwise the incremented value %tmp7.
214 %tmp = phi i16 [ 0, %bb ], [ %tmp7, %bb3 ]
; Operation under test: only the dividend changes between iterations.
215 %tmp4 = sdiv i16 %tmp, %arg1
; The quotient is stored to element %tmp of the i16 array pointed to by %arg.
216 %tmp5 = zext i16 %tmp to i64
217 %tmp6 = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 %tmp5
218 store i16 %tmp4, i16 addrspace(1)* %tmp6, align 2
219 %tmp7 = add nuw nsw i16 %tmp, 1
; Leave the loop once the incremented counter equals 1024; otherwise repeat.
220 %tmp8 = icmp eq i16 %tmp7, 1024
221 br i1 %tmp8, label %bb2, label %bb3
; Signed 16-bit remainder inside a loop. The dividend is the loop counter
; (%tmp) and the divisor is the kernel argument %arg1, which is not modified
; anywhere in the loop body shown here.
; The four -DAG checks (sign extension, mask with 0x7fffffff, int-to-float
; conversion, reciprocal) may match in any order relative to one another, but
; all of them must be found before the loop label is matched. The last check
; requires a scalar conditional branch that targets the captured loop label.
224 ; GCN-LABEL: {{^}}srem16_invariant_denom:
225 ; GCN-DAG: s_sext_i32_i16
226 ; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0x7fffffff
227 ; GCN-DAG: v_cvt_f32_i32
228 ; GCN-DAG: v_rcp_iflag_f32
229 ; GCN: [[LOOP:BB[0-9_]+]]:
231 ; GCN: s_cbranch_scc0 [[LOOP]]
233 define amdgpu_kernel void @srem16_invariant_denom(i16 addrspace(1)* nocapture %arg, i16 %arg1) {
240 bb3: ; preds = %bb3, %bb
; Loop counter: 0 on entry from %bb, otherwise the incremented value %tmp7.
241 %tmp = phi i16 [ 0, %bb ], [ %tmp7, %bb3 ]
; Operation under test: only the dividend changes between iterations.
242 %tmp4 = srem i16 %tmp, %arg1
; The remainder is stored to element %tmp of the i16 array pointed to by %arg.
243 %tmp5 = zext i16 %tmp to i64
244 %tmp6 = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 %tmp5
245 store i16 %tmp4, i16 addrspace(1)* %tmp6, align 2
246 %tmp7 = add nuw nsw i16 %tmp, 1
; Leave the loop once the incremented counter equals 1024; otherwise repeat.
247 %tmp8 = icmp eq i16 %tmp7, 1024
248 br i1 %tmp8, label %bb2, label %bb3