1 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
2 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s| FileCheck -check-prefix=GCN -check-prefix=SI %s
3 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s| FileCheck -check-prefixes=GCN,GFX11-FAKE16 %s
; FIXME-TRUE16: In the true16 flow, codegen introduces an additional s2v copy and mov, and reverses the operand order, thus picking different cmp instructions.
; This should be corrected after the additional mov/copy is removed.
6 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s| FileCheck -check-prefixes=GCN,GFX11-TRUE16 %s
8 ;;;==========================================================================;;;
9 ;; 16-bit integer comparisons
10 ;;;==========================================================================;;;
; GCN-LABEL: {{^}}i16_eq:
; VI: v_cmp_eq_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_eq_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; GFX11-FAKE16: v_cmp_eq_u16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
; GFX11-TRUE16: v_cmp_eq_u16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
define amdgpu_kernel void @i16_eq(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds i16, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
  %a = load i16, ptr addrspace(1) %a.gep
  %b = load i16, ptr addrspace(1) %b.gep
  %tmp0 = icmp eq i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, ptr addrspace(1) %out.gep
  ret void
}
; GCN-LABEL: {{^}}i16_ne:
; VI: v_cmp_ne_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_ne_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; GFX11-FAKE16: v_cmp_ne_u16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
; GFX11-TRUE16: v_cmp_ne_u16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
define amdgpu_kernel void @i16_ne(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds i16, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
  %a = load i16, ptr addrspace(1) %a.gep
  %b = load i16, ptr addrspace(1) %b.gep
  %tmp0 = icmp ne i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, ptr addrspace(1) %out.gep
  ret void
}
; GCN-LABEL: {{^}}i16_ugt:
; VI: v_cmp_gt_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_gt_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; GFX11-FAKE16: v_cmp_gt_u16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
; GFX11-TRUE16: v_cmp_gt_u16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
define amdgpu_kernel void @i16_ugt(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds i16, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
  %a = load i16, ptr addrspace(1) %a.gep
  %b = load i16, ptr addrspace(1) %b.gep
  %tmp0 = icmp ugt i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, ptr addrspace(1) %out.gep
  ret void
}
; GCN-LABEL: {{^}}i16_uge:
; VI: v_cmp_ge_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_ge_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; GFX11-FAKE16: v_cmp_ge_u16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
; GFX11-TRUE16: v_cmp_ge_u16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
define amdgpu_kernel void @i16_uge(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds i16, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
  %a = load i16, ptr addrspace(1) %a.gep
  %b = load i16, ptr addrspace(1) %b.gep
  %tmp0 = icmp uge i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, ptr addrspace(1) %out.gep
  ret void
}
; GCN-LABEL: {{^}}i16_ult:
; VI: v_cmp_lt_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_lt_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; GFX11-FAKE16: v_cmp_lt_u16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
; GFX11-TRUE16: v_cmp_lt_u16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
define amdgpu_kernel void @i16_ult(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds i16, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
  %a = load i16, ptr addrspace(1) %a.gep
  %b = load i16, ptr addrspace(1) %b.gep
  %tmp0 = icmp ult i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, ptr addrspace(1) %out.gep
  ret void
}
; GCN-LABEL: {{^}}i16_ule:
; VI: v_cmp_le_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_le_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; GFX11-FAKE16: v_cmp_le_u16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
; GFX11-TRUE16: v_cmp_le_u16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
define amdgpu_kernel void @i16_ule(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds i16, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
  %a = load i16, ptr addrspace(1) %a.gep
  %b = load i16, ptr addrspace(1) %b.gep
  %tmp0 = icmp ule i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, ptr addrspace(1) %out.gep
  ret void
}
; GCN-LABEL: {{^}}i16_sgt:
; VI: v_cmp_gt_i16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_gt_i32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; GFX11-FAKE16: v_cmp_gt_i16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
; GFX11-TRUE16: v_cmp_gt_i16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
define amdgpu_kernel void @i16_sgt(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds i16, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
  %a = load i16, ptr addrspace(1) %a.gep
  %b = load i16, ptr addrspace(1) %b.gep
  %tmp0 = icmp sgt i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, ptr addrspace(1) %out.gep
  ret void
}
; GCN-LABEL: {{^}}i16_sge:
; VI: v_cmp_ge_i16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_ge_i32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; GFX11-FAKE16: v_cmp_ge_i16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
; GFX11-TRUE16: v_cmp_ge_i16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
define amdgpu_kernel void @i16_sge(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds i16, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
  %a = load i16, ptr addrspace(1) %a.gep
  %b = load i16, ptr addrspace(1) %b.gep
  %tmp0 = icmp sge i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, ptr addrspace(1) %out.gep
  ret void
}
; GCN-LABEL: {{^}}i16_slt:
; VI: v_cmp_lt_i16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_lt_i32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; GFX11-FAKE16: v_cmp_lt_i16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
; GFX11-TRUE16: v_cmp_lt_i16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
define amdgpu_kernel void @i16_slt(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds i16, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
  %a = load i16, ptr addrspace(1) %a.gep
  %b = load i16, ptr addrspace(1) %b.gep
  %tmp0 = icmp slt i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, ptr addrspace(1) %out.gep
  ret void
}
; GCN-LABEL: {{^}}i16_sle:
; VI: v_cmp_le_i16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_le_i32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; GFX11-FAKE16: v_cmp_le_i16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
; GFX11-TRUE16: v_cmp_le_i16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
define amdgpu_kernel void @i16_sle(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds i16, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
  %a = load i16, ptr addrspace(1) %a.gep
  %b = load i16, ptr addrspace(1) %b.gep
  %tmp0 = icmp sle i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, ptr addrspace(1) %out.gep
  ret void
}
; These should be commuted to reduce code size
; GCN-LABEL: {{^}}i16_eq_v_s:
; VI: v_cmp_eq_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_eq_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; GFX11-FAKE16: v_cmp_eq_u16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
; GFX11-TRUE16: v_cmp_eq_u16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
define amdgpu_kernel void @i16_eq_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
  %a = load i16, ptr addrspace(1) %a.gep
  %tmp0 = icmp eq i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, ptr addrspace(1) %out.gep
  ret void
}
; GCN-LABEL: {{^}}i16_ne_v_s:
; VI: v_cmp_ne_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_ne_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; GFX11-FAKE16: v_cmp_ne_u16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
; GFX11-TRUE16: v_cmp_ne_u16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
define amdgpu_kernel void @i16_ne_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
  %a = load i16, ptr addrspace(1) %a.gep
  %tmp0 = icmp ne i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, ptr addrspace(1) %out.gep
  ret void
}
; GCN-LABEL: {{^}}i16_ugt_v_s:
; VI: v_cmp_lt_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_lt_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; GFX11-FAKE16: v_cmp_lt_u16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
; GFX11-TRUE16: v_cmp_gt_u16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
define amdgpu_kernel void @i16_ugt_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
  %a = load i16, ptr addrspace(1) %a.gep
  %tmp0 = icmp ugt i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, ptr addrspace(1) %out.gep
  ret void
}
; GCN-LABEL: {{^}}i16_uge_v_s:
; VI: v_cmp_le_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_le_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; GFX11-FAKE16: v_cmp_le_u16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
; GFX11-TRUE16: v_cmp_ge_u16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
define amdgpu_kernel void @i16_uge_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
  %a = load i16, ptr addrspace(1) %a.gep
  %tmp0 = icmp uge i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, ptr addrspace(1) %out.gep
  ret void
}
; GCN-LABEL: {{^}}i16_ult_v_s:
; VI: v_cmp_gt_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_gt_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; GFX11-FAKE16: v_cmp_gt_u16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
; GFX11-TRUE16: v_cmp_lt_u16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
define amdgpu_kernel void @i16_ult_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
  %a = load i16, ptr addrspace(1) %a.gep
  %tmp0 = icmp ult i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, ptr addrspace(1) %out.gep
  ret void
}
; GCN-LABEL: {{^}}i16_ule_v_s:
; VI: v_cmp_ge_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_ge_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; GFX11-FAKE16: v_cmp_ge_u16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
; GFX11-TRUE16: v_cmp_le_u16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
define amdgpu_kernel void @i16_ule_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
  %a = load i16, ptr addrspace(1) %a.gep
  %tmp0 = icmp ule i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, ptr addrspace(1) %out.gep
  ret void
}
; GCN-LABEL: {{^}}i16_sgt_v_s:
; VI: v_cmp_lt_i16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_lt_i32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; GFX11-FAKE16: v_cmp_lt_i16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
; GFX11-TRUE16: v_cmp_gt_i16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
define amdgpu_kernel void @i16_sgt_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
  %a = load i16, ptr addrspace(1) %a.gep
  %tmp0 = icmp sgt i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, ptr addrspace(1) %out.gep
  ret void
}
; GCN-LABEL: {{^}}i16_sge_v_s:
; VI: v_cmp_le_i16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_le_i32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; GFX11-FAKE16: v_cmp_le_i16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
; GFX11-TRUE16: v_cmp_ge_i16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
define amdgpu_kernel void @i16_sge_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
  %a = load i16, ptr addrspace(1) %a.gep
  %tmp0 = icmp sge i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, ptr addrspace(1) %out.gep
  ret void
}
; GCN-LABEL: {{^}}i16_slt_v_s:
; VI: v_cmp_gt_i16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_gt_i32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; GFX11-FAKE16: v_cmp_gt_i16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
; GFX11-TRUE16: v_cmp_lt_i16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
define amdgpu_kernel void @i16_slt_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
  %a = load i16, ptr addrspace(1) %a.gep
  %tmp0 = icmp slt i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, ptr addrspace(1) %out.gep
  ret void
}
; GCN-LABEL: {{^}}i16_sle_v_s:
; VI: v_cmp_ge_i16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_ge_i32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; GFX11-FAKE16: v_cmp_ge_i16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
; GFX11-TRUE16: v_cmp_le_i16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
define amdgpu_kernel void @i16_sle_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
  %a = load i16, ptr addrspace(1) %a.gep
  %tmp0 = icmp sle i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, ptr addrspace(1) %out.gep
  ret void
}
; Workitem-id intrinsic used by all kernels above; readnone so it can be CSE'd.
declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }