llvm/test/CodeGen/AMDGPU/icmp.i16.ll

   1 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
   2 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s| FileCheck -check-prefix=GCN -check-prefix=SI %s
   3 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s| FileCheck -check-prefixes=GCN,GFX11-FAKE16 %s
   4 ; FIXME-TRUE16. In the true16 flow, codegen introduces an additional s2v copy and mov, and reverses the operand order, thus picking different cmp instructions.
   5 ; This should be corrected once the additional mov/copy is removed.
   6 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s| FileCheck -check-prefixes=GCN,GFX11-TRUE16 %s
   7
   8 ;;;==========================================================================;;;
   9 ;; 16-bit integer comparisons
  10 ;;;==========================================================================;;;
  11
  12 ; GCN-LABEL: {{^}}i16_eq:
  13 ; VI: v_cmp_eq_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
  14 ; SI: v_cmp_eq_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
  15 ; GFX11-FAKE16: v_cmp_eq_u16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
  16 ; GFX11-TRUE16: v_cmp_eq_u16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
  17 define amdgpu_kernel void @i16_eq(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
     ; Equality compare of two loaded i16 values.
     ; Each work-item uses its workitem.id.x as the element index: it loads
     ; one i16 from %a.ptr and one from %b.ptr, compares them with `icmp eq`,
     ; and stores the sign-extended i1 result as an i32 element of %out.
  18 entry:
  19   %tid = call i32 @llvm.amdgcn.workitem.id.x()
  20   %tid.ext = sext i32 %tid to i64
     ; i16-element addresses for the inputs, i32-element address for the output.
  21   %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
  22   %b.gep = getelementptr inbounds i16, ptr addrspace(1) %b.ptr, i64 %tid.ext
  23   %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
  24   %a = load i16, ptr addrspace(1) %a.gep
  25   %b = load i16, ptr addrspace(1) %b.gep
     ; Comparison under test.
  26   %tmp0 = icmp eq i16 %a, %b
     ; sext of i1: true becomes -1 (all ones), false becomes 0.
  27   %tmp1 = sext i1 %tmp0 to i32
  28   store i32 %tmp1, ptr addrspace(1) %out.gep
  29   ret void
  30 }
  31
  32 ; GCN-LABEL: {{^}}i16_ne:
  33 ; VI: v_cmp_ne_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
  34 ; SI: v_cmp_ne_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
  35 ; GFX11-FAKE16: v_cmp_ne_u16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
  36 ; GFX11-TRUE16: v_cmp_ne_u16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
  37 define amdgpu_kernel void @i16_ne(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
     ; Inequality compare of two loaded i16 values.
     ; Each work-item indexes %a.ptr, %b.ptr and %out by workitem.id.x, applies
     ; `icmp ne` to the two loaded i16 values, and stores the result
     ; sign-extended to i32.
  38 entry:
  39   %tid = call i32 @llvm.amdgcn.workitem.id.x()
  40   %tid.ext = sext i32 %tid to i64
  41   %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
  42   %b.gep = getelementptr inbounds i16, ptr addrspace(1) %b.ptr, i64 %tid.ext
  43   %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
  44   %a = load i16, ptr addrspace(1) %a.gep
  45   %b = load i16, ptr addrspace(1) %b.gep
     ; Comparison under test.
  46   %tmp0 = icmp ne i16 %a, %b
     ; sext of i1: true becomes -1, false becomes 0.
  47   %tmp1 = sext i1 %tmp0 to i32
  48   store i32 %tmp1, ptr addrspace(1) %out.gep
  49   ret void
  50 }
  51
  52 ; GCN-LABEL: {{^}}i16_ugt:
  53 ; VI: v_cmp_gt_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
  54 ; SI: v_cmp_gt_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
  55 ; GFX11-FAKE16: v_cmp_gt_u16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
  56 ; GFX11-TRUE16: v_cmp_gt_u16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
  57 define amdgpu_kernel void @i16_ugt(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
     ; Unsigned greater-than compare of two loaded i16 values.
     ; Each work-item indexes %a.ptr, %b.ptr and %out by workitem.id.x, applies
     ; `icmp ugt` (%a > %b, unsigned) to the loaded values, and stores the
     ; result sign-extended to i32.
  58 entry:
  59   %tid = call i32 @llvm.amdgcn.workitem.id.x()
  60   %tid.ext = sext i32 %tid to i64
  61   %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
  62   %b.gep = getelementptr inbounds i16, ptr addrspace(1) %b.ptr, i64 %tid.ext
  63   %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
  64   %a = load i16, ptr addrspace(1) %a.gep
  65   %b = load i16, ptr addrspace(1) %b.gep
     ; Comparison under test.
  66   %tmp0 = icmp ugt i16 %a, %b
     ; sext of i1: true becomes -1, false becomes 0.
  67   %tmp1 = sext i1 %tmp0 to i32
  68   store i32 %tmp1, ptr addrspace(1) %out.gep
  69   ret void
  70 }
  71
  72 ; GCN-LABEL: {{^}}i16_uge:
  73 ; VI: v_cmp_ge_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
  74 ; SI: v_cmp_ge_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
  75 ; GFX11-FAKE16: v_cmp_ge_u16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
  76 ; GFX11-TRUE16: v_cmp_ge_u16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
  77 define amdgpu_kernel void @i16_uge(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
     ; Unsigned greater-or-equal compare of two loaded i16 values.
     ; Each work-item indexes %a.ptr, %b.ptr and %out by workitem.id.x, applies
     ; `icmp uge` (%a >= %b, unsigned) to the loaded values, and stores the
     ; result sign-extended to i32.
  78 entry:
  79   %tid = call i32 @llvm.amdgcn.workitem.id.x()
  80   %tid.ext = sext i32 %tid to i64
  81   %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
  82   %b.gep = getelementptr inbounds i16, ptr addrspace(1) %b.ptr, i64 %tid.ext
  83   %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
  84   %a = load i16, ptr addrspace(1) %a.gep
  85   %b = load i16, ptr addrspace(1) %b.gep
     ; Comparison under test.
  86   %tmp0 = icmp uge i16 %a, %b
     ; sext of i1: true becomes -1, false becomes 0.
  87   %tmp1 = sext i1 %tmp0 to i32
  88   store i32 %tmp1, ptr addrspace(1) %out.gep
  89   ret void
  90 }
  91
  92 ; GCN-LABEL: {{^}}i16_ult:
  93 ; VI: v_cmp_lt_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
  94 ; SI: v_cmp_lt_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
  95 ; GFX11-FAKE16: v_cmp_lt_u16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
  96 ; GFX11-TRUE16: v_cmp_lt_u16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
  97 define amdgpu_kernel void @i16_ult(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
     ; Unsigned less-than compare of two loaded i16 values.
     ; Each work-item indexes %a.ptr, %b.ptr and %out by workitem.id.x, applies
     ; `icmp ult` (%a < %b, unsigned) to the loaded values, and stores the
     ; result sign-extended to i32.
  98 entry:
  99   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 100   %tid.ext = sext i32 %tid to i64
 101   %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
 102   %b.gep = getelementptr inbounds i16, ptr addrspace(1) %b.ptr, i64 %tid.ext
 103   %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
 104   %a = load i16, ptr addrspace(1) %a.gep
 105   %b = load i16, ptr addrspace(1) %b.gep
     ; Comparison under test.
 106   %tmp0 = icmp ult i16 %a, %b
     ; sext of i1: true becomes -1, false becomes 0.
 107   %tmp1 = sext i1 %tmp0 to i32
 108   store i32 %tmp1, ptr addrspace(1) %out.gep
 109   ret void
 110 }
 111
 112 ; GCN-LABEL: {{^}}i16_ule:
 113 ; VI: v_cmp_le_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
 114 ; SI: v_cmp_le_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
 115 ; GFX11-FAKE16: v_cmp_le_u16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
 116 ; GFX11-TRUE16: v_cmp_le_u16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
 117 define amdgpu_kernel void @i16_ule(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
     ; Unsigned less-or-equal compare of two loaded i16 values.
     ; Each work-item indexes %a.ptr, %b.ptr and %out by workitem.id.x, applies
     ; `icmp ule` (%a <= %b, unsigned) to the loaded values, and stores the
     ; result sign-extended to i32.
 118 entry:
 119   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 120   %tid.ext = sext i32 %tid to i64
 121   %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
 122   %b.gep = getelementptr inbounds i16, ptr addrspace(1) %b.ptr, i64 %tid.ext
 123   %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
 124   %a = load i16, ptr addrspace(1) %a.gep
 125   %b = load i16, ptr addrspace(1) %b.gep
     ; Comparison under test.
 126   %tmp0 = icmp ule i16 %a, %b
     ; sext of i1: true becomes -1, false becomes 0.
 127   %tmp1 = sext i1 %tmp0 to i32
 128   store i32 %tmp1, ptr addrspace(1) %out.gep
 129   ret void
 130
 131 }
 132
 133 ; GCN-LABEL: {{^}}i16_sgt:
 134 ; VI: v_cmp_gt_i16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
 135 ; SI: v_cmp_gt_i32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
 136 ; GFX11-FAKE16: v_cmp_gt_i16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
 137 ; GFX11-TRUE16: v_cmp_gt_i16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
 138 define amdgpu_kernel void @i16_sgt(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
     ; Signed greater-than compare of two loaded i16 values.
     ; Each work-item indexes %a.ptr, %b.ptr and %out by workitem.id.x, applies
     ; `icmp sgt` (%a > %b, signed) to the loaded values, and stores the
     ; result sign-extended to i32.
 139 entry:
 140   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 141   %tid.ext = sext i32 %tid to i64
 142   %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
 143   %b.gep = getelementptr inbounds i16, ptr addrspace(1) %b.ptr, i64 %tid.ext
 144   %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
 145   %a = load i16, ptr addrspace(1) %a.gep
 146   %b = load i16, ptr addrspace(1) %b.gep
     ; Comparison under test.
 147   %tmp0 = icmp sgt i16 %a, %b
     ; sext of i1: true becomes -1, false becomes 0.
 148   %tmp1 = sext i1 %tmp0 to i32
 149   store i32 %tmp1, ptr addrspace(1) %out.gep
 150   ret void
 151 }
 152
 153 ; GCN-LABEL: {{^}}i16_sge:
 154 ; VI: v_cmp_ge_i16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
 155 ; SI: v_cmp_ge_i32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
 156 ; GFX11-FAKE16: v_cmp_ge_i16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
 157 ; GFX11-TRUE16: v_cmp_ge_i16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
 158 define amdgpu_kernel void @i16_sge(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
     ; Signed greater-or-equal compare of two loaded i16 values.
     ; Each work-item indexes %a.ptr, %b.ptr and %out by workitem.id.x, applies
     ; `icmp sge` (%a >= %b, signed) to the loaded values, and stores the
     ; result sign-extended to i32.
 159 entry:
 160   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 161   %tid.ext = sext i32 %tid to i64
 162   %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
 163   %b.gep = getelementptr inbounds i16, ptr addrspace(1) %b.ptr, i64 %tid.ext
 164   %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
 165   %a = load i16, ptr addrspace(1) %a.gep
 166   %b = load i16, ptr addrspace(1) %b.gep
     ; Comparison under test.
 167   %tmp0 = icmp sge i16 %a, %b
     ; sext of i1: true becomes -1, false becomes 0.
 168   %tmp1 = sext i1 %tmp0 to i32
 169   store i32 %tmp1, ptr addrspace(1) %out.gep
 170   ret void
 171 }
 172
 173 ; GCN-LABEL: {{^}}i16_slt:
 174 ; VI: v_cmp_lt_i16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
 175 ; SI: v_cmp_lt_i32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
 176 ; GFX11-FAKE16: v_cmp_lt_i16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
 177 ; GFX11-TRUE16: v_cmp_lt_i16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
 178 define amdgpu_kernel void @i16_slt(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
     ; Signed less-than compare of two loaded i16 values.
     ; Each work-item indexes %a.ptr, %b.ptr and %out by workitem.id.x, applies
     ; `icmp slt` (%a < %b, signed) to the loaded values, and stores the
     ; result sign-extended to i32.
 179 entry:
 180   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 181   %tid.ext = sext i32 %tid to i64
 182   %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
 183   %b.gep = getelementptr inbounds i16, ptr addrspace(1) %b.ptr, i64 %tid.ext
 184   %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
 185   %a = load i16, ptr addrspace(1) %a.gep
 186   %b = load i16, ptr addrspace(1) %b.gep
     ; Comparison under test.
 187   %tmp0 = icmp slt i16 %a, %b
     ; sext of i1: true becomes -1, false becomes 0.
 188   %tmp1 = sext i1 %tmp0 to i32
 189   store i32 %tmp1, ptr addrspace(1) %out.gep
 190   ret void
 191 }
 192
 193 ; GCN-LABEL: {{^}}i16_sle:
 194 ; VI: v_cmp_le_i16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
 195 ; SI: v_cmp_le_i32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
 196 ; GFX11-FAKE16: v_cmp_le_i16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
 197 ; GFX11-TRUE16: v_cmp_le_i16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
 198 define amdgpu_kernel void @i16_sle(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
     ; Signed less-or-equal compare of two loaded i16 values.
     ; Each work-item indexes %a.ptr, %b.ptr and %out by workitem.id.x, applies
     ; `icmp sle` (%a <= %b, signed) to the loaded values, and stores the
     ; result sign-extended to i32.
 199 entry:
 200   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 201   %tid.ext = sext i32 %tid to i64
 202   %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
 203   %b.gep = getelementptr inbounds i16, ptr addrspace(1) %b.ptr, i64 %tid.ext
 204   %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
 205   %a = load i16, ptr addrspace(1) %a.gep
 206   %b = load i16, ptr addrspace(1) %b.gep
     ; Comparison under test.
 207   %tmp0 = icmp sle i16 %a, %b
     ; sext of i1: true becomes -1, false becomes 0.
 208   %tmp1 = sext i1 %tmp0 to i32
 209   store i32 %tmp1, ptr addrspace(1) %out.gep
 210   ret void
 211 }
 212
 213 ; These should be commuted to reduce code size
 214 ; GCN-LABEL: {{^}}i16_eq_v_s:
 215 ; VI: v_cmp_eq_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
 216 ; SI: v_cmp_eq_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
 217 ; GFX11-FAKE16: v_cmp_eq_u16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
 218 ; GFX11-TRUE16: v_cmp_eq_u16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
 219 define amdgpu_kernel void @i16_eq_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
     ; Equality compare of a loaded i16 against an i16 kernel argument.
     ; Only %a is loaded per work-item (indexed by workitem.id.x); %b is the
     ; kernel argument itself. The check lines for this kernel expect an
     ; s-register first operand on VI/SI/GFX11-FAKE16 and two v-register
     ; halves on GFX11-TRUE16.
 220 entry:
 221   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 222   %tid.ext = sext i32 %tid to i64
 223   %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
 224   %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
 225   %a = load i16, ptr addrspace(1) %a.gep
     ; Comparison under test: loaded value on the left, argument on the right.
 226   %tmp0 = icmp eq i16 %a, %b
     ; sext of i1: true becomes -1, false becomes 0.
 227   %tmp1 = sext i1 %tmp0 to i32
 228   store i32 %tmp1, ptr addrspace(1) %out.gep
 229   ret void
 230 }
 231
 232 ; GCN-LABEL: {{^}}i16_ne_v_s:
 233 ; VI: v_cmp_ne_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
 234 ; SI: v_cmp_ne_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
 235 ; GFX11-FAKE16: v_cmp_ne_u16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
 236 ; GFX11-TRUE16: v_cmp_ne_u16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
 237 define amdgpu_kernel void @i16_ne_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
     ; Inequality compare of a loaded i16 against an i16 kernel argument.
     ; Only %a is loaded per work-item (indexed by workitem.id.x); %b is the
     ; kernel argument itself. The check lines for this kernel expect an
     ; s-register first operand on VI/SI/GFX11-FAKE16.
 238 entry:
 239   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 240   %tid.ext = sext i32 %tid to i64
 241   %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
 242   %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
 243   %a = load i16, ptr addrspace(1) %a.gep
     ; Comparison under test: loaded value on the left, argument on the right.
 244   %tmp0 = icmp ne i16 %a, %b
     ; sext of i1: true becomes -1, false becomes 0.
 245   %tmp1 = sext i1 %tmp0 to i32
 246   store i32 %tmp1, ptr addrspace(1) %out.gep
 247   ret void
 248 }
 249
 250 ; GCN-LABEL: {{^}}i16_ugt_v_s:
 251 ; VI: v_cmp_lt_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
 252 ; SI: v_cmp_lt_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
 253 ; GFX11-FAKE16: v_cmp_lt_u16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
 254 ; GFX11-TRUE16: v_cmp_gt_u16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
 255 define amdgpu_kernel void @i16_ugt_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
     ; Unsigned greater-than compare of a loaded i16 against an i16 kernel
     ; argument. Only %a is loaded per work-item (indexed by workitem.id.x).
     ; The check lines for this kernel expect the swapped predicate (lt) with
     ; an s-register first operand on VI/SI/GFX11-FAKE16, and gt on
     ; GFX11-TRUE16.
 256 entry:
 257   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 258   %tid.ext = sext i32 %tid to i64
 259   %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
 260   %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
 261   %a = load i16, ptr addrspace(1) %a.gep
     ; Comparison under test: loaded value on the left, argument on the right.
 262   %tmp0 = icmp ugt i16 %a, %b
     ; sext of i1: true becomes -1, false becomes 0.
 263   %tmp1 = sext i1 %tmp0 to i32
 264   store i32 %tmp1, ptr addrspace(1) %out.gep
 265   ret void
 266 }
 267
 268 ; GCN-LABEL: {{^}}i16_uge_v_s:
 269 ; VI: v_cmp_le_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
 270 ; SI: v_cmp_le_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
 271 ; GFX11-FAKE16: v_cmp_le_u16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
 272 ; GFX11-TRUE16: v_cmp_ge_u16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
 273 define amdgpu_kernel void @i16_uge_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
     ; Unsigned greater-or-equal compare of a loaded i16 against an i16 kernel
     ; argument. Only %a is loaded per work-item (indexed by workitem.id.x).
     ; The check lines for this kernel expect the swapped predicate (le) with
     ; an s-register first operand on VI/SI/GFX11-FAKE16, and ge on
     ; GFX11-TRUE16.
 274 entry:
 275   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 276   %tid.ext = sext i32 %tid to i64
 277   %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
 278   %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
 279   %a = load i16, ptr addrspace(1) %a.gep
     ; Comparison under test: loaded value on the left, argument on the right.
 280   %tmp0 = icmp uge i16 %a, %b
     ; sext of i1: true becomes -1, false becomes 0.
 281   %tmp1 = sext i1 %tmp0 to i32
 282   store i32 %tmp1, ptr addrspace(1) %out.gep
 283   ret void
 284 }
 285
 286 ; GCN-LABEL: {{^}}i16_ult_v_s:
 287 ; VI: v_cmp_gt_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
 288 ; SI: v_cmp_gt_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
 289 ; GFX11-FAKE16: v_cmp_gt_u16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
 290 ; GFX11-TRUE16: v_cmp_lt_u16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
 291 define amdgpu_kernel void @i16_ult_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
     ; Unsigned less-than compare of a loaded i16 against an i16 kernel
     ; argument. Only %a is loaded per work-item (indexed by workitem.id.x).
     ; The check lines for this kernel expect the swapped predicate (gt) with
     ; an s-register first operand on VI/SI/GFX11-FAKE16, and lt on
     ; GFX11-TRUE16.
 292 entry:
 293   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 294   %tid.ext = sext i32 %tid to i64
 295   %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
 296   %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
 297   %a = load i16, ptr addrspace(1) %a.gep
     ; Comparison under test: loaded value on the left, argument on the right.
 298   %tmp0 = icmp ult i16 %a, %b
     ; sext of i1: true becomes -1, false becomes 0.
 299   %tmp1 = sext i1 %tmp0 to i32
 300   store i32 %tmp1, ptr addrspace(1) %out.gep
 301   ret void
 302 }
 303
 304 ; GCN-LABEL: {{^}}i16_ule_v_s:
 305 ; VI: v_cmp_ge_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
 306 ; SI: v_cmp_ge_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
 307 ; GFX11-FAKE16: v_cmp_ge_u16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
 308 ; GFX11-TRUE16: v_cmp_le_u16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
 309 define amdgpu_kernel void @i16_ule_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
     ; Unsigned less-or-equal compare of a loaded i16 against an i16 kernel
     ; argument. Only %a is loaded per work-item (indexed by workitem.id.x).
     ; The check lines for this kernel expect the swapped predicate (ge) with
     ; an s-register first operand on VI/SI/GFX11-FAKE16, and le on
     ; GFX11-TRUE16.
 310 entry:
 311   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 312   %tid.ext = sext i32 %tid to i64
 313   %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
 314   %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
 315   %a = load i16, ptr addrspace(1) %a.gep
     ; Comparison under test: loaded value on the left, argument on the right.
 316   %tmp0 = icmp ule i16 %a, %b
     ; sext of i1: true becomes -1, false becomes 0.
 317   %tmp1 = sext i1 %tmp0 to i32
 318   store i32 %tmp1, ptr addrspace(1) %out.gep
 319   ret void
 320 }
 321
 322 ; GCN-LABEL: {{^}}i16_sgt_v_s:
 323 ; VI: v_cmp_lt_i16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
 324 ; SI: v_cmp_lt_i32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
 325 ; GFX11-FAKE16: v_cmp_lt_i16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
 326 ; GFX11-TRUE16: v_cmp_gt_i16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
 327 define amdgpu_kernel void @i16_sgt_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
     ; Signed greater-than compare of a loaded i16 against an i16 kernel
     ; argument. Only %a is loaded per work-item (indexed by workitem.id.x).
     ; The check lines for this kernel expect the swapped predicate (lt) with
     ; an s-register first operand on VI/SI/GFX11-FAKE16, and gt on
     ; GFX11-TRUE16.
 328 entry:
 329   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 330   %tid.ext = sext i32 %tid to i64
 331   %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
 332   %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
 333   %a = load i16, ptr addrspace(1) %a.gep
     ; Comparison under test: loaded value on the left, argument on the right.
 334   %tmp0 = icmp sgt i16 %a, %b
     ; sext of i1: true becomes -1, false becomes 0.
 335   %tmp1 = sext i1 %tmp0 to i32
 336   store i32 %tmp1, ptr addrspace(1) %out.gep
 337   ret void
 338 }
 339
 340 ; GCN-LABEL: {{^}}i16_sge_v_s:
 341 ; VI: v_cmp_le_i16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
 342 ; SI: v_cmp_le_i32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
 343 ; GFX11-FAKE16: v_cmp_le_i16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
 344 ; GFX11-TRUE16: v_cmp_ge_i16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
 345 define amdgpu_kernel void @i16_sge_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
     ; Signed greater-or-equal compare of a loaded i16 against an i16 kernel
     ; argument. Only %a is loaded per work-item (indexed by workitem.id.x).
     ; The check lines for this kernel expect the swapped predicate (le) with
     ; an s-register first operand on VI/SI/GFX11-FAKE16, and ge on
     ; GFX11-TRUE16.
 346 entry:
 347   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 348   %tid.ext = sext i32 %tid to i64
 349   %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
 350   %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
 351   %a = load i16, ptr addrspace(1) %a.gep
     ; Comparison under test: loaded value on the left, argument on the right.
 352   %tmp0 = icmp sge i16 %a, %b
     ; sext of i1: true becomes -1, false becomes 0.
 353   %tmp1 = sext i1 %tmp0 to i32
 354   store i32 %tmp1, ptr addrspace(1) %out.gep
 355   ret void
 356 }
 357
 358 ; GCN-LABEL: {{^}}i16_slt_v_s:
 359 ; VI: v_cmp_gt_i16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
 360 ; SI: v_cmp_gt_i32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
 361 ; GFX11-FAKE16: v_cmp_gt_i16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
 362 ; GFX11-TRUE16: v_cmp_lt_i16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
 363 define amdgpu_kernel void @i16_slt_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
     ; Signed less-than compare of a loaded i16 against an i16 kernel
     ; argument. Only %a is loaded per work-item (indexed by workitem.id.x).
     ; The check lines for this kernel expect the swapped predicate (gt) with
     ; an s-register first operand on VI/SI/GFX11-FAKE16, and lt on
     ; GFX11-TRUE16.
 364 entry:
 365   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 366   %tid.ext = sext i32 %tid to i64
 367   %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
 368   %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
 369   %a = load i16, ptr addrspace(1) %a.gep
     ; Comparison under test: loaded value on the left, argument on the right.
 370   %tmp0 = icmp slt i16 %a, %b
     ; sext of i1: true becomes -1, false becomes 0.
 371   %tmp1 = sext i1 %tmp0 to i32
 372   store i32 %tmp1, ptr addrspace(1) %out.gep
 373   ret void
 374 }
 375
 376 ; GCN-LABEL: {{^}}i16_sle_v_s:
 377 ; VI: v_cmp_ge_i16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
 378 ; SI: v_cmp_ge_i32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
 379 ; GFX11-FAKE16: v_cmp_ge_i16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
 380 ; GFX11-TRUE16: v_cmp_le_i16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
 381 define amdgpu_kernel void @i16_sle_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
     ; Signed less-or-equal compare of a loaded i16 against an i16 kernel
     ; argument. Only %a is loaded per work-item (indexed by workitem.id.x).
     ; The check lines for this kernel expect the swapped predicate (ge) with
     ; an s-register first operand on VI/SI/GFX11-FAKE16, and le on
     ; GFX11-TRUE16.
 382 entry:
 383   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 384   %tid.ext = sext i32 %tid to i64
 385   %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i64 %tid.ext
 386   %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
 387   %a = load i16, ptr addrspace(1) %a.gep
     ; Comparison under test: loaded value on the left, argument on the right.
 388   %tmp0 = icmp sle i16 %a, %b
     ; sext of i1: true becomes -1, false becomes 0.
 389   %tmp1 = sext i1 %tmp0 to i32
 390   store i32 %tmp1, ptr addrspace(1) %out.gep
 391   ret void
 392 }
 393
 394 declare i32 @llvm.amdgcn.workitem.id.x() #1
     ; The declaration above is the intrinsic the kernels in this file call to
     ; obtain %tid, which they use as the per-work-item element index.
 395
     ; Attribute groups: #0 is attached to the kernel definitions, #1 to the
     ; intrinsic declaration.
 396 attributes #0 = { nounwind }
 397 attributes #1 = { nounwind readnone }