; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck -check-prefix=SI %s
; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -amdgpu-codegenprepare %s | FileCheck -check-prefix=VI %s
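
; On SI the sub-32-bit operations below are left unchanged; on VI (tonga)
; the -amdgpu-codegenprepare pass widens them to i32 by zero- or
; sign-extending the operands, performing the operation in 32 bits, and
; truncating the result back to the original narrow type.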

define amdgpu_kernel void @add_i3(i3 %a, i3 %b) {
; SI-LABEL: @add_i3(
; SI-NEXT:    [[R:%.*]] = add i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @add_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = add i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @add_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @add_nsw_i3(
; SI-NEXT:    [[R:%.*]] = add nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @add_nsw_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = add nsw i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @add_nuw_i3(i3 %a, i3 %b) {
; SI-LABEL: @add_nuw_i3(
; SI-NEXT:    [[R:%.*]] = add nuw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @add_nuw_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = add nuw i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @add_nuw_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @add_nuw_nsw_i3(
; SI-NEXT:    [[R:%.*]] = add nuw nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @add_nuw_nsw_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = add nuw nsw i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_i3(i3 %a, i3 %b) {
; SI-LABEL: @sub_i3(
; SI-NEXT:    [[R:%.*]] = sub i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @sub_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = sub nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = sub i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @sub_nsw_i3(
; SI-NEXT:    [[R:%.*]] = sub nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @sub_nsw_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = sub nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = sub nsw i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_nuw_i3(i3 %a, i3 %b) {
; SI-LABEL: @sub_nuw_i3(
; SI-NEXT:    [[R:%.*]] = sub nuw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @sub_nuw_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = sub nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = sub nuw i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_nuw_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @sub_nuw_nsw_i3(
; SI-NEXT:    [[R:%.*]] = sub nuw nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @sub_nuw_nsw_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = sub nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = sub nuw nsw i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_i3(i3 %a, i3 %b) {
; SI-LABEL: @mul_i3(
; SI-NEXT:    [[R:%.*]] = mul i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @mul_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = mul i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @mul_nsw_i3(
; SI-NEXT:    [[R:%.*]] = mul nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @mul_nsw_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = mul nsw i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_nuw_i3(i3 %a, i3 %b) {
; SI-LABEL: @mul_nuw_i3(
; SI-NEXT:    [[R:%.*]] = mul nuw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @mul_nuw_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = mul nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = mul nuw i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_nuw_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @mul_nuw_nsw_i3(
; SI-NEXT:    [[R:%.*]] = mul nuw nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @mul_nuw_nsw_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = mul nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = mul nuw nsw i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_i3(i3 %a, i3 %b) {
; SI-LABEL: @shl_i3(
; SI-NEXT:    [[R:%.*]] = shl i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @shl_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = shl i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @shl_nsw_i3(
; SI-NEXT:    [[R:%.*]] = shl nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @shl_nsw_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = shl nsw i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_nuw_i3(i3 %a, i3 %b) {
; SI-LABEL: @shl_nuw_i3(
; SI-NEXT:    [[R:%.*]] = shl nuw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @shl_nuw_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = shl nuw i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_nuw_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @shl_nuw_nsw_i3(
; SI-NEXT:    [[R:%.*]] = shl nuw nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @shl_nuw_nsw_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = shl nuw nsw i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @lshr_i3(i3 %a, i3 %b) {
; SI-LABEL: @lshr_i3(
; SI-NEXT:    [[R:%.*]] = lshr i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @lshr_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = lshr i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @lshr_exact_i3(i3 %a, i3 %b) {
; SI-LABEL: @lshr_exact_i3(
; SI-NEXT:    [[R:%.*]] = lshr exact i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @lshr_exact_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = lshr exact i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = lshr exact i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @ashr_i3(i3 %a, i3 %b) {
; SI-LABEL: @ashr_i3(
; SI-NEXT:    [[R:%.*]] = ashr i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @ashr_i3(
; VI-NEXT:    [[TMP1:%.*]] = sext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = sext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = ashr i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = ashr i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @ashr_exact_i3(i3 %a, i3 %b) {
; SI-LABEL: @ashr_exact_i3(
; SI-NEXT:    [[R:%.*]] = ashr exact i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @ashr_exact_i3(
; VI-NEXT:    [[TMP1:%.*]] = sext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = sext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = ashr exact i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = ashr exact i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @and_i3(i3 %a, i3 %b) {
; SI-LABEL: @and_i3(
; SI-NEXT:    [[R:%.*]] = and i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @and_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = and i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = and i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @or_i3(i3 %a, i3 %b) {
; SI-LABEL: @or_i3(
; SI-NEXT:    [[R:%.*]] = or i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @or_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = or i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = or i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @xor_i3(i3 %a, i3 %b) {
; SI-LABEL: @xor_i3(
; SI-NEXT:    [[R:%.*]] = xor i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i3 [[R]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @xor_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %r = xor i3 %a, %b
  store volatile i3 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_eq_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_eq_i3(
; SI-NEXT:    [[CMP:%.*]] = icmp eq i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT:    store volatile i3 [[SEL]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @select_eq_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext i3 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = zext i3 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT:    store volatile i3 [[TMP7]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %cmp = icmp eq i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ne_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_ne_i3(
; SI-NEXT:    [[CMP:%.*]] = icmp ne i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT:    store volatile i3 [[SEL]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @select_ne_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext i3 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = zext i3 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT:    store volatile i3 [[TMP7]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %cmp = icmp ne i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ugt_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_ugt_i3(
; SI-NEXT:    [[CMP:%.*]] = icmp ugt i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT:    store volatile i3 [[SEL]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @select_ugt_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp ugt i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext i3 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = zext i3 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT:    store volatile i3 [[TMP7]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %cmp = icmp ugt i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_uge_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_uge_i3(
; SI-NEXT:    [[CMP:%.*]] = icmp uge i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT:    store volatile i3 [[SEL]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @select_uge_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp uge i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext i3 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = zext i3 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT:    store volatile i3 [[TMP7]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %cmp = icmp uge i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ult_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_ult_i3(
; SI-NEXT:    [[CMP:%.*]] = icmp ult i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT:    store volatile i3 [[SEL]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @select_ult_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp ult i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext i3 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = zext i3 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT:    store volatile i3 [[TMP7]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %cmp = icmp ult i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ule_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_ule_i3(
; SI-NEXT:    [[CMP:%.*]] = icmp ule i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT:    store volatile i3 [[SEL]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @select_ule_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext i3 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = zext i3 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT:    store volatile i3 [[TMP7]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %cmp = icmp ule i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_sgt_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_sgt_i3(
; SI-NEXT:    [[CMP:%.*]] = icmp sgt i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT:    store volatile i3 [[SEL]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @select_sgt_i3(
; VI-NEXT:    [[TMP1:%.*]] = sext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = sext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = sext i3 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = sext i3 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT:    store volatile i3 [[TMP7]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %cmp = icmp sgt i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_sge_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_sge_i3(
; SI-NEXT:    [[CMP:%.*]] = icmp sge i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT:    store volatile i3 [[SEL]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @select_sge_i3(
; VI-NEXT:    [[TMP1:%.*]] = sext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = sext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp sge i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = sext i3 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = sext i3 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT:    store volatile i3 [[TMP7]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %cmp = icmp sge i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_slt_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_slt_i3(
; SI-NEXT:    [[CMP:%.*]] = icmp slt i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT:    store volatile i3 [[SEL]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @select_slt_i3(
; VI-NEXT:    [[TMP1:%.*]] = sext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = sext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = sext i3 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = sext i3 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT:    store volatile i3 [[TMP7]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %cmp = icmp slt i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_sle_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_sle_i3(
; SI-NEXT:    [[CMP:%.*]] = icmp sle i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT:    store volatile i3 [[SEL]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @select_sle_i3(
; VI-NEXT:    [[TMP1:%.*]] = sext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = sext i3 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = sext i3 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = sext i3 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT:    store volatile i3 [[TMP7]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %cmp = icmp sle i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, ptr addrspace(1) undef
  ret void
}

declare i3 @llvm.bitreverse.i3(i3)
define amdgpu_kernel void @bitreverse_i3(i3 %a) {
; SI-LABEL: @bitreverse_i3(
; SI-NEXT:    [[BREV:%.*]] = call i3 @llvm.bitreverse.i3(i3 [[A:%.*]])
; SI-NEXT:    store volatile i3 [[BREV]], ptr addrspace(1) undef, align 1
; SI-NEXT:    ret void
;
; VI-LABEL: @bitreverse_i3(
; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[TMP1]])
; VI-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP2]], 29
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT:    store volatile i3 [[TMP4]], ptr addrspace(1) undef, align 1
; VI-NEXT:    ret void
;
  %brev = call i3 @llvm.bitreverse.i3(i3 %a)
  store volatile i3 %brev, ptr addrspace(1) undef
  ret void
}
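
; The same operations, repeated on i16.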

define amdgpu_kernel void @add_i16(i16 %a, i16 %b) {
; SI-LABEL: @add_i16(
; SI-NEXT:    [[R:%.*]] = add i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @add_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = add i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @constant_add_i16() {
; SI-LABEL: @constant_add_i16(
; SI-NEXT:    [[R:%.*]] = add i16 1, 2
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @constant_add_i16(
; VI-NEXT:    store volatile i16 3, ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = add i16 1, 2
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @constant_add_nsw_i16() {
; SI-LABEL: @constant_add_nsw_i16(
; SI-NEXT:    [[R:%.*]] = add nsw i16 1, 2
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @constant_add_nsw_i16(
; VI-NEXT:    store volatile i16 3, ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = add nsw i16 1, 2
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @constant_add_nuw_i16() {
; SI-LABEL: @constant_add_nuw_i16(
; SI-NEXT:    [[R:%.*]] = add nsw i16 1, 2
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @constant_add_nuw_i16(
; VI-NEXT:    store volatile i16 3, ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = add nsw i16 1, 2
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @add_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @add_nsw_i16(
; SI-NEXT:    [[R:%.*]] = add nsw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @add_nsw_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = add nsw i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @add_nuw_i16(i16 %a, i16 %b) {
; SI-LABEL: @add_nuw_i16(
; SI-NEXT:    [[R:%.*]] = add nuw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @add_nuw_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = add nuw i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @add_nuw_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @add_nuw_nsw_i16(
; SI-NEXT:    [[R:%.*]] = add nuw nsw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @add_nuw_nsw_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = add nuw nsw i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_i16(i16 %a, i16 %b) {
; SI-LABEL: @sub_i16(
; SI-NEXT:    [[R:%.*]] = sub i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @sub_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = sub nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = sub i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @sub_nsw_i16(
; SI-NEXT:    [[R:%.*]] = sub nsw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @sub_nsw_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = sub nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = sub nsw i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_nuw_i16(i16 %a, i16 %b) {
; SI-LABEL: @sub_nuw_i16(
; SI-NEXT:    [[R:%.*]] = sub nuw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @sub_nuw_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = sub nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = sub nuw i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_nuw_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @sub_nuw_nsw_i16(
; SI-NEXT:    [[R:%.*]] = sub nuw nsw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @sub_nuw_nsw_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = sub nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = sub nuw nsw i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_i16(i16 %a, i16 %b) {
; SI-LABEL: @mul_i16(
; SI-NEXT:    [[R:%.*]] = mul i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @mul_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = mul i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @mul_nsw_i16(
; SI-NEXT:    [[R:%.*]] = mul nsw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @mul_nsw_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = mul nsw i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_nuw_i16(i16 %a, i16 %b) {
; SI-LABEL: @mul_nuw_i16(
; SI-NEXT:    [[R:%.*]] = mul nuw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @mul_nuw_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = mul nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = mul nuw i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_nuw_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @mul_nuw_nsw_i16(
; SI-NEXT:    [[R:%.*]] = mul nuw nsw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @mul_nuw_nsw_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = mul nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = mul nuw nsw i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_i16(i16 %a, i16 %b) {
; SI-LABEL: @shl_i16(
; SI-NEXT:    [[R:%.*]] = shl i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @shl_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = shl i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @shl_nsw_i16(
; SI-NEXT:    [[R:%.*]] = shl nsw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @shl_nsw_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = shl nsw i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_nuw_i16(i16 %a, i16 %b) {
; SI-LABEL: @shl_nuw_i16(
; SI-NEXT:    [[R:%.*]] = shl nuw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @shl_nuw_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = shl nuw i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_nuw_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @shl_nuw_nsw_i16(
; SI-NEXT:    [[R:%.*]] = shl nuw nsw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @shl_nuw_nsw_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = shl nuw nsw i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @lshr_i16(i16 %a, i16 %b) {
; SI-LABEL: @lshr_i16(
; SI-NEXT:    [[R:%.*]] = lshr i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @lshr_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = lshr i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @lshr_exact_i16(i16 %a, i16 %b) {
; SI-LABEL: @lshr_exact_i16(
; SI-NEXT:    [[R:%.*]] = lshr exact i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @lshr_exact_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = lshr exact i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = lshr exact i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @ashr_i16(i16 %a, i16 %b) {
; SI-LABEL: @ashr_i16(
; SI-NEXT:    [[R:%.*]] = ashr i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @ashr_i16(
; VI-NEXT:    [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = sext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = ashr i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = ashr i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @ashr_exact_i16(i16 %a, i16 %b) {
; SI-LABEL: @ashr_exact_i16(
; SI-NEXT:    [[R:%.*]] = ashr exact i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @ashr_exact_i16(
; VI-NEXT:    [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = sext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = ashr exact i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = ashr exact i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @constant_lshr_exact_i16(i16 %a, i16 %b) {
; SI-LABEL: @constant_lshr_exact_i16(
; SI-NEXT:    [[R:%.*]] = lshr exact i16 4, 1
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @constant_lshr_exact_i16(
; VI-NEXT:    store volatile i16 2, ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = lshr exact i16 4, 1
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @and_i16(i16 %a, i16 %b) {
; SI-LABEL: @and_i16(
; SI-NEXT:    [[R:%.*]] = and i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @and_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = and i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = and i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @or_i16(i16 %a, i16 %b) {
; SI-LABEL: @or_i16(
; SI-NEXT:    [[R:%.*]] = or i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @or_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = or i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = or i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @xor_i16(i16 %a, i16 %b) {
; SI-LABEL: @xor_i16(
; SI-NEXT:    [[R:%.*]] = xor i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile i16 [[R]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @xor_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %r = xor i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_eq_i16(i16 %a, i16 %b) {
; SI-LABEL: @select_eq_i16(
; SI-NEXT:    [[CMP:%.*]] = icmp eq i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
; SI-NEXT:    store volatile i16 [[SEL]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @select_eq_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext i16 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = zext i16 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
; VI-NEXT:    store volatile i16 [[TMP7]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %cmp = icmp eq i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ne_i16(i16 %a, i16 %b) {
; SI-LABEL: @select_ne_i16(
; SI-NEXT:    [[CMP:%.*]] = icmp ne i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
; SI-NEXT:    store volatile i16 [[SEL]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @select_ne_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext i16 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = zext i16 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
; VI-NEXT:    store volatile i16 [[TMP7]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %cmp = icmp ne i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ugt_i16(i16 %a, i16 %b) {
; SI-LABEL: @select_ugt_i16(
; SI-NEXT:    [[CMP:%.*]] = icmp ugt i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
; SI-NEXT:    store volatile i16 [[SEL]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @select_ugt_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp ugt i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext i16 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = zext i16 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
; VI-NEXT:    store volatile i16 [[TMP7]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %cmp = icmp ugt i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_uge_i16(i16 %a, i16 %b) {
; SI-LABEL: @select_uge_i16(
; SI-NEXT:    [[CMP:%.*]] = icmp uge i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
; SI-NEXT:    store volatile i16 [[SEL]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @select_uge_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp uge i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext i16 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = zext i16 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
; VI-NEXT:    store volatile i16 [[TMP7]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %cmp = icmp uge i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ult_i16(i16 %a, i16 %b) {
; SI-LABEL: @select_ult_i16(
; SI-NEXT:    [[CMP:%.*]] = icmp ult i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
; SI-NEXT:    store volatile i16 [[SEL]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @select_ult_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp ult i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext i16 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = zext i16 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
; VI-NEXT:    store volatile i16 [[TMP7]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %cmp = icmp ult i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ule_i16(i16 %a, i16 %b) {
; SI-LABEL: @select_ule_i16(
; SI-NEXT:    [[CMP:%.*]] = icmp ule i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
; SI-NEXT:    store volatile i16 [[SEL]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @select_ule_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext i16 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = zext i16 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
; VI-NEXT:    store volatile i16 [[TMP7]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %cmp = icmp ule i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_sgt_i16(i16 %a, i16 %b) {
; SI-LABEL: @select_sgt_i16(
; SI-NEXT:    [[CMP:%.*]] = icmp sgt i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
; SI-NEXT:    store volatile i16 [[SEL]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @select_sgt_i16(
; VI-NEXT:    [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = sext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = sext i16 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = sext i16 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
; VI-NEXT:    store volatile i16 [[TMP7]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %cmp = icmp sgt i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_sge_i16(i16 %a, i16 %b) {
; SI-LABEL: @select_sge_i16(
; SI-NEXT:    [[CMP:%.*]] = icmp sge i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
; SI-NEXT:    store volatile i16 [[SEL]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @select_sge_i16(
; VI-NEXT:    [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = sext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp sge i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = sext i16 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = sext i16 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
; VI-NEXT:    store volatile i16 [[TMP7]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %cmp = icmp sge i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_slt_i16(i16 %a, i16 %b) {
; SI-LABEL: @select_slt_i16(
; SI-NEXT:    [[CMP:%.*]] = icmp slt i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
; SI-NEXT:    store volatile i16 [[SEL]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @select_slt_i16(
; VI-NEXT:    [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = sext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = sext i16 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = sext i16 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
; VI-NEXT:    store volatile i16 [[TMP7]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %cmp = icmp slt i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_sle_i16(i16 %a, i16 %b) {
; SI-LABEL: @select_sle_i16(
; SI-NEXT:    [[CMP:%.*]] = icmp sle i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
; SI-NEXT:    store volatile i16 [[SEL]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @select_sle_i16(
; VI-NEXT:    [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = sext i16 [[B:%.*]] to i32
; VI-NEXT:    [[TMP3:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = sext i16 [[A]] to i32
; VI-NEXT:    [[TMP5:%.*]] = sext i16 [[B]] to i32
; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
; VI-NEXT:    store volatile i16 [[TMP7]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %cmp = icmp sle i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, ptr addrspace(1) undef
  ret void
}

declare i16 @llvm.bitreverse.i16(i16)

define amdgpu_kernel void @bitreverse_i16(i16 %a) {
; SI-LABEL: @bitreverse_i16(
; SI-NEXT:    [[BREV:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[A:%.*]])
; SI-NEXT:    store volatile i16 [[BREV]], ptr addrspace(1) undef, align 2
; SI-NEXT:    ret void
;
; VI-LABEL: @bitreverse_i16(
; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT:    [[TMP2:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[TMP1]])
; VI-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP2]], 16
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT:    store volatile i16 [[TMP4]], ptr addrspace(1) undef, align 2
; VI-NEXT:    ret void
;
  %brev = call i16 @llvm.bitreverse.i16(i16 %a)
  store volatile i16 %brev, ptr addrspace(1) undef
  ret void
}
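
; The same operations, repeated on the vector type <3 x i15>.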

define amdgpu_kernel void @add_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @add_3xi15(
; SI-NEXT:    [[R:%.*]] = add <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT:    ret void
;
; VI-LABEL: @add_3xi15(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT:    store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT:    ret void
;
  %r = add <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @add_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @add_nsw_3xi15(
; SI-NEXT:    [[R:%.*]] = add nsw <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT:    ret void
;
; VI-LABEL: @add_nsw_3xi15(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT:    store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT:    ret void
;
  %r = add nsw <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @add_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @add_nuw_3xi15(
; SI-NEXT:    [[R:%.*]] = add nuw <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT:    ret void
;
; VI-LABEL: @add_nuw_3xi15(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT:    store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT:    ret void
;
  %r = add nuw <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @add_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @add_nuw_nsw_3xi15(
; SI-NEXT:    [[R:%.*]] = add nuw nsw <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT:    ret void
;
; VI-LABEL: @add_nuw_nsw_3xi15(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT:    store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT:    ret void
;
  %r = add nuw nsw <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @sub_3xi15(
; SI-NEXT:    [[R:%.*]] = sub <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT:    ret void
;
; VI-LABEL: @sub_3xi15(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = sub nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT:    store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT:    ret void
;
  %r = sub <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @sub_nsw_3xi15(
; SI-NEXT:    [[R:%.*]] = sub nsw <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT:    ret void
;
; VI-LABEL: @sub_nsw_3xi15(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = sub nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT:    store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT:    ret void
;
  %r = sub nsw <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @sub_nuw_3xi15(
; SI-NEXT:    [[R:%.*]] = sub nuw <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT:    ret void
;
; VI-LABEL: @sub_nuw_3xi15(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = sub nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT:    store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT:    ret void
;
  %r = sub nuw <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @sub_nuw_nsw_3xi15(
; SI-NEXT:    [[R:%.*]] = sub nuw nsw <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT:    ret void
;
; VI-LABEL: @sub_nuw_nsw_3xi15(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = sub nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT:    store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT:    ret void
;
  %r = sub nuw nsw <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @mul_3xi15(
; SI-NEXT:    [[R:%.*]] = mul <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT:    ret void
;
; VI-LABEL: @mul_3xi15(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = mul nuw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT:    store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT:    ret void
;
  %r = mul <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @mul_nsw_3xi15(
; SI-NEXT:    [[R:%.*]] = mul nsw <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT:    ret void
;
; VI-LABEL: @mul_nsw_3xi15(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = mul nuw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT:    store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT:    ret void
;
  %r = mul nsw <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @mul_nuw_3xi15(
; SI-NEXT:    [[R:%.*]] = mul nuw <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT:    ret void
;
; VI-LABEL: @mul_nuw_3xi15(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = mul nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT:    store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT:    ret void
;
  %r = mul nuw <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @mul_nuw_nsw_3xi15(
; SI-NEXT:    [[R:%.*]] = mul nuw nsw <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT:    ret void
;
; VI-LABEL: @mul_nuw_nsw_3xi15(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = mul nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT:    store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT:    ret void
;
  %r = mul nuw nsw <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}
define amdgpu_kernel void @shl_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @shl_3xi15(
; SI-NEXT: [[R:%.*]] = shl <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @shl_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = shl <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @shl_nsw_3xi15(
; SI-NEXT: [[R:%.*]] = shl nsw <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @shl_nsw_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = shl nsw <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @shl_nuw_3xi15(
; SI-NEXT: [[R:%.*]] = shl nuw <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @shl_nuw_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = shl nuw <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @shl_nuw_nsw_3xi15(
; SI-NEXT: [[R:%.*]] = shl nuw nsw <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @shl_nuw_nsw_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = shl nuw nsw <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @lshr_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @lshr_3xi15(
; SI-NEXT: [[R:%.*]] = lshr <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @lshr_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = lshr <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = lshr <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @lshr_exact_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @lshr_exact_3xi15(
; SI-NEXT: [[R:%.*]] = lshr exact <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @lshr_exact_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = lshr exact <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = lshr exact <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @ashr_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @ashr_3xi15(
; SI-NEXT: [[R:%.*]] = ashr <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @ashr_3xi15(
; VI-NEXT: [[TMP1:%.*]] = sext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = sext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = ashr <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = ashr <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @ashr_exact_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @ashr_exact_3xi15(
; SI-NEXT: [[R:%.*]] = ashr exact <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @ashr_exact_3xi15(
; VI-NEXT: [[TMP1:%.*]] = sext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = sext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = ashr exact <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = ashr exact <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @and_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @and_3xi15(
; SI-NEXT: [[R:%.*]] = and <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @and_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = and <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = and <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @or_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @or_3xi15(
; SI-NEXT: [[R:%.*]] = or <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @or_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = or <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = or <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @xor_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @xor_3xi15(
; SI-NEXT: [[R:%.*]] = xor <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i15> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @xor_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = xor <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = xor <3 x i15> %a, %b
  store volatile <3 x i15> %r, ptr addrspace(1) undef
  ret void
}

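; The select cases below widen both the compared and the selected operands on
; VI as shown by the checks: equality and unsigned predicates extend with
; zext, signed predicates with sext.
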
define amdgpu_kernel void @select_eq_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @select_eq_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp eq <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
; SI-NEXT: store volatile <3 x i15> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_eq_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp eq <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext <3 x i15> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp eq <3 x i15> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
  store volatile <3 x i15> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ne_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @select_ne_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp ne <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
; SI-NEXT: store volatile <3 x i15> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_ne_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext <3 x i15> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp ne <3 x i15> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
  store volatile <3 x i15> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ugt_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @select_ugt_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp ugt <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
; SI-NEXT: store volatile <3 x i15> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_ugt_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp ugt <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext <3 x i15> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp ugt <3 x i15> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
  store volatile <3 x i15> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_uge_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @select_uge_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp uge <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
; SI-NEXT: store volatile <3 x i15> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_uge_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp uge <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext <3 x i15> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp uge <3 x i15> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
  store volatile <3 x i15> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ult_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @select_ult_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp ult <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
; SI-NEXT: store volatile <3 x i15> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_ult_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp ult <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext <3 x i15> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp ult <3 x i15> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
  store volatile <3 x i15> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ule_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @select_ule_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp ule <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
; SI-NEXT: store volatile <3 x i15> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_ule_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp ule <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext <3 x i15> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp ule <3 x i15> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
  store volatile <3 x i15> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_sgt_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @select_sgt_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp sgt <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
; SI-NEXT: store volatile <3 x i15> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_sgt_3xi15(
; VI-NEXT: [[TMP1:%.*]] = sext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = sext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp sgt <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = sext <3 x i15> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = sext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp sgt <3 x i15> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
  store volatile <3 x i15> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_sge_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @select_sge_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp sge <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
; SI-NEXT: store volatile <3 x i15> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_sge_3xi15(
; VI-NEXT: [[TMP1:%.*]] = sext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = sext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp sge <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = sext <3 x i15> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = sext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp sge <3 x i15> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
  store volatile <3 x i15> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_slt_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @select_slt_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp slt <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
; SI-NEXT: store volatile <3 x i15> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_slt_3xi15(
; VI-NEXT: [[TMP1:%.*]] = sext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = sext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp slt <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = sext <3 x i15> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = sext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp slt <3 x i15> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
  store volatile <3 x i15> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_sle_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @select_sle_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp sle <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
; SI-NEXT: store volatile <3 x i15> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_sle_3xi15(
; VI-NEXT: [[TMP1:%.*]] = sext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = sext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp sle <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = sext <3 x i15> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = sext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp sle <3 x i15> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
  store volatile <3 x i15> %sel, ptr addrspace(1) undef
  ret void
}

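; For bitreverse, the widened form reverses the zero-extended 32-bit value and
; shifts right to realign the reversed bits: by 32 - 15 = 17 bits here, and by
; 32 - 16 = 16 bits in the <3 x i16> variant near the end of the file.
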
declare <3 x i15> @llvm.bitreverse.v3i15(<3 x i15>)
define amdgpu_kernel void @bitreverse_3xi15(<3 x i15> %a) {
; SI-LABEL: @bitreverse_3xi15(
; SI-NEXT: [[BREV:%.*]] = call <3 x i15> @llvm.bitreverse.v3i15(<3 x i15> [[A:%.*]])
; SI-NEXT: store volatile <3 x i15> [[BREV]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @bitreverse_3xi15(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = call <3 x i32> @llvm.bitreverse.v3i32(<3 x i32> [[TMP1]])
; VI-NEXT: [[TMP3:%.*]] = lshr <3 x i32> [[TMP2]], <i32 17, i32 17, i32 17>
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %brev = call <3 x i15> @llvm.bitreverse.v3i15(<3 x i15> %a)
  store volatile <3 x i15> %brev, ptr addrspace(1) undef
  ret void
}

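; The <3 x i16> cases below mirror the <3 x i15> cases above: on VI each
; operation is widened to <3 x i32> (operands extended, wrap flags inferred on
; the wide operation, result truncated back), while on SI the original narrow
; operation is left in place.
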
define amdgpu_kernel void @add_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @add_3xi16(
; SI-NEXT: [[R:%.*]] = add <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @add_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = add <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @add_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @add_nsw_3xi16(
; SI-NEXT: [[R:%.*]] = add nsw <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @add_nsw_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = add nsw <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @add_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @add_nuw_3xi16(
; SI-NEXT: [[R:%.*]] = add nuw <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @add_nuw_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = add nuw <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @add_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @add_nuw_nsw_3xi16(
; SI-NEXT: [[R:%.*]] = add nuw nsw <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @add_nuw_nsw_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = add nuw nsw <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @sub_3xi16(
; SI-NEXT: [[R:%.*]] = sub <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @sub_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = sub nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = sub <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @sub_nsw_3xi16(
; SI-NEXT: [[R:%.*]] = sub nsw <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @sub_nsw_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = sub nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = sub nsw <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @sub_nuw_3xi16(
; SI-NEXT: [[R:%.*]] = sub nuw <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @sub_nuw_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = sub nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = sub nuw <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @sub_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @sub_nuw_nsw_3xi16(
; SI-NEXT: [[R:%.*]] = sub nuw nsw <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @sub_nuw_nsw_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = sub nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = sub nuw nsw <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @mul_3xi16(
; SI-NEXT: [[R:%.*]] = mul <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @mul_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = mul nuw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = mul <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @mul_nsw_3xi16(
; SI-NEXT: [[R:%.*]] = mul nsw <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @mul_nsw_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = mul nuw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = mul nsw <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @mul_nuw_3xi16(
; SI-NEXT: [[R:%.*]] = mul nuw <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @mul_nuw_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = mul nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = mul nuw <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @mul_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @mul_nuw_nsw_3xi16(
; SI-NEXT: [[R:%.*]] = mul nuw nsw <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @mul_nuw_nsw_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = mul nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = mul nuw nsw <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @shl_3xi16(
; SI-NEXT: [[R:%.*]] = shl <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @shl_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = shl <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @shl_nsw_3xi16(
; SI-NEXT: [[R:%.*]] = shl nsw <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @shl_nsw_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = shl nsw <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @shl_nuw_3xi16(
; SI-NEXT: [[R:%.*]] = shl nuw <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @shl_nuw_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = shl nuw <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @shl_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @shl_nuw_nsw_3xi16(
; SI-NEXT: [[R:%.*]] = shl nuw nsw <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @shl_nuw_nsw_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = shl nuw nsw <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @lshr_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @lshr_3xi16(
; SI-NEXT: [[R:%.*]] = lshr <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @lshr_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = lshr <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = lshr <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @lshr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @lshr_exact_3xi16(
; SI-NEXT: [[R:%.*]] = lshr exact <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @lshr_exact_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = lshr exact <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = lshr exact <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @ashr_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @ashr_3xi16(
; SI-NEXT: [[R:%.*]] = ashr <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @ashr_3xi16(
; VI-NEXT: [[TMP1:%.*]] = sext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = sext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = ashr <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = ashr <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @ashr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @ashr_exact_3xi16(
; SI-NEXT: [[R:%.*]] = ashr exact <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @ashr_exact_3xi16(
; VI-NEXT: [[TMP1:%.*]] = sext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = sext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = ashr exact <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = ashr exact <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @and_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @and_3xi16(
; SI-NEXT: [[R:%.*]] = and <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @and_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = and <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = and <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @or_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @or_3xi16(
; SI-NEXT: [[R:%.*]] = or <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @or_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = or <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = or <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @xor_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @xor_3xi16(
; SI-NEXT: [[R:%.*]] = xor <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile <3 x i16> [[R]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @xor_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = xor <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %r = xor <3 x i16> %a, %b
  store volatile <3 x i16> %r, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_eq_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_eq_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp eq <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT: store volatile <3 x i16> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_eq_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp eq <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp eq <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ne_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_ne_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp ne <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT: store volatile <3 x i16> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_ne_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp ne <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ugt_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_ugt_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp ugt <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT: store volatile <3 x i16> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_ugt_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp ugt <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp ugt <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_uge_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_uge_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp uge <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT: store volatile <3 x i16> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_uge_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp uge <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp uge <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ult_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_ult_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp ult <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT: store volatile <3 x i16> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_ult_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp ult <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp ult <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_ule_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_ule_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp ule <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT: store volatile <3 x i16> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_ule_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp ule <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp ule <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_sgt_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_sgt_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp sgt <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT: store volatile <3 x i16> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_sgt_3xi16(
; VI-NEXT: [[TMP1:%.*]] = sext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = sext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp sgt <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = sext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = sext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp sgt <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_sge_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_sge_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp sge <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT: store volatile <3 x i16> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_sge_3xi16(
; VI-NEXT: [[TMP1:%.*]] = sext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = sext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp sge <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = sext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = sext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp sge <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_slt_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_slt_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp slt <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT: store volatile <3 x i16> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_slt_3xi16(
; VI-NEXT: [[TMP1:%.*]] = sext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = sext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp slt <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = sext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = sext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp slt <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @select_sle_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_sle_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp sle <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT: store volatile <3 x i16> [[SEL]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_sle_3xi16(
; VI-NEXT: [[TMP1:%.*]] = sext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = sext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = icmp sle <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = sext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT: [[TMP5:%.*]] = sext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP7]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %cmp = icmp sle <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, ptr addrspace(1) undef
  ret void
}

declare <3 x i16> @llvm.bitreverse.v3i16(<3 x i16>)

define amdgpu_kernel void @bitreverse_3xi16(<3 x i16> %a) {
; SI-LABEL: @bitreverse_3xi16(
; SI-NEXT: [[BREV:%.*]] = call <3 x i16> @llvm.bitreverse.v3i16(<3 x i16> [[A:%.*]])
; SI-NEXT: store volatile <3 x i16> [[BREV]], ptr addrspace(1) undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @bitreverse_3xi16(
; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT: [[TMP2:%.*]] = call <3 x i32> @llvm.bitreverse.v3i32(<3 x i32> [[TMP1]])
; VI-NEXT: [[TMP3:%.*]] = lshr <3 x i32> [[TMP2]], <i32 16, i32 16, i32 16>
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
; VI-NEXT: ret void
;
  %brev = call <3 x i16> @llvm.bitreverse.v3i16(<3 x i16> %a)
  store volatile <3 x i16> %brev, ptr addrspace(1) undef
  ret void
}