Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / llvm / test / CodeGen / AMDGPU / llvm.amdgcn.raw.ptr.tbuffer.load.d16.ll
blob dce0bf59e107ef4c7a8762456c6a8e6b7fbac69d
1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding | FileCheck -enable-var-scope -check-prefixes=PREGFX10-UNPACKED %s
3 ; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=PREGFX10-PACKED %s
4 ; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=PREGFX10-PACKED %s
5 ; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX10-PACKED %s
6 ; RUN: llc < %s -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX11-PACKED %s
; tbuffer_load_d16_x: loads a single half through
; @llvm.amdgcn.raw.ptr.tbuffer.load.f16 from the buffer resource %rsrc and
; returns it unchanged. Every check prefix expects one tbuffer load into v0
; followed by s_waitcnt vmcnt(0), with no extra move or shift. Only the
; PREGFX10-UNPACKED checks also match instruction encodings.
; NOTE(review): the i32 22 operand is presumably the format immediate -- the
; checks print it as [BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] for the
; PREGFX10 prefixes and as [BUF_FMT_32_FLOAT] for GFX10/GFX11; confirm against
; the intrinsic definition.
8 define amdgpu_ps half @tbuffer_load_d16_x(ptr addrspace(8) inreg %rsrc) {
9 ; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_x:
10 ; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
11 ; PREGFX10-UNPACKED-NEXT:    tbuffer_load_format_d16_x v0, off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] ; encoding: [0x00,0x00,0xb4,0xe8,0x00,0x00,0x00,0x80]
12 ; PREGFX10-UNPACKED-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
13 ; PREGFX10-UNPACKED-NEXT:    ; return to shader part epilog
15 ; PREGFX10-PACKED-LABEL: tbuffer_load_d16_x:
16 ; PREGFX10-PACKED:       ; %bb.0: ; %main_body
17 ; PREGFX10-PACKED-NEXT:    tbuffer_load_format_d16_x v0, off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
18 ; PREGFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
19 ; PREGFX10-PACKED-NEXT:    ; return to shader part epilog
21 ; GFX10-PACKED-LABEL: tbuffer_load_d16_x:
22 ; GFX10-PACKED:       ; %bb.0: ; %main_body
23 ; GFX10-PACKED-NEXT:    tbuffer_load_format_d16_x v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
24 ; GFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
25 ; GFX10-PACKED-NEXT:    ; return to shader part epilog
27 ; GFX11-PACKED-LABEL: tbuffer_load_d16_x:
28 ; GFX11-PACKED:       ; %bb.0: ; %main_body
29 ; GFX11-PACKED-NEXT:    tbuffer_load_d16_format_x v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
30 ; GFX11-PACKED-NEXT:    s_waitcnt vmcnt(0)
31 ; GFX11-PACKED-NEXT:    ; return to shader part epilog
32 main_body:
; Scalar (non-vector) overload: the loaded half is the return value itself.
33   %data = call half @llvm.amdgcn.raw.ptr.tbuffer.load.f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 22, i32 0)
34   ret half %data
; tbuffer_load_d16_xy: loads <2 x half> through
; @llvm.amdgcn.raw.ptr.tbuffer.load.v2f16 from the buffer resource %rsrc and
; returns element 1.
; Expected code differs by prefix:
;   - PREGFX10-UNPACKED: the load writes the register pair v[0:1] and the
;     result is copied out of v1 with v_mov_b32.
;   - *-PACKED prefixes: the load writes the single register v0 and the result
;     is obtained with a right shift by 16 (v_lshrrev_b32).
; NOTE(review): this looks like one half per VGPR on the unpacked target versus
; two halves per VGPR on the packed ones -- confirm against the target
; documentation.
37 define amdgpu_ps half @tbuffer_load_d16_xy(ptr addrspace(8) inreg %rsrc) {
38 ; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_xy:
39 ; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
40 ; PREGFX10-UNPACKED-NEXT:    tbuffer_load_format_d16_xy v[0:1], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] ; encoding: [0x00,0x80,0xb4,0xe8,0x00,0x00,0x00,0x80]
41 ; PREGFX10-UNPACKED-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
42 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, v1 ; encoding: [0x01,0x03,0x00,0x7e]
43 ; PREGFX10-UNPACKED-NEXT:    ; return to shader part epilog
45 ; PREGFX10-PACKED-LABEL: tbuffer_load_d16_xy:
46 ; PREGFX10-PACKED:       ; %bb.0: ; %main_body
47 ; PREGFX10-PACKED-NEXT:    tbuffer_load_format_d16_xy v0, off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
48 ; PREGFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
49 ; PREGFX10-PACKED-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
50 ; PREGFX10-PACKED-NEXT:    ; return to shader part epilog
52 ; GFX10-PACKED-LABEL: tbuffer_load_d16_xy:
53 ; GFX10-PACKED:       ; %bb.0: ; %main_body
54 ; GFX10-PACKED-NEXT:    tbuffer_load_format_d16_xy v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
55 ; GFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
56 ; GFX10-PACKED-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
57 ; GFX10-PACKED-NEXT:    ; return to shader part epilog
59 ; GFX11-PACKED-LABEL: tbuffer_load_d16_xy:
60 ; GFX11-PACKED:       ; %bb.0: ; %main_body
61 ; GFX11-PACKED-NEXT:    tbuffer_load_d16_format_xy v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
62 ; GFX11-PACKED-NEXT:    s_waitcnt vmcnt(0)
63 ; GFX11-PACKED-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
64 ; GFX11-PACKED-NEXT:    ; return to shader part epilog
65 main_body:
66   %data = call <2 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 22, i32 0)
; Returning the last element (index 1) is what makes the checks require the
; v_mov / v_lshrrev extraction after the load.
67   %elt = extractelement <2 x half> %data, i32 1
68   ret half %elt
; tbuffer_load_d16_xyz: loads <3 x half> through
; @llvm.amdgcn.raw.ptr.tbuffer.load.v3f16 from the buffer resource %rsrc and
; returns element 2.
; Expected code differs by prefix:
;   - PREGFX10-UNPACKED: the load writes v[0:2] and the result is copied out
;     of v2 with v_mov_b32.
;   - *-PACKED prefixes: the load writes v[0:1] and the result is copied out
;     of v1 with v_mov_b32; no shift is expected.
; NOTE(review): the absence of a shift is consistent with element 2 sitting in
; the low half of the second register on packed targets -- confirm against the
; target documentation.
71 define amdgpu_ps half @tbuffer_load_d16_xyz(ptr addrspace(8) inreg %rsrc) {
72 ; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_xyz:
73 ; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
74 ; PREGFX10-UNPACKED-NEXT:    tbuffer_load_format_d16_xyz v[0:2], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] ; encoding: [0x00,0x00,0xb5,0xe8,0x00,0x00,0x00,0x80]
75 ; PREGFX10-UNPACKED-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
76 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, v2 ; encoding: [0x02,0x03,0x00,0x7e]
77 ; PREGFX10-UNPACKED-NEXT:    ; return to shader part epilog
79 ; PREGFX10-PACKED-LABEL: tbuffer_load_d16_xyz:
80 ; PREGFX10-PACKED:       ; %bb.0: ; %main_body
81 ; PREGFX10-PACKED-NEXT:    tbuffer_load_format_d16_xyz v[0:1], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
82 ; PREGFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
83 ; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, v1
84 ; PREGFX10-PACKED-NEXT:    ; return to shader part epilog
86 ; GFX10-PACKED-LABEL: tbuffer_load_d16_xyz:
87 ; GFX10-PACKED:       ; %bb.0: ; %main_body
88 ; GFX10-PACKED-NEXT:    tbuffer_load_format_d16_xyz v[0:1], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
89 ; GFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
90 ; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, v1
91 ; GFX10-PACKED-NEXT:    ; return to shader part epilog
93 ; GFX11-PACKED-LABEL: tbuffer_load_d16_xyz:
94 ; GFX11-PACKED:       ; %bb.0: ; %main_body
95 ; GFX11-PACKED-NEXT:    tbuffer_load_d16_format_xyz v[0:1], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
96 ; GFX11-PACKED-NEXT:    s_waitcnt vmcnt(0)
97 ; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, v1
98 ; GFX11-PACKED-NEXT:    ; return to shader part epilog
99 main_body:
100   %data = call <3 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 22, i32 0)
; Returning the last element (index 2) is what makes the checks require a
; register copy after the load.
101   %elt = extractelement <3 x half> %data, i32 2
102   ret half %elt
; tbuffer_load_d16_xyzw: loads <4 x half> through
; @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16 from the buffer resource %rsrc and
; returns element 3.
; Expected code differs by prefix:
;   - PREGFX10-UNPACKED: the load writes v[0:3] and the result is copied out
;     of v3 with v_mov_b32.
;   - *-PACKED prefixes: the load writes v[0:1] and the result is obtained by
;     shifting v1 right by 16 into v0 (v_lshrrev_b32).
; NOTE(review): the shift is consistent with element 3 sitting in the high half
; of the second register on packed targets -- confirm against the target
; documentation.
105 define amdgpu_ps half @tbuffer_load_d16_xyzw(ptr addrspace(8) inreg %rsrc) {
106 ; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_xyzw:
107 ; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
108 ; PREGFX10-UNPACKED-NEXT:    tbuffer_load_format_d16_xyzw v[0:3], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] ; encoding: [0x00,0x80,0xb5,0xe8,0x00,0x00,0x00,0x80]
109 ; PREGFX10-UNPACKED-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
110 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, v3 ; encoding: [0x03,0x03,0x00,0x7e]
111 ; PREGFX10-UNPACKED-NEXT:    ; return to shader part epilog
113 ; PREGFX10-PACKED-LABEL: tbuffer_load_d16_xyzw:
114 ; PREGFX10-PACKED:       ; %bb.0: ; %main_body
115 ; PREGFX10-PACKED-NEXT:    tbuffer_load_format_d16_xyzw v[0:1], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
116 ; PREGFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
117 ; PREGFX10-PACKED-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
118 ; PREGFX10-PACKED-NEXT:    ; return to shader part epilog
120 ; GFX10-PACKED-LABEL: tbuffer_load_d16_xyzw:
121 ; GFX10-PACKED:       ; %bb.0: ; %main_body
122 ; GFX10-PACKED-NEXT:    tbuffer_load_format_d16_xyzw v[0:1], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
123 ; GFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
124 ; GFX10-PACKED-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
125 ; GFX10-PACKED-NEXT:    ; return to shader part epilog
127 ; GFX11-PACKED-LABEL: tbuffer_load_d16_xyzw:
128 ; GFX11-PACKED:       ; %bb.0: ; %main_body
129 ; GFX11-PACKED-NEXT:    tbuffer_load_d16_format_xyzw v[0:1], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
130 ; GFX11-PACKED-NEXT:    s_waitcnt vmcnt(0)
131 ; GFX11-PACKED-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
132 ; GFX11-PACKED-NEXT:    ; return to shader part epilog
133 main_body:
134   %data = call <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 22, i32 0)
; Returning the last element (index 3) is what makes the checks require the
; v_mov / v_lshrrev extraction after the load.
135   %elt = extractelement <4 x half> %data, i32 3
136   ret half %elt
; Declarations of the raw-pointer tbuffer load intrinsic overloads called by
; the tests in this file, one per result type (half, <2 x half>, <3 x half>,
; <4 x half>). Each takes a ptr addrspace(8) buffer resource followed by four
; i32 operands.
139 declare half @llvm.amdgcn.raw.ptr.tbuffer.load.f16(ptr addrspace(8), i32, i32, i32, i32)
140 declare <2 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f16(ptr addrspace(8), i32, i32, i32, i32)
141 declare <3 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f16(ptr addrspace(8), i32, i32, i32, i32)
142 declare <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8), i32, i32, i32, i32)