1 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=PACKED %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=PACKED %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=PACKED %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=UNPACKED %s
7 define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
8 ; PACKED-LABEL: name: struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
9 ; PACKED: bb.1 (%ir-block.0):
10 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
12 ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
13 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
14 ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
15 ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
16 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
17 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
18 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
19 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
20 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
21 ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
22 ; PACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]]
23 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
25 ; UNPACKED-LABEL: name: struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
26 ; UNPACKED: bb.1 (%ir-block.0):
27 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
28 ; UNPACKED-NEXT: {{ $}}
29 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
30 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
31 ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
32 ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
33 ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
34 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
35 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
36 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
37 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
38 ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
39 ; UNPACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]]
40 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
41 %val = call half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0)
45 define amdgpu_ps <2 x half> @struct_tbuffer_load_v2f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
46 ; PACKED-LABEL: name: struct_tbuffer_load_v2f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
47 ; PACKED: bb.1 (%ir-block.0):
48 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
50 ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
51 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
52 ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
53 ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
54 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
55 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
56 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
57 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
58 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
59 ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 8)
60 ; PACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN]]
61 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
63 ; UNPACKED-LABEL: name: struct_tbuffer_load_v2f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
64 ; UNPACKED: bb.1 (%ir-block.0):
65 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
66 ; UNPACKED-NEXT: {{ $}}
67 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
68 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
69 ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
70 ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
71 ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
72 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
73 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
74 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
75 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
76 ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 8)
77 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub0
78 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub1
79 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
80 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
81 ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY7]], [[COPY9]], implicit $exec
82 ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
83 ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY8]], [[COPY10]], implicit $exec
84 ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16
85 ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
86 ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY11]], [[V_AND_B32_e64_1]], implicit $exec
87 ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec
88 ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]]
89 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
90 %val = call <2 x half> @llvm.amdgcn.struct.tbuffer.load.v2f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0)
95 ; define amdgpu_ps <3 x half> @struct_tbuffer_load_v3f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
96 ; %val = call <3 x half> @llvm.amdgcn.struct.tbuffer.load.v3f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0)
100 define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
101 ; PACKED-LABEL: name: struct_tbuffer_load_v4f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
102 ; PACKED: bb.1 (%ir-block.0):
103 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
104 ; PACKED-NEXT: {{ $}}
105 ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
106 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
107 ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
108 ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
109 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
110 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
111 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
112 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
113 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
114 ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 8)
115 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0
116 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1
117 ; PACKED-NEXT: $vgpr0 = COPY [[COPY7]]
118 ; PACKED-NEXT: $vgpr1 = COPY [[COPY8]]
119 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
121 ; UNPACKED-LABEL: name: struct_tbuffer_load_v4f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
122 ; UNPACKED: bb.1 (%ir-block.0):
123 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
124 ; UNPACKED-NEXT: {{ $}}
125 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
126 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
127 ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
128 ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
129 ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
130 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
131 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
132 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
133 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
134 ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 8)
135 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0
136 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1
137 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2
138 ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3
139 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
140 ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
141 ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY7]], [[COPY11]], implicit $exec
142 ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
143 ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY8]], [[COPY12]], implicit $exec
144 ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16
145 ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
146 ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY13]], [[V_AND_B32_e64_1]], implicit $exec
147 ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec
148 ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
149 ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY9]], [[COPY14]], implicit $exec
150 ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
151 ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY10]], [[COPY15]], implicit $exec
152 ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
153 ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY16]], [[V_AND_B32_e64_3]], implicit $exec
154 ; UNPACKED-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_2]], [[V_LSHLREV_B32_e64_1]], implicit $exec
155 ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]]
156 ; UNPACKED-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]]
157 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
158 %val = call <4 x half> @llvm.amdgcn.struct.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0)
162 define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_vindex0(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
163 ; PACKED-LABEL: name: struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_vindex0
164 ; PACKED: bb.1 (%ir-block.0):
165 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
166 ; PACKED-NEXT: {{ $}}
167 ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
168 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
169 ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
170 ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
171 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
172 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
173 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
174 ; PACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
175 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
176 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1
177 ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
178 ; PACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]]
179 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
181 ; UNPACKED-LABEL: name: struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_vindex0
182 ; UNPACKED: bb.1 (%ir-block.0):
183 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
184 ; UNPACKED-NEXT: {{ $}}
185 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
186 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
187 ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
188 ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
189 ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
190 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
191 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
192 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
193 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
194 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1
195 ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
196 ; UNPACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]]
197 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
198 %val = call half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32> %rsrc, i32 0, i32 %voffset, i32 %soffset, i32 78, i32 0)
202 define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__sgpr_voffset__vgpr_soffset(<4 x i32> %rsrc, i32 inreg %vindex, i32 inreg %voffset, i32 %soffset) {
203 ; PACKED-LABEL: name: struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__sgpr_voffset__vgpr_soffset
204 ; PACKED: bb.1 (%ir-block.0):
205 ; PACKED-NEXT: successors: %bb.2(0x80000000)
206 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
207 ; PACKED-NEXT: {{ $}}
208 ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
209 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
210 ; PACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
211 ; PACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
212 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
213 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
214 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
215 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4
216 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
217 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
218 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
219 ; PACKED-NEXT: {{ $}}
221 ; PACKED-NEXT: successors: %bb.3(0x80000000)
222 ; PACKED-NEXT: {{ $}}
223 ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
224 ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
225 ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
226 ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
227 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
228 ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
229 ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
230 ; PACKED-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
231 ; PACKED-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
232 ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
233 ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
234 ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
235 ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
236 ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
237 ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
238 ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
239 ; PACKED-NEXT: {{ $}}
241 ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
242 ; PACKED-NEXT: {{ $}}
243 ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
244 ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 8)
245 ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
246 ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
247 ; PACKED-NEXT: {{ $}}
249 ; PACKED-NEXT: successors: %bb.5(0x80000000)
250 ; PACKED-NEXT: {{ $}}
251 ; PACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
252 ; PACKED-NEXT: {{ $}}
254 ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0
255 ; PACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1
256 ; PACKED-NEXT: $vgpr0 = COPY [[COPY13]]
257 ; PACKED-NEXT: $vgpr1 = COPY [[COPY14]]
258 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
260 ; UNPACKED-LABEL: name: struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__sgpr_voffset__vgpr_soffset
261 ; UNPACKED: bb.1 (%ir-block.0):
262 ; UNPACKED-NEXT: successors: %bb.2(0x80000000)
263 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
264 ; UNPACKED-NEXT: {{ $}}
265 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
266 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
267 ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
268 ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
269 ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
270 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
271 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
272 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4
273 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
274 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
275 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
276 ; UNPACKED-NEXT: {{ $}}
277 ; UNPACKED-NEXT: bb.2:
278 ; UNPACKED-NEXT: successors: %bb.3(0x80000000)
279 ; UNPACKED-NEXT: {{ $}}
280 ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
281 ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
282 ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
283 ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
284 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
285 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
286 ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
287 ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
288 ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
289 ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
290 ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
291 ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
292 ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
293 ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
294 ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
295 ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
296 ; UNPACKED-NEXT: {{ $}}
297 ; UNPACKED-NEXT: bb.3:
298 ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
299 ; UNPACKED-NEXT: {{ $}}
300 ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
301 ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 8)
302 ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
303 ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
304 ; UNPACKED-NEXT: {{ $}}
305 ; UNPACKED-NEXT: bb.4:
306 ; UNPACKED-NEXT: successors: %bb.5(0x80000000)
307 ; UNPACKED-NEXT: {{ $}}
308 ; UNPACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
309 ; UNPACKED-NEXT: {{ $}}
310 ; UNPACKED-NEXT: bb.5:
311 ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0
312 ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1
313 ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2
314 ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3
315 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
316 ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
317 ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY13]], [[COPY17]], implicit $exec
318 ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
319 ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY14]], [[COPY18]], implicit $exec
320 ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16
321 ; UNPACKED-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
322 ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY19]], [[V_AND_B32_e64_1]], implicit $exec
323 ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec
324 ; UNPACKED-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
325 ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY15]], [[COPY20]], implicit $exec
326 ; UNPACKED-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
327 ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY16]], [[COPY21]], implicit $exec
328 ; UNPACKED-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
329 ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY22]], [[V_AND_B32_e64_3]], implicit $exec
330 ; UNPACKED-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_2]], [[V_LSHLREV_B32_e64_1]], implicit $exec
331 ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]]
332 ; UNPACKED-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]]
333 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
334 %val = call <4 x half> @llvm.amdgcn.struct.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0)
338 define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_voffset_add4095(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset.base, i32 inreg %soffset) {
339 ; PACKED-LABEL: name: struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_voffset_add4095
340 ; PACKED: bb.1 (%ir-block.0):
341 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
342 ; PACKED-NEXT: {{ $}}
343 ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
344 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
345 ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
346 ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
347 ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
348 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
349 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
350 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
351 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
352 ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
353 ; PACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]]
354 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
356 ; UNPACKED-LABEL: name: struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_voffset_add4095
357 ; UNPACKED: bb.1 (%ir-block.0):
358 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
359 ; UNPACKED-NEXT: {{ $}}
360 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
361 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
362 ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
363 ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
364 ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
365 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
366 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
367 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
368 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
369 ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
370 ; UNPACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]]
371 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
372 %voffset = add i32 %voffset.base, 4095
373 %val = call half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0)
377 declare half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) #0
378 declare <2 x half> @llvm.amdgcn.struct.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) #0
379 declare <3 x half> @llvm.amdgcn.struct.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) #0
380 declare <4 x half> @llvm.amdgcn.struct.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) #0
382 attributes #0 = { nounwind readonly }